From 33927f3d0ecdcff06326d6e4edb6166aed42811c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 12 Jul 2025 06:02:31 +0100 Subject: [PATCH 0001/1307] habanalabs: fix UAF in export_dmabuf() As soon as we'd inserted a file reference into descriptor table, another thread could close it. That's fine for the case when all we are doing is returning that descriptor to userland (it's a race, but it's a userland race and there's nothing the kernel can do about it). However, if we follow fd_install() with any kind of access to objects that would be destroyed on close (be it the struct file itself or anything destroyed by its ->release()), we have a UAF. dma_buf_fd() is a combination of reserving a descriptor and fd_install(). habanalabs export_dmabuf() calls it and then proceeds to access the objects destroyed on close. In particular, it grabs an extra reference to another struct file that will be dropped as part of ->release() for ours; that "will be" is actually "might have already been". Fix that by reserving descriptor before anything else and do fd_install() only when everything had been set up. As a side benefit, we no longer have the failure exit with file already created, but reference to underlying file (as well as ->dmabuf_export_cnt, etc.) not grabbed yet; unlike dma_buf_fd(), fd_install() can't fail. Fixes: db1a8dd916aa ("habanalabs: add support for dma-buf exporter") Signed-off-by: Al Viro --- drivers/accel/habanalabs/common/memory.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 601fdbe70179..61472a381904 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -1829,9 +1829,6 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf) struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; struct hl_ctx *ctx; - if (!hl_dmabuf) - return; - ctx = hl_dmabuf->ctx; if (hl_dmabuf->memhash_hnode) @@ -1859,7 +1856,12 @@ static int export_dmabuf(struct hl_ctx *ctx, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct hl_device *hdev = ctx->hdev; - int rc, fd; + CLASS(get_unused_fd, fd)(flags); + + if (fd < 0) { + dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); + return fd; + } exp_info.ops = &habanalabs_dmabuf_ops; exp_info.size = total_size; @@ -1872,13 +1874,6 @@ static int export_dmabuf(struct hl_ctx *ctx, return PTR_ERR(hl_dmabuf->dmabuf); } - fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); - if (fd < 0) { - dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); - rc = fd; - goto err_dma_buf_put; - } - hl_dmabuf->ctx = ctx; hl_ctx_get(hl_dmabuf->ctx); atomic_inc(&ctx->hdev->dmabuf_export_cnt); @@ -1890,13 +1885,9 @@ static int export_dmabuf(struct hl_ctx *ctx, get_file(ctx->hpriv->file_priv->filp); *dmabuf_fd = fd; + fd_install(take_fd(fd), hl_dmabuf->dmabuf->file); return 0; - -err_dma_buf_put: - hl_dmabuf->dmabuf->priv = NULL; - dma_buf_put(hl_dmabuf->dmabuf); - return rc; } static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) From a2f54ff15c3bdc0132e20aae041607e2320dbd73 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Jul 2025 13:17:00 +0900 Subject: [PATCH 0002/1307] scsi: core: sysfs: Correct sysfs attributes access rights The SCSI sysfs attributes "supported_mode" and "active_mode" do not define a store method and thus cannot be modified. Correct the DEVICE_ATTR() call for these two attributes to not include S_IWUSR to allow write access as they are read-only. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250728041700.76660-1-dlemoal@kernel.org Reviewed-by: John Garry Reviewed-by: Johannes Thumshin Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index d772258e29ad..e6464b998960 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -265,7 +265,7 @@ show_shost_supported_mode(struct device *dev, struct device_attribute *attr, return show_shost_mode(supported_mode, buf); } -static DEVICE_ATTR(supported_mode, S_IRUGO | S_IWUSR, show_shost_supported_mode, NULL); +static DEVICE_ATTR(supported_mode, S_IRUGO, show_shost_supported_mode, NULL); static ssize_t show_shost_active_mode(struct device *dev, @@ -279,7 +279,7 @@ show_shost_active_mode(struct device *dev, return show_shost_mode(shost->active_mode, buf); } -static DEVICE_ATTR(active_mode, S_IRUGO | S_IWUSR, show_shost_active_mode, NULL); +static DEVICE_ATTR(active_mode, S_IRUGO, show_shost_active_mode, NULL); static int check_reset_type(const char *str) { From 7b8346bd9fce6b76a96c6780d2e5bba76687f97f Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Tue, 29 Jul 2025 21:06:44 +0000 Subject: [PATCH 0003/1307] KVM: arm64: Don't attempt vLPI mappings when vPE allocation is disabled commit c652887a9288 ("KVM: arm64: vgic-v3: Allow userspace to write GICD_TYPER2.nASSGIcap") makes the allocation of vPEs depend on nASSGIcap for GICv4.1 hosts. While the vGIC v4 initialization and teardown is handled correctly, it erroneously attempts to establish a vLPI mapping to a VM that has no vPEs allocated: Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a8 Mem abort info: ESR = 0x0000000096000044 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000044, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000073a453b000 [00000000000000a8] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000044 [#1] SMP pstate: 23400009 (nzCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : its_irq_set_vcpu_affinity+0x58c/0x95c lr : its_irq_set_vcpu_affinity+0x1e0/0x95c sp : ffff8001029bb9e0 pmr_save: 00000060 x29: ffff8001029bba20 x28: ffff0001ca5e28c0 x27: 0000000000000000 x26: 0000000000000000 x25: ffff00019eee9f80 x24: ffff0001992b3f00 x23: ffff8001029bbab8 x22: ffff00001159fb80 x21: 00000000000024a7 x20: 00000000000024a7 x19: ffff00019eee9fb4 x18: 0000000000000494 x17: 000000000000000e x16: 0000000000000494 x15: 0000000000000002 x14: ffff0001a7f34600 x13: ffffccaad1203000 x12: 0000000000000018 x11: ffff000011991000 x10: 0000000000000000 x9 : 00000000000000a2 x8 : 00000000000020a8 x7 : 0000000000000000 x6 : 000000000000003f x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004 x2 : 0000000000000000 x1 : ffff8001029bbab8 x0 : 00000000000000a8 Call trace: its_irq_set_vcpu_affinity+0x58c/0x95c irq_set_vcpu_affinity+0x74/0xc8 its_map_vlpi+0x4c/0x94 kvm_vgic_v4_set_forwarding+0x134/0x298 kvm_arch_irq_bypass_add_producer+0x28/0x34 irq_bypass_register_producer+0xf8/0x1d8 vfio_msi_set_vector_signal+0x2c8/0x308 vfio_pci_set_msi_trigger+0x198/0x2d4 vfio_pci_set_irqs_ioctl+0xf0/0x104 vfio_pci_core_ioctl+0x6ac/0xc5c vfio_device_fops_unl_ioctl+0x128/0x370 __arm64_sys_ioctl+0x98/0xd0 el0_svc_common+0xd8/0x1d8 do_el0_svc+0x28/0x34 el0_svc+0x40/0xb8 el0t_64_sync_handler+0x70/0xbc el0t_64_sync+0x1a8/0x1ac Code: 321f0129 f940094a 8b080148 d1400900 (39000009) ---[ end trace 0000000000000000 ]--- Fix it by moving the GICv4.1 special-casing to vgic_supports_direct_msis(), returning false if the user explicitly disabled nASSGIcap for the VM. Fixes: c652887a9288 ("KVM: arm64: vgic-v3: Allow userspace to write GICD_TYPER2.nASSGIcap") Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250729210644.830364-1-rananta@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 8 ++++++++ arch/arm64/kvm/vgic/vgic.h | 10 +--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a3ef185209e9..70d50c77e5dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -50,6 +50,14 @@ bool vgic_has_its(struct kvm *kvm) bool vgic_supports_direct_msis(struct kvm *kvm) { + /* + * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, + * indirectly allowing userspace to control whether or not vPEs are + * allocated for the VM. + */ + if (system_supports_direct_sgis() && !vgic_supports_direct_sgis(kvm)) + return false; + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 1384a04c0784..de1c1d3261c3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -396,15 +396,7 @@ bool vgic_supports_direct_sgis(struct kvm *kvm); static inline bool vgic_supports_direct_irqs(struct kvm *kvm) { - /* - * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, - * indirectly allowing userspace to control whether or not vPEs are - * allocated for the VM. - */ - if (system_supports_direct_sgis()) - return vgic_supports_direct_sgis(kvm); - - return vgic_supports_direct_msis(kvm); + return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm); } int vgic_v4_init(struct kvm *kvm); From 79aef1a3705bbc95b36dad892af1f313490bd65c Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 30 Jul 2025 08:31:13 +0700 Subject: [PATCH 0004/1307] of: Clarify OF device context in of_match_device() comment Open Firmware abbreviation (OF) in of_match_device() comment is written in lowercase instead, which is mistaken for prepositional word "of" ([1], [2], [3], [4]) duplicate. Clarify the context. Link: https://lore.kernel.org/all/CAL_JsqLypcBCOVZ8yYWK0J_xc2Vcr+ANrX_3v4vN55Srp4RknQ@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/20220926185852.GA2581083-robh@kernel.org/ [2] Link: https://lore.kernel.org/all/CAL_JsqL4GvgFYzGUfhW5pvm4wYGrFaj6gHOYZjnOMuk2zCz67w@mail.gmail.com/ [3] Link: https://lore.kernel.org/all/20220627173825.GA2637590-robh@kernel.org/ [4] Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20250730013113.11264-1-bagasdotme@gmail.com Signed-off-by: Rob Herring (Arm) --- drivers/of/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index c80426510ec2..f7e75e527667 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -17,8 +17,8 @@ /** * of_match_device - Tell if a struct device matches an of_device_id list - * @matches: array of of device match structures to search in - * @dev: the of device structure to match against + * @matches: array of of_device_id match structures to search in + * @dev: the OF device structure to match against * * Used by a driver to check whether an platform_device present in the * system is in its list of supported devices. From 383cd6d879a18acdaa84c29330b25c49cbc0b490 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 29 Jul 2025 07:49:30 +0100 Subject: [PATCH 0005/1307] scsi: scsi_debug: Make read-only arrays static const Don't populate the read-only arrays on the stack at run time, instead make them static const. Also reduces overall size. before: text data bss dec hex filename 367439 89582 5952 462973 7107d drivers/scsi/scsi_debug.o after: text data bss dec hex filename 365847 90702 5952 462501 70ea5 drivers/scsi/scsi_debug.o (gcc 14.2.0, x86-64) Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20250729064930.1659007-1-colin.i.king@gmail.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 91 ++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 0847767d4d43..353cb60e1abe 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2674,8 +2674,10 @@ static int resp_rsup_tmfs(struct scsi_cmnd *scp, static int resp_err_recov_pg(unsigned char *p, int pcontrol, int target) { /* Read-Write Error Recovery page for mode_sense */ - unsigned char err_recov_pg[] = {0x1, 0xa, 0xc0, 11, 240, 0, 0, 0, - 5, 0, 0xff, 0xff}; + static const unsigned char err_recov_pg[] = { + 0x1, 0xa, 0xc0, 11, 240, 0, 0, 0, + 5, 0, 0xff, 0xff + }; memcpy(p, err_recov_pg, sizeof(err_recov_pg)); if (1 == pcontrol) @@ -2685,8 +2687,10 @@ static int resp_err_recov_pg(unsigned char *p, int pcontrol, int target) static int resp_disconnect_pg(unsigned char *p, int pcontrol, int target) { /* Disconnect-Reconnect page for mode_sense */ - unsigned char disconnect_pg[] = {0x2, 0xe, 128, 128, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; + static const unsigned char disconnect_pg[] = { + 0x2, 0xe, 128, 128, 0, 10, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; memcpy(p, disconnect_pg, sizeof(disconnect_pg)); if (1 == pcontrol) @@ -2696,9 +2700,11 @@ static int resp_disconnect_pg(unsigned char *p, int pcontrol, int target) static int resp_format_pg(unsigned char *p, int pcontrol, int target) { /* Format device page for mode_sense */ - unsigned char format_pg[] = {0x3, 0x16, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0x40, 0, 0, 0}; + static const unsigned char format_pg[] = { + 0x3, 0x16, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0x40, 0, 0, 0 + }; memcpy(p, format_pg, sizeof(format_pg)); put_unaligned_be16(sdebug_sectors_per, p + 10); @@ -2716,10 +2722,14 @@ static unsigned char caching_pg[] = {0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, static int resp_caching_pg(unsigned char *p, int pcontrol, int target) { /* Caching page for mode_sense */ - unsigned char ch_caching_pg[] = {/* 0x8, 18, */ 0x4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - unsigned char d_caching_pg[] = {0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, - 0xff, 0xff, 0xff, 0xff, 0x80, 0x14, 0, 0, 0, 0, 0, 0}; + static const unsigned char ch_caching_pg[] = { + /* 0x8, 18, */ 0x4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + static const unsigned char d_caching_pg[] = { + 0x8, 18, 0x14, 0, 0xff, 0xff, 0, 0, + 0xff, 0xff, 0xff, 0xff, 0x80, 0x14, 0, 0, 0, 0, 0, 0 + }; if (SDEBUG_OPT_N_WCE & sdebug_opts) caching_pg[2] &= ~0x4; /* set WCE=0 (default WCE=1) */ @@ -2738,8 +2748,10 @@ static int resp_ctrl_m_pg(unsigned char *p, int pcontrol, int target) { /* Control mode page for mode_sense */ unsigned char ch_ctrl_m_pg[] = {/* 0xa, 10, */ 0x6, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - unsigned char d_ctrl_m_pg[] = {0xa, 10, 2, 0, 0, 0, 0, 0, - 0, 0, 0x2, 0x4b}; + static const unsigned char d_ctrl_m_pg[] = { + 0xa, 10, 2, 0, 0, 0, 0, 0, + 0, 0, 0x2, 0x4b + }; if (sdebug_dsense) ctrl_m_pg[2] |= 0x4; @@ -2794,10 +2806,14 @@ static int resp_grouping_m_pg(unsigned char *p, int pcontrol, int target) static int resp_iec_m_pg(unsigned char *p, int pcontrol, int target) { /* Informational Exceptions control mode page for mode_sense */ - unsigned char ch_iec_m_pg[] = {/* 0x1c, 0xa, */ 0x4, 0xf, 0, 0, 0, 0, - 0, 0, 0x0, 0x0}; - unsigned char d_iec_m_pg[] = {0x1c, 0xa, 0x08, 0, 0, 0, 0, 0, - 0, 0, 0x0, 0x0}; + static const unsigned char ch_iec_m_pg[] = { + /* 0x1c, 0xa, */ 0x4, 0xf, 0, 0, 0, 0, + 0, 0, 0x0, 0x0 + }; + static const unsigned char d_iec_m_pg[] = { + 0x1c, 0xa, 0x08, 0, 0, 0, 0, 0, + 0, 0, 0x0, 0x0 + }; memcpy(p, iec_m_pg, sizeof(iec_m_pg)); if (1 == pcontrol) @@ -2809,8 +2825,9 @@ static int resp_iec_m_pg(unsigned char *p, int pcontrol, int target) static int resp_sas_sf_m_pg(unsigned char *p, int pcontrol, int target) { /* SAS SSP mode page - short format for mode_sense */ - unsigned char sas_sf_m_pg[] = {0x19, 0x6, - 0x6, 0x0, 0x7, 0xd0, 0x0, 0x0}; + static const unsigned char sas_sf_m_pg[] = { + 0x19, 0x6, 0x6, 0x0, 0x7, 0xd0, 0x0, 0x0 + }; memcpy(p, sas_sf_m_pg, sizeof(sas_sf_m_pg)); if (1 == pcontrol) @@ -2854,9 +2871,10 @@ static int resp_sas_pcd_m_spg(unsigned char *p, int pcontrol, int target, static int resp_sas_sha_m_spg(unsigned char *p, int pcontrol) { /* SAS SSP shared protocol specific port mode subpage */ - unsigned char sas_sha_m_pg[] = {0x59, 0x2, 0, 0xc, 0, 0x6, 0x10, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; + static const unsigned char sas_sha_m_pg[] = { + 0x59, 0x2, 0, 0xc, 0, 0x6, 0x10, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; memcpy(p, sas_sha_m_pg, sizeof(sas_sha_m_pg)); if (1 == pcontrol) @@ -2923,8 +2941,10 @@ static int process_medium_part_m_pg(struct sdebug_dev_info *devip, static int resp_compression_m_pg(unsigned char *p, int pcontrol, int target, unsigned char dce) { /* Compression page for mode_sense (tape) */ - unsigned char compression_pg[] = {0x0f, 14, 0x40, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 00, 00}; + static const unsigned char compression_pg[] = { + 0x0f, 14, 0x40, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }; memcpy(p, compression_pg, sizeof(compression_pg)); if (dce) @@ -3282,9 +3302,10 @@ static int resp_mode_select(struct scsi_cmnd *scp, static int resp_temp_l_pg(unsigned char *arr) { - unsigned char temp_l_pg[] = {0x0, 0x0, 0x3, 0x2, 0x0, 38, - 0x0, 0x1, 0x3, 0x2, 0x0, 65, - }; + static const unsigned char temp_l_pg[] = { + 0x0, 0x0, 0x3, 0x2, 0x0, 38, + 0x0, 0x1, 0x3, 0x2, 0x0, 65, + }; memcpy(arr, temp_l_pg, sizeof(temp_l_pg)); return sizeof(temp_l_pg); @@ -3292,8 +3313,9 @@ static int resp_temp_l_pg(unsigned char *arr) static int resp_ie_l_pg(unsigned char *arr) { - unsigned char ie_l_pg[] = {0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 38, - }; + static const unsigned char ie_l_pg[] = { + 0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 38, + }; memcpy(arr, ie_l_pg, sizeof(ie_l_pg)); if (iec_m_pg[2] & 0x4) { /* TEST bit set */ @@ -3305,11 +3327,12 @@ static int resp_ie_l_pg(unsigned char *arr) static int resp_env_rep_l_spg(unsigned char *arr) { - unsigned char env_rep_l_spg[] = {0x0, 0x0, 0x23, 0x8, - 0x0, 40, 72, 0xff, 45, 18, 0, 0, - 0x1, 0x0, 0x23, 0x8, - 0x0, 55, 72, 35, 55, 45, 0, 0, - }; + static const unsigned char env_rep_l_spg[] = { + 0x0, 0x0, 0x23, 0x8, + 0x0, 40, 72, 0xff, 45, 18, 0, 0, + 0x1, 0x0, 0x23, 0x8, + 0x0, 55, 72, 35, 55, 45, 0, 0, + }; memcpy(arr, env_rep_l_spg, sizeof(env_rep_l_spg)); return sizeof(env_rep_l_spg); From bb324f85f722848f5e5e53325bc00f13302e01d0 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 9 Jul 2025 09:45:01 +0700 Subject: [PATCH 0006/1307] drm/gpuvm: Wrap drm_gpuvm_sm_map_exec_lock() expected usage in literal code block Stephen Rothwell reports multiple indentation warnings when merging drm-msm tree: Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2445: ERROR: Unexpected indentation. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2447: WARNING: Block quote ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2451: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2452: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2456: ERROR: Unexpected indentation. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2457: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2458: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Documentation/gpu/drm-mm:506: ./drivers/gpu/drm/drm_gpuvm.c:2459: WARNING: Definition list ends without a blank line; unexpected unindent. [docutils] Fix these by wrapping drm_gpuvm_sm_map_exec_lock() expected usage example in literal code block. Fixes: 471920ce25d5 ("drm/gpuvm: Add locking helpers") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250708192038.6b0fd31d@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Acked-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Danilo Krummrich Patchwork: https://patchwork.freedesktop.org/patch/663121/ Signed-off-by: Rob Clark --- drivers/gpu/drm/drm_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index bbc7fecb6f4a..f62005ff9b2e 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2430,7 +2430,7 @@ static const struct drm_gpuvm_ops lock_ops = { * remapped, and locks+prepares (drm_exec_prepare_object()) objects that * will be newly mapped. * - * The expected usage is: + * The expected usage is:: * * vm_bind { * struct drm_exec exec; From 7abb543ff03e7874eba50a27ab025f09c96f6f7a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 9 Jul 2025 07:08:38 -0700 Subject: [PATCH 0007/1307] drm/msm: Fix build with KMS disabled When commit 98290b0a7d60 ("drm/msm: make it possible to disable KMS-related code.") was rebased on top of commit 3bebfd53af0f ("drm/msm: Defer VMA unmap for fb unpins"), the additional use of msm_kms was overlooked, resulting in a build break when KMS is disabled. Add some additional ifdef to fix that. Reported-by: Arnd Bergmann Fixes: 98290b0a7d60 ("drm/msm: make it possible to disable KMS-related code.") Signed-off-by: Rob Clark Tested-by: Arnd Bergmann Reviewed-by: Jessica Zhang Patchwork: https://patchwork.freedesktop.org/patch/663240/ --- drivers/gpu/drm/msm/msm_gem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 33d3354c6102..c853ab3a2cda 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -96,7 +96,6 @@ void msm_gem_vma_get(struct drm_gem_object *obj) void msm_gem_vma_put(struct drm_gem_object *obj) { struct msm_drm_private *priv = obj->dev->dev_private; - struct drm_exec exec; if (atomic_dec_return(&to_msm_bo(obj)->vma_ref)) return; @@ -104,9 +103,13 @@ void msm_gem_vma_put(struct drm_gem_object *obj) if (!priv->kms) return; +#ifdef CONFIG_DRM_MSM_KMS + struct drm_exec exec; + msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm); put_iova_spaces(obj, priv->kms->vm, true, "vma_put"); drm_exec_fini(&exec); /* drop locks */ +#endif } /* @@ -664,9 +667,13 @@ int msm_gem_set_iova(struct drm_gem_object *obj, static bool is_kms_vm(struct drm_gpuvm *vm) { +#ifdef CONFIG_DRM_MSM_KMS struct msm_drm_private *priv = vm->drm->dev_private; return priv->kms && (priv->kms->vm == vm); +#else + return false; +#endif } /* From f4ca529de235791aeeddc32ee6741a6b6872f564 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 17 Jul 2025 08:19:30 -0700 Subject: [PATCH 0008/1307] drm/msm: Fix pagetables setup/teardown serialization An atomic counter is not sufficient, as one task could still be in the process of tearing things down while another task increments the counter back up to one and begins setup again. The race condition existed since commit b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") but got bigger in commit dbbde63c9e9d ("drm/msm: Add PRR support"). Fixes: dbbde63c9e9d ("drm/msm: Add PRR support") Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/664433/ --- drivers/gpu/drm/msm/msm_iommu.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 55c29f49b788..76cdd5ea06a0 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -14,7 +14,9 @@ struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain; - atomic_t pagetables; + + struct mutex init_lock; /* protects pagetables counter and prr_page */ + int pagetables; struct page *prr_page; struct kmem_cache *pt_cache; @@ -227,7 +229,8 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) * If this is the last attached pagetable for the parent, * disable TTBR0 in the arm-smmu driver */ - if (atomic_dec_return(&iommu->pagetables) == 0) { + mutex_lock(&iommu->init_lock); + if (--iommu->pagetables == 0) { adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); if (adreno_smmu->set_prr_bit) { @@ -236,6 +239,7 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) iommu->prr_page = NULL; } } + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); @@ -568,9 +572,12 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m * If this is the first pagetable that we've allocated, send it back to * the arm-smmu driver as a trigger to set up TTBR0 */ - if (atomic_inc_return(&iommu->pagetables) == 1) { + mutex_lock(&iommu->init_lock); + if (iommu->pagetables++ == 0) { ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg); if (ret) { + iommu->pagetables--; + mutex_unlock(&iommu->init_lock); free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); return ERR_PTR(ret); @@ -595,6 +602,7 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m adreno_smmu->set_prr_bit(adreno_smmu->cookie, true); } } + mutex_unlock(&iommu->init_lock); /* Needed later for TLB flush */ pagetable->parent = parent; @@ -730,7 +738,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - atomic_set(&iommu->pagetables, 0); + mutex_init(&iommu->init_lock); ret = iommu_attach_device(iommu->domain, dev); if (ret) { From 25654a1756a4ace072404e89882d7ba8391900bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sun, 20 Jul 2025 18:42:31 -0300 Subject: [PATCH 0009/1307] drm/msm: Update global fault counter when faulty process has already ended MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The global fault counter is no longer used since commit 12578c075f89 ("drm/msm/gpu: Skip retired submits in recover worker"). However, it's still needed, as we need to handle cases where a GPU fault occurs after the faulting process has already ended. Hence, increment the global fault counter when the submitting process had already ended. This way, the number of faults returned by MSM_PARAM_FAULTS will stay consistent. While here, s/unusuable/unusable. Fixes: 12578c075f89 ("drm/msm/gpu: Skip retired submits in recover worker") Signed-off-by: Maíra Canal Patchwork: https://patchwork.freedesktop.org/patch/664853/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index c317b25a8162..416d47185ef0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -465,6 +465,7 @@ static void recover_worker(struct kthread_work *work) struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd = NULL; + struct task_struct *task; int i; mutex_lock(&gpu->lock); @@ -482,16 +483,20 @@ static void recover_worker(struct kthread_work *work) /* Increment the fault counts */ submit->queue->faults++; - if (submit->vm) { + + task = get_pid_task(submit->pid, PIDTYPE_PID); + if (!task) + gpu->global_faults++; + else { struct msm_gem_vm *vm = to_msm_vm(submit->vm); vm->faults++; /* * If userspace has opted-in to VM_BIND (and therefore userspace - * management of the VM), faults mark the VM as unusuable. This + * management of the VM), faults mark the VM as unusable. This * matches vulkan expectations (vulkan is the main target for - * VM_BIND) + * VM_BIND). */ if (!vm->managed) msm_gem_vm_unusable(submit->vm); From de651b6e040ba419418a37401e45d24f133e8a59 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 12:08:49 -0700 Subject: [PATCH 0010/1307] drm/msm: Fix refcnt underflow in error path If we hit an error path in GEM obj creation before msm_gem_new_handle() updates obj->resv to point to the gpuvm resv object, then obj->resv still points to &obj->_resv. In this case we don't want to decrement the refcount of the object being freed (since the refcnt is already zero). This fixes the following splat: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 9 PID: 7013 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148 Modules linked in: uinput snd_seq_dummy snd_hrtimer aes_ce_ccm snd_soc_wsa884x regmap_sdw q6prm_clocks q6apm_lpass_da> qcom_pil_info i2c_hid drm_kms_helper qcom_common qcom_q6v5 phy_snps_eusb2 qcom_geni_serial drm qcom_sysmon pinctrl_s> CPU: 9 UID: 1000 PID: 7013 Comm: deqp-vk Not tainted 6.16.0-rc4-debug+ #25 PREEMPT(voluntary) Hardware name: LENOVO 83ED/LNVNB161216, BIOS NHCN53WW 08/02/2024 pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : refcount_warn_saturate+0xf4/0x148 lr : refcount_warn_saturate+0xf4/0x148 sp : ffff8000a2073920 x29: ffff8000a2073920 x28: 0000000000000010 x27: 0000000000000010 x26: 0000000000000042 x25: ffff000810e09800 x24: 0000000000000010 x23: ffff8000a2073b94 x22: ffff000ddb22de00 x21: ffff000ddb22dc00 x20: ffff000ddb22ddf8 x19: ffff0008024934e0 x18: 000000000000000a x17: 0000000000000000 x16: ffff9f8c67d77340 x15: 0000000000000000 x14: 00000000ffffffff x13: 2e656572662d7265 x12: 7466612d65737520 x11: 3b776f6c66726564 x10: 00000000ffff7fff x9 : ffff9f8c67506c70 x8 : ffff9f8c69fa26f0 x7 : 00000000000bffe8 x6 : c0000000ffff7fff x5 : ffff000f53e14548 x4 : ffff6082ea2b2000 x3 : ffff0008b86ab080 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008b86ab080 Call trace: refcount_warn_saturate+0xf4/0x148 (P) msm_gem_free_object+0x248/0x260 [msm] drm_gem_object_free+0x24/0x40 [drm] msm_gem_new+0x1c4/0x1e0 [msm] msm_gem_new_handle+0x3c/0x1a0 [msm] msm_ioctl_gem_new+0x38/0x70 [msm] drm_ioctl_kernel+0xc8/0x138 [drm] drm_ioctl+0x2c8/0x618 [drm] __arm64_sys_ioctl+0xac/0x108 invoke_syscall.constprop.0+0x64/0xe8 el0_svc_common.constprop.0+0x40/0xe8 do_el0_svc+0x24/0x38 el0_svc+0x54/0x1d8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x19c/0x1a0 irq event stamp: 3698694 hardirqs last enabled at (3698693): [] __up_console_sem+0x74/0x90 hardirqs last disabled at (3698694): [] el1_dbg+0x24/0x90 softirqs last enabled at (3697578): [] handle_softirqs+0x454/0x4b0 softirqs last disabled at (3697567): [] __do_softirq+0x1c/0x28 ---[ end trace 0000000000000000 ]--- Fixes: b58e12a66e47 ("drm/msm: Add _NO_SHARE flag") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665355/ --- drivers/gpu/drm/msm/msm_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index c853ab3a2cda..9f0f5b77f1bd 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1121,10 +1121,12 @@ static void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); } - if (msm_obj->flags & MSM_BO_NO_SHARE) { + if (obj->resv != &obj->_resv) { struct drm_gem_object *r_obj = container_of(obj->resv, struct drm_gem_object, _resv); + WARN_ON(!(msm_obj->flags & MSM_BO_NO_SHARE)); + /* Drop reference we hold to shared resv obj: */ drm_gem_object_put(r_obj); } From ad70e46e130a7f4024961a5dd5ae0ee8e7d9a3c4 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 12:08:50 -0700 Subject: [PATCH 0011/1307] drm/msm: Fix submit error path cleanup submit_unpin_objects() should come before we unlock the objects. This fixes the splat: WARNING: CPU: 2 PID: 2171 at drivers/gpu/drm/msm/msm_gem.h:395 msm_gem_unpin_locked+0x8c/0xd8 [msm] Modules linked in: uinput snd_seq_dummy snd_hrtimer aes_ce_ccm snd_soc_wsa884x regmap_sdw q6prm_clocks q6apm_lpass_dais q6apm_dai snd_q6dsp_common q6prm snd_q6apm qcom_pd_mapper cdc_mbim cdc_wdm cdc_ncm r8153_ecm cdc_ether usbnet sunrpc nls_ascii nls_cp437 vfat fat snd_soc_x1e80100 snd_soc_lpass_rx_macro snd_soc_lpass_tx_macro snd_soc_lpass_va_macro snd_soc_lpass_wsa_macro snd_soc_qcom_common soundwire_qcom snd_soc_lpass_macro_common snd_soc_hdmi_codec snd_soc_qcom_sdw ext4 snd_soc_core snd_compress soundwire_bus snd_pcm_dmaengine snd_seq mbcache jbd2 snd_seq_device snd_pcm pm8941_pwrkey snd_timer r8152 qcom_spmi_temp_alarm industrialio snd lenovo_yoga_slim7x ath12k mii arm_smccc_trng soundcore rng_core evdev loop panel_samsung_atna33xc20 msm ubwc_config drm_client_lib drm_gpuvm drm_exec gpu_sched drm_display_helper pmic_glink_altmode aux_hpd_bridge ucsi_glink qcom_battmgr phy_qcom_qmp_combo ps883x cec aux_bridge drm_dp_aux_bus i2c_hid_of aes_ce_blk drm_kms_helper aes_ce_cipher i2c_hid qcom_q6v5_pas ghash_ce qcom_pil_info drm sha1_ce qcom_common phy_snps_eusb2 qcom_geni_serial qcom_q6v5 qcom_sysmon pinctrl_sm8550_lpass_lpi lpasscc_sc8280xp sbsa_gwdt mdt_loader gpio_keys pmic_glink i2c_dev efivarfs autofs4 CPU: 2 UID: 1000 PID: 2171 Comm: gnome-shell Not tainted 6.16.0-rc4-debug+ #25 PREEMPT(voluntary) Hardware name: LENOVO 83ED/LNVNB161216, BIOS NHCN53WW 08/02/2024 pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : msm_gem_unpin_locked+0x8c/0xd8 [msm] lr : msm_gem_unpin_locked+0x88/0xd8 [msm] sp : ffff80009c963820 x29: ffff80009c963820 x28: ffff80009c9639f8 x27: ffff00080552a830 x26: 0000000000000000 x25: ffff0009d5655800 x24: 0000000000000000 x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 x20: ffff000831db5480 x19: ffff000816e74400 x18: 0000000000000000 x17: 0000000000000000 x16: ffffc1396afdd720 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: ffff0008c065bc00 x11: ffff0008c065c000 x10: 0000000000000000 x9 : ffffc13945b19074 x8 : 0000000000000000 x7 : 0000000000000209 x6 : 0000000000000002 x5 : 0000000000019d01 x4 : ffff0008ba8db080 x3 : 000000000004093f x2 : ffff3ed5e727f000 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: msm_gem_unpin_locked+0x8c/0xd8 [msm] (P) msm_ioctl_gem_submit+0x32c/0x1760 [msm] drm_ioctl_kernel+0xc8/0x138 [drm] drm_ioctl+0x2c8/0x618 [drm] __arm64_sys_ioctl+0xac/0x108 invoke_syscall.constprop.0+0x64/0xe8 el0_svc_common.constprop.0+0x40/0xe8 do_el0_svc+0x24/0x38 el0_svc+0x54/0x1d8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x19c/0x1a0 irq event stamp: 2185036 hardirqs last enabled at (2185035): [] _raw_spin_unlock_irqrestore+0x74/0x80 hardirqs last disabled at (2185036): [] el1_dbg+0x24/0x90 softirqs last enabled at (2184778): [] fpsimd_restore_current_state+0x3c/0x328 softirqs last disabled at (2184776): [] fpsimd_restore_current_state+0xc/0x328 ---[ end trace 0000000000000000 ]--- Fixes: 111fdd2198e6 ("drm/msm: drm_gpuvm conversion") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665357/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5f8e939a5906..0ac4c199ec93 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -514,14 +514,15 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { + if (error) + submit_unpin_objects(submit); + if (submit->exec.objects) drm_exec_fini(&submit->exec); - if (error) { - submit_unpin_objects(submit); - /* job wasn't enqueued to scheduler, so early retirement: */ + /* if job wasn't enqueued to scheduler, early retirement: */ + if (error) msm_submit_retire(submit); - } } void msm_submit_retire(struct msm_gem_submit *submit) From f22853435bbd1e9836d0dce7fd99c040b94c2bf1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 13:28:22 -0700 Subject: [PATCH 0012/1307] drm/msm: Defer fd_install in SUBMIT ioctl Avoid fd_install() until there are no more potential error paths, to avoid put_unused_fd() after the fd is made visible to userspace. Fixes: 68dc6c2d5eec ("drm/msm: Fix submit error-path leaks") Reported-by: Dan Carpenter Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665363/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 0ac4c199ec93..bfea19baf6d9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -770,12 +770,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { sync_file = sync_file_create(submit->user_fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -813,10 +809,14 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(submit)) { From be71ce9796c36517c677ab1d3c6691423dd0bdec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 8 Jul 2025 10:51:23 +0200 Subject: [PATCH 0013/1307] drm/bridge: fix OF node leak Make sure to drop the OF node reference taken when creating the aux bridge device when the device is later released. Fixes: 6914968a0b52 ("drm/bridge: properly refcount DT nodes in aux bridge drivers") Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250708085124.15445-2-johan@kernel.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/aux-bridge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c index b63304d3a80f..b3e4cdff61d6 100644 --- a/drivers/gpu/drm/bridge/aux-bridge.c +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -18,6 +18,7 @@ static void drm_aux_bridge_release(struct device *dev) { struct auxiliary_device *adev = to_auxiliary_dev(dev); + of_node_put(dev->of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); @@ -65,6 +66,7 @@ int drm_aux_bridge_register(struct device *parent) ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); return ret; From 7c527c15cdda2e0a26a05ac15a44d3e14738fc55 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:12 +0900 Subject: [PATCH 0014/1307] firewire: core: use reference counting to invoke address handlers safely The lifetime of address handler has been managed by linked list and RCU. This approach was introduced in commit 35202f7d8420 ("firewire: remove global lock around address handlers, convert to RCU"). The invocations of address handler are performed within RCU read-side critical sections. In commit 57e6d9f85fff ("firewire: ohci: use workqueue to handle events of AR request/response contexts"), the invocations are in a workqueue context. The approach still imposes limitation that sleeping is not allowed within RCU read-side critical sections. However, since sleeping is not permitted within RCU read-side critical sections, this approach still has a limitation. This commit adds reference counting to decouple handler invocation from handler discovery. The linked list and RCU is used to discover the handlers, while the reference counting is used to invoke them safely. Link: https://lore.kernel.org/r/20250803122015.236493-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 32 +++++++++++++++++++++++++++-- include/linux/firewire.h | 4 ++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d28477d84697..29ca9f3f14ce 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -550,6 +550,23 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static void complete_address_handler(struct kref *kref) +{ + struct fw_address_handler *handler = container_of(kref, struct fw_address_handler, kref); + + complete(&handler->done); +} + +static void get_address_handler(struct fw_address_handler *handler) +{ + kref_get(&handler->kref); +} + +static int put_address_handler(struct fw_address_handler *handler) +{ + return kref_put(&handler->kref, complete_address_handler); +} + /** * fw_core_add_address_handler() - register for incoming requests * @handler: callback @@ -596,6 +613,8 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, if (other != NULL) { handler->offset += other->length; } else { + init_completion(&handler->done); + kref_init(&handler->kref); list_add_tail_rcu(&handler->link, &address_handler_list); ret = 0; break; @@ -621,6 +640,9 @@ void fw_core_remove_address_handler(struct fw_address_handler *handler) list_del_rcu(&handler->link); synchronize_rcu(); + + if (!put_address_handler(handler)) + wait_for_completion(&handler->done); } EXPORT_SYMBOL(fw_core_remove_address_handler); @@ -913,10 +935,13 @@ static void handle_exclusive_region_request(struct fw_card *card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) + if (handler) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } if (!handler) @@ -952,10 +977,13 @@ static void handle_fcp_region_request(struct fw_card *card, scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { - if (is_enclosing_handler(handler, offset, request->length)) + if (is_enclosing_handler(handler, offset, request->length)) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { From e8cf6875005b017c293bf1b9be707c43f3eff9f4 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:13 +0900 Subject: [PATCH 0015/1307] firewire: core: call handler for exclusive regions outside RCU read-side critical section The previous commit added reference counting to ensure safe invocations of address handlers. This commit moves the invocation of handlers for exclusive regions outside of the RCU read-side critical section. The address handler for the requested region is selected within the critical section, then invoked outside of it. Link: https://lore.kernel.org/r/20250803122015.236493-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 29ca9f3f14ce..a742971c65fa 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -935,17 +935,19 @@ static void handle_exclusive_region_request(struct fw_card *card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) { + if (handler) get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); - } } - if (!handler) + if (!handler) { fw_send_response(card, request, RCODE_ADDRESS_ERROR); + return; + } + + // Outside the RCU read-side critical section. Without spinlock. With reference count. + handler->address_callback(card, request, tcode, destination, source, p->generation, offset, + request->data, request->length, handler->callback_data); + put_address_handler(handler); } static void handle_fcp_region_request(struct fw_card *card, From e884a8a0c573ca5c191b269f31993733ecb6250e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:14 +0900 Subject: [PATCH 0016/1307] firewire: core: call FCP address handlers outside RCU read-side critical section The former commit added reference counting to ensure safe invocations of address handlers. Unlike the exclusive-region address handlers, all FCP address handlers should be called on receiving an FCP request. This commit uses the part of kernel stack to collect address handlers up to 4 within the section, then invoke them outside of the section. Reference counting ensures that each handler remains valid and safe to call. Lifting the limitation of supporting only 4 handlers is left for next work. Link: https://lore.kernel.org/r/20250803122015.236493-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index a742971c65fa..7a62c660e912 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -950,13 +950,17 @@ static void handle_exclusive_region_request(struct fw_card *card, put_address_handler(handler); } +// To use kmalloc allocator efficiently, this should be power of two. +#define BUFFER_ON_KERNEL_STACK_SIZE 4 + static void handle_fcp_region_request(struct fw_card *card, struct fw_packet *p, struct fw_request *request, unsigned long long offset) { - struct fw_address_handler *handler; - int tcode, destination, source; + struct fw_address_handler *buffer_on_kernel_stack[BUFFER_ON_KERNEL_STACK_SIZE]; + struct fw_address_handler *handler, **handlers; + int tcode, destination, source, i, count; if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || @@ -977,18 +981,27 @@ static void handle_fcp_region_request(struct fw_card *card, return; } + count = 0; + handlers = buffer_on_kernel_stack; scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { if (is_enclosing_handler(handler, offset, request->length)) { get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); + handlers[count] = handler; + if (++count >= ARRAY_SIZE(buffer_on_kernel_stack)) + break; } } } + for (i = 0; i < count; ++i) { + handler = handlers[i]; + handler->address_callback(card, request, tcode, destination, source, + p->generation, offset, request->data, + request->length, handler->callback_data); + put_address_handler(handler); + } + fw_send_response(card, request, RCODE_COMPLETE); } From 0342273e14c25971f2916de2b598db2e9cfeec15 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 Aug 2025 21:20:15 +0900 Subject: [PATCH 0017/1307] firewire: core: reallocate buffer for FCP address handlers when more than 4 are registered The former commit has a limitation that only up to 4 FCP address handlers could be processed per request. Although it suffices for most use cases, it is technically a regression. This commit lifts the restriction by reallocating the buffer from kernel heap when more than 4 handlers are registered. The allocation is performed within RCU read-side critical section, thus it uses GCP_ATOMIC flag. The buffer size is rounded up to the next power of two to align with kmalloc allocation units. Link: https://lore.kernel.org/r/20250803122015.236493-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 36 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 7a62c660e912..1d1c2d8f85ae 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -960,7 +960,7 @@ static void handle_fcp_region_request(struct fw_card *card, { struct fw_address_handler *buffer_on_kernel_stack[BUFFER_ON_KERNEL_STACK_SIZE]; struct fw_address_handler *handler, **handlers; - int tcode, destination, source, i, count; + int tcode, destination, source, i, count, buffer_size; if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || @@ -983,13 +983,38 @@ static void handle_fcp_region_request(struct fw_card *card, count = 0; handlers = buffer_on_kernel_stack; + buffer_size = ARRAY_SIZE(buffer_on_kernel_stack); scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { if (is_enclosing_handler(handler, offset, request->length)) { + if (count >= buffer_size) { + int next_size = buffer_size * 2; + struct fw_address_handler **buffer_on_kernel_heap; + + if (handlers == buffer_on_kernel_stack) + buffer_on_kernel_heap = NULL; + else + buffer_on_kernel_heap = handlers; + + buffer_on_kernel_heap = + krealloc_array(buffer_on_kernel_heap, next_size, + sizeof(*buffer_on_kernel_heap), GFP_ATOMIC); + // FCP is used for purposes unrelated to significant system + // resources (e.g. storage or networking), so allocation + // failures are not considered so critical. + if (!buffer_on_kernel_heap) + break; + + if (handlers == buffer_on_kernel_stack) { + memcpy(buffer_on_kernel_heap, buffer_on_kernel_stack, + sizeof(buffer_on_kernel_stack)); + } + + handlers = buffer_on_kernel_heap; + buffer_size = next_size; + } get_address_handler(handler); - handlers[count] = handler; - if (++count >= ARRAY_SIZE(buffer_on_kernel_stack)) - break; + handlers[count++] = handler; } } } @@ -1002,6 +1027,9 @@ static void handle_fcp_region_request(struct fw_card *card, put_address_handler(handler); } + if (handlers != buffer_on_kernel_stack) + kfree(handlers); + fw_send_response(card, request, RCODE_COMPLETE); } From 584460393efbcccb6388b1cd5d37284b5326709c Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Wed, 16 Jul 2025 20:55:55 +0800 Subject: [PATCH 0018/1307] drm/bridge: Describe the newly introduced drm_connector parameter for drm_bridge_detect This fix the make htmldocs warnings: drivers/gpu/drm/drm_bridge.c:1242: warning: Function parameter or struct member 'connector' not described in 'drm_bridge_detect' Fixes: 5d156a9c3d5e ("drm/bridge: Pass down connector to drm bridge detect hook") Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250716125602.3166573-1-andyshrk@163.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/drm_bridge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index dd45d9b504d8..4bde00083047 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1227,6 +1227,7 @@ EXPORT_SYMBOL(drm_atomic_bridge_chain_check); /** * drm_bridge_detect - check if anything is attached to the bridge output * @bridge: bridge control structure + * @connector: attached connector * * If the bridge supports output detection, as reported by the * DRM_BRIDGE_OP_DETECT bridge ops flag, call &drm_bridge_funcs.detect for the From e4a718a3a47e89805c3be9d46a84de1949a98d5d Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Wed, 23 Jul 2025 10:09:07 +0800 Subject: [PATCH 0019/1307] tee: fix NULL pointer dereference in tee_shm_put tee_shm_put have NULL pointer dereference: __optee_disable_shm_cache --> shm = reg_pair_to_ptr(...);//shm maybe return NULL tee_shm_free(shm); --> tee_shm_put(shm);//crash Add check in tee_shm_put to fix it. panic log: Unable to handle kernel paging request at virtual address 0000000000100cca Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000002049d07000 [0000000000100cca] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP CPU: 2 PID: 14442 Comm: systemd-sleep Tainted: P OE ------- ---- 6.6.0-39-generic #38 Source Version: 938b255f6cb8817c95b0dd5c8c2944acfce94b07 Hardware name: greatwall GW-001Y1A-FTH, BIOS Great Wall BIOS V3.0 10/26/2022 pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : tee_shm_put+0x24/0x188 lr : tee_shm_free+0x14/0x28 sp : ffff001f98f9faf0 x29: ffff001f98f9faf0 x28: ffff0020df543cc0 x27: 0000000000000000 x26: ffff001f811344a0 x25: ffff8000818dac00 x24: ffff800082d8d048 x23: ffff001f850fcd18 x22: 0000000000000001 x21: ffff001f98f9fb88 x20: ffff001f83e76218 x19: ffff001f83e761e0 x18: 000000000000ffff x17: 303a30303a303030 x16: 0000000000000000 x15: 0000000000000003 x14: 0000000000000001 x13: 0000000000000000 x12: 0101010101010101 x11: 0000000000000001 x10: 0000000000000001 x9 : ffff800080e08d0c x8 : ffff001f98f9fb88 x7 : 0000000000000000 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : ffff001f83e761e0 x1 : 00000000ffff001f x0 : 0000000000100cca Call trace: tee_shm_put+0x24/0x188 tee_shm_free+0x14/0x28 __optee_disable_shm_cache+0xa8/0x108 optee_shutdown+0x28/0x38 platform_shutdown+0x28/0x40 device_shutdown+0x144/0x2b0 kernel_power_off+0x3c/0x80 hibernate+0x35c/0x388 state_store+0x64/0x80 kobj_attr_store+0x14/0x28 sysfs_kf_write+0x48/0x60 kernfs_fop_write_iter+0x128/0x1c0 vfs_write+0x270/0x370 ksys_write+0x6c/0x100 __arm64_sys_write+0x20/0x30 invoke_syscall+0x4c/0x120 el0_svc_common.constprop.0+0x44/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x24/0x88 el0t_64_sync_handler+0x134/0x150 el0t_64_sync+0x14c/0x15 Fixes: dfd0743f1d9e ("tee: handle lookup of shm with reference count 0") Signed-off-by: Pei Xiao Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index daf6e5cfd59a..915239b033f5 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -560,9 +560,13 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { - struct tee_device *teedev = shm->ctx->teedev; + struct tee_device *teedev; bool do_release = false; + if (!shm || !shm->ctx || !shm->ctx->teedev) + return; + + teedev = shm->ctx->teedev; mutex_lock(&teedev->mutex); if (refcount_dec_and_test(&shm->refcount)) { /* From 50a74d0095cd23d2012133e208df45a298868870 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Wed, 23 Jul 2025 14:22:41 +0800 Subject: [PATCH 0020/1307] tee: fix memory leak in tee_dyn_shm_alloc_helper When shm_register() fails in tee_dyn_shm_alloc_helper(), the pre-allocated pages array is not freed, resulting in a memory leak. Fixes: cf4441503e20 ("tee: optee: Move pool_op helper functions") Signed-off-by: Pei Xiao Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 915239b033f5..2a7d253d9c55 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -230,7 +230,7 @@ int tee_dyn_shm_alloc_helper(struct tee_shm *shm, size_t size, size_t align, pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); if (!pages) { rc = -ENOMEM; - goto err; + goto err_pages; } for (i = 0; i < nr_pages; i++) @@ -243,11 +243,13 @@ int tee_dyn_shm_alloc_helper(struct tee_shm *shm, size_t size, size_t align, rc = shm_register(shm->ctx, shm, pages, nr_pages, (unsigned long)shm->kaddr); if (rc) - goto err; + goto err_kfree; } return 0; -err: +err_kfree: + kfree(pages); +err_pages: free_pages_exact(shm->kaddr, shm->size); shm->kaddr = NULL; return rc; From 6bc829220b33da8522572cc50fdf5067c51d3bf3 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Fri, 1 Aug 2025 09:58:35 +0200 Subject: [PATCH 0021/1307] spi: spi-qpic-snand: use correct CW_PER_PAGE value for OOB write The qcom_spi_program_oob() function uses only the last codeword to write the OOB data into the flash, but it sets the CW_PER_PAGE field in the CFG0 register as it would use all codewords. It seems that this confuses the hardware somehow, and any access to the flash fails with a timeout error after the function is called. The problem can be easily reproduced with the following commands: # dd if=/dev/zero bs=2176 count=1 > /tmp/test.bin 1+0 records in 1+0 records out # flash_erase /dev/mtd4 0 0 Erasing 128 Kibyte @ 0 -- 100 % complete # nandwrite -O /dev/mtd4 /tmp/test.bin Writing data to block 0 at offset 0x0 # nanddump -o /dev/mtd4 >/dev/null ECC failed: 0 ECC corrected: 0 Number of bad blocks: 0 Number of bbt blocks: 0 Block size 131072, page size 2048, OOB size 128 Dumping data starting at 0x00000000 and ending at 0x00020000... [ 33.197605] qcom_snand 79b0000.spi: failure to read oob libmtd: error!: MEMREADOOB64 ioctl failed for mtd4, offset 0 (eraseblock 0) error 110 (Operation timed out) [ 35.277582] qcom_snand 79b0000.spi: failure in submitting cmd descriptor libmtd: error!: cannot read 2048 bytes from mtd4 (eraseblock 0, offset 2048) error 110 (Operation timed out) nanddump: error!: mtd_read Change the code to use the correct CW_PER_PAGE value to avoid this. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250801-qpic-snand-oob-cwpp-fix-v1-1-f5a41b86af2e@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 0cfa0d960fd3..5216d60e01aa 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1196,7 +1196,7 @@ static int qcom_spi_program_oob(struct qcom_nand_controller *snandc, u32 cfg0, cfg1, ecc_bch_cfg, ecc_buf_cfg; cfg0 = (ecc_cfg->cfg0 & ~CW_PER_PAGE_MASK) | - FIELD_PREP(CW_PER_PAGE_MASK, num_cw - 1); + FIELD_PREP(CW_PER_PAGE_MASK, 0); cfg1 = ecc_cfg->cfg1; ecc_bch_cfg = ecc_cfg->ecc_bch_cfg; ecc_buf_cfg = ecc_cfg->ecc_buf_cfg; From 72332439e6b0a39e763d4604e71774ab83423275 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 4 Aug 2025 11:50:13 +0200 Subject: [PATCH 0022/1307] spi: spi-mem: Add missing kdoc argument The "*mem" pointer has been added without description, describe it in the kdoc comment in order to fix the following W=1 warning: Warning: drivers/spi/spi-mem.c:594 function parameter 'mem' not described in 'spi_mem_calc_op_duration' Fixes: a11a51896572 ("spi: spi-mem: Take into account the actual maximum frequency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507310409.fnuQ21qb-lkp@intel.com/ Signed-off-by: Miquel Raynal Link: https://patch.msgid.link/20250804095013.409700-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 5db0639d3b01..dfa8ab1ec80f 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -577,6 +577,7 @@ EXPORT_SYMBOL_GPL(spi_mem_adjust_op_freq); * spi_mem_calc_op_duration() - Derives the theoretical length (in ns) of an * operation. This helps finding the best variant * among a list of possible choices. + * @mem: the SPI memory * @op: the operation to benchmark * * Some chips have per-op frequency limitations, PCBs usually have their own From 07f557f60a9a4e15288c29a2924e19e44200db51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Jul 2025 11:18:28 +0100 Subject: [PATCH 0023/1307] KVM: arm64: nv: Properly check ESR_EL2.VNCR on taking a VNCR_EL2 related fault Instead of checking for the ESR_EL2.VNCR bit being set (the only case we should be here), we are actually testing random bits in ESR_EL2.DFSC. 13 obviously being a lucky number, it matches both permission and translation fault status codes, which explains why we never saw it failing. This was found by inspection, while reviewing a vaguely related patch. Whilst we're at it, turn the BUG_ON() into a WARN_ON_ONCE(), as exploding here is just silly. Fixes: 069a05e535496 ("KVM: arm64: nv: Handle VNCR_EL2-triggered faults") Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20250730101828.1168707-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index efb37aad11ec..74a2a94dad9b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1287,7 +1287,7 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) struct vncr_tlb *vt = vcpu->arch.vncr_tlb; u64 esr = kvm_vcpu_get_esr(vcpu); - BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT)); + WARN_ON_ONCE(!(esr & ESR_ELx_VNCR)); if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); From 69f8fe955d0b4a951c3726669ab58360ed562484 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 29 Jul 2025 11:23:42 -0700 Subject: [PATCH 0024/1307] KVM: arm64: nv: Handle SEAs due to VNCR redirection System register accesses redirected to the VNCR page can also generate external aborts just like any other form of memory access. Route to kvm_handle_guest_sea() for potential APEI handling, falling back to a vSError if the kernel didn't handle the abort. Take the opportunity to throw out the useless kvm_ras.h which provided a helper with a single callsite... Cc: Jiaqi Yan Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250729182342.3281742-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 1 + arch/arm64/include/asm/kvm_ras.h | 25 ------------------------- arch/arm64/kvm/mmu.c | 29 +++++++++++++++++------------ arch/arm64/kvm/nested.c | 3 +++ 4 files changed, 21 insertions(+), 37 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_ras.h diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ae563ebd6aee..e4069f2ce642 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,6 +180,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h deleted file mode 100644 index 9398ade632aa..000000000000 --- a/arch/arm64/include/asm/kvm_ras.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2018 - Arm Ltd */ - -#ifndef __ARM64_KVM_RAS_H__ -#define __ARM64_KVM_RAS_H__ - -#include -#include -#include - -#include - -/* - * Was this synchronous external abort a RAS notification? - * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. - */ -static inline int kvm_handle_guest_sea(void) -{ - /* apei_claim_sea(NULL) expects to mask interrupts itself */ - lockdep_assert_irqs_enabled(); - - return apei_claim_sea(NULL); -} - -#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1c78864767c5..9a45daf817bf 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -4,19 +4,20 @@ * Author: Christoffer Dall */ +#include #include #include #include #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -1811,6 +1812,19 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) read_unlock(&vcpu->kvm->mmu_lock); } +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu) +{ + /* + * Give APEI the opportunity to claim the abort before handling it + * within KVM. apei_claim_sea() expects to be called with IRQs enabled. + */ + lockdep_assert_irqs_enabled(); + if (apei_claim_sea(NULL) == 0) + return 1; + + return kvm_inject_serror(vcpu); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1834,17 +1848,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea()) - return kvm_inject_serror(vcpu); - - return 1; - } + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); esr = kvm_vcpu_get_esr(vcpu); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 74a2a94dad9b..64b980dbfbf8 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1289,6 +1289,9 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!(esr & ESR_ELx_VNCR)); + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); + if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); } else if (esr_fsc_is_translation_fault(esr)) { From eec8e8c048caa826ecbde7bf40f0ac2d11eef99d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 1 Aug 2025 13:46:41 +0300 Subject: [PATCH 0025/1307] drm/bridge: document HDMI CEC callbacks Provide documentation for the drm_bridge callbacks related to the DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag. Fixes: a74288c8ded7 ("drm/display: bridge-connector: handle CEC adapters") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20250611140933.1429a1b8@canb.auug.org.au Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250801-drm-hdmi-cec-docs-v1-1-be63e6008d0e@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 8ed80cad77ec..b0e6653ee42e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -866,13 +866,61 @@ struct drm_bridge_funcs { struct drm_connector *connector, bool enable, int direction); + /** + * @hdmi_cec_init: + * + * Initialize CEC part of the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_init)(struct drm_bridge *bridge, struct drm_connector *connector); + /** + * @hdmi_cec_enable: + * + * Enable or disable the CEC adapter inside the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_enable)(struct drm_bridge *bridge, bool enable); + /** + * @hdmi_cec_log_addr: + * + * Set the logical address of the CEC adapter inside the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_log_addr)(struct drm_bridge *bridge, u8 logical_addr); + /** + * @hdmi_cec_transmit: + * + * Transmit the message using the CEC adapter inside the bridge. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_CEC_ADAPTER flag in their + * &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ int (*hdmi_cec_transmit)(struct drm_bridge *bridge, u8 attempts, u32 signal_free_time, struct cec_msg *msg); From f9a348e0de19226fc3c7e81de7677d3fa2c4b2d8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2025 09:34:29 -0400 Subject: [PATCH 0026/1307] nfsd: don't set the ctime on delegated atime updates Clients will typically precede a DELEGRETURN for a delegation with delegated timestamp with a SETATTR to set the timestamps on the server to match what the client has. knfsd implements this by using the nfsd_setattr() infrastructure, which will set ATTR_CTIME on any update that goes to notify_change(). This is problematic as it means that the client will get a spurious ctime update when updating the atime. POSIX unfortunately doesn't phrase it succinctly, but updating the atime due to reads should not update the ctime. In this case, the client is sending a SETATTR to update the atime on the server to match its latest value. The ctime should not be advanced in this case as that would incorrectly indicate a change to the inode. Fix this by not implicitly setting ATTR_CTIME when ATTR_DELEG is set in __nfsd_setattr(). The decoder for FATTR4_WORD2_TIME_DELEG_MODIFY already sets ATTR_CTIME, so this is sufficient to make it skip setting the ctime on atime-only updates. Fixes: 7e13f4f8d27d ("nfsd: handle delegated timestamps in SETATTR") Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ee78b6fb1709..eaf04751d07f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -470,7 +470,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (!iap->ia_valid) return 0; - iap->ia_valid |= ATTR_CTIME; + /* + * If ATTR_DELEG is set, then this is an update from a client that + * holds a delegation. If this is an update for only the atime, the + * ctime should not be changed. If the update contains the mtime + * too, then ATTR_CTIME should already be set. + */ + if (!(iap->ia_valid & ATTR_DELEG)) + iap->ia_valid |= ATTR_CTIME; + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } From e5a73150776f18547ee685c9f6bfafe549714899 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:14 +1000 Subject: [PATCH 0027/1307] nfsd: avoid ref leak in nfsd_open_local_fh() If two calls to nfsd_open_local_fh() race and both successfully call nfsd_file_acquire_local(), they will both get an extra reference to the net to accompany the file reference stored in *pnf. One of them will fail to store (using xchg()) the file reference in *pnf and will drop that reference but WON'T drop the accompanying reference to the net. This leak means that when the nfs server is shut down it will hang in nfsd_shutdown_net() waiting for &nn->nfsd_net_free_done. This patch adds the missing nfsd_net_put(). Reported-by: Mike Snitzer Fixes: e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Tested-by: Mike Snitzer Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/localio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 4f6468eb2adf..cb237f1b902a 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -103,10 +103,11 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfsd_file_get(new) == NULL) goto again; /* - * Drop the ref we were going to install and the - * one we were going to return. + * Drop the ref we were going to install (both file and + * net) and the one we were going to return (only file). */ nfsd_file_put(localio); + nfsd_net_put(net); nfsd_file_put(localio); localio = new; } From 475356fe2814f2f0b188da8bf0f1fcc579d81272 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 28 Jul 2025 22:11:54 +0200 Subject: [PATCH 0028/1307] kasan/test: fix protection against compiler elision The kunit test is using assignments to "static volatile void *kasan_ptr_result" to prevent elision of memory loads, but that's not working: In this variable definition, the "volatile" applies to the "void", not to the pointer. To make "volatile" apply to the pointer as intended, it must follow after the "*". This makes the kasan_memchr test pass again on my system. The kasan_strings test is still failing because all the definitions of load_unaligned_zeropad() are lacking explicit instrumentation hooks and ASAN does not instrument asm() memory operands. Link: https://lkml.kernel.org/r/20250728-kasan-kunit-fix-volatile-v1-1-e7157c9af82d@google.com Fixes: 5f1c8108e7ad ("mm:kasan: fix sparse warnings: Should it be static?") Signed-off-by: Jann Horn Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Nihar Chaithanya Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 2aa12dfa427a..e0968acc03aa 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -47,7 +47,7 @@ static struct { * Some tests use these global variables to store return values from function * calls that could otherwise be eliminated by the compiler as dead code. */ -static volatile void *kasan_ptr_result; +static void *volatile kasan_ptr_result; static volatile int kasan_int_result; /* Probe for console output: obtains test_status lines of interest. */ From 47b0f6d8f0d2be4d311a49e13d2fd5f152f492b2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 31 Jul 2025 02:57:18 -0700 Subject: [PATCH 0029/1307] mm/kmemleak: avoid deadlock by moving pr_warn() outside kmemleak_lock When netpoll is enabled, calling pr_warn_once() while holding kmemleak_lock in mem_pool_alloc() can cause a deadlock due to lock inversion with the netconsole subsystem. This occurs because pr_warn_once() may trigger netpoll, which eventually leads to __alloc_skb() and back into kmemleak code, attempting to reacquire kmemleak_lock. This is the path for the deadlock. mem_pool_alloc() -> raw_spin_lock_irqsave(&kmemleak_lock, flags); -> pr_warn_once() -> netconsole subsystem -> netpoll -> __alloc_skb -> __create_object -> raw_spin_lock_irqsave(&kmemleak_lock, flags); Fix this by setting a flag and issuing the pr_warn_once() after kmemleak_lock is released. Link: https://lkml.kernel.org/r/20250731-kmemleak_lock-v1-1-728fd470198f@debian.org Fixes: c5665868183f ("mm: kmemleak: use the memory pool for early allocations") Signed-off-by: Breno Leitao Reported-by: Jakub Kicinski Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8d588e685311..e0333455c738 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -470,6 +470,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) { unsigned long flags; struct kmemleak_object *object; + bool warn = false; /* try the slab allocator first */ if (object_cache) { @@ -488,8 +489,10 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) else if (mem_pool_free_count) object = &mem_pool[--mem_pool_free_count]; else - pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); + warn = true; raw_spin_unlock_irqrestore(&kmemleak_lock, flags); + if (warn) + pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); return object; } From 5a309dbf1f829de7f8dc84a518d0b6e7e9be9994 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 30 Jul 2025 23:25:08 +0900 Subject: [PATCH 0030/1307] MAINTAINERS: add Masami as a reviewer of hung task detector Since I'm actively working on hung task blocker detector, add myself to a reviewer of the HUNG TASK DETECTOR feature. Link: https://lkml.kernel.org/r/175388550841.627474.3260499035226455392.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Acked-by: Lance Yang Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1b57dd4fcf01..3f957983c192 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11438,6 +11438,7 @@ F: drivers/tty/hvc/ HUNG TASK DETECTOR M: Andrew Morton R: Lance Yang +R: Masami Hiramatsu L: linux-kernel@vger.kernel.org S: Maintained F: include/linux/hung_task.h From d1534ae23c2b6be350c8ab060803fbf6e9682adc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 28 Jul 2025 15:02:48 -0400 Subject: [PATCH 0031/1307] mm/kmemleak: avoid soft lockup in __kmemleak_do_cleanup() A soft lockup warning was observed on a relative small system x86-64 system with 16 GB of memory when running a debug kernel with kmemleak enabled. watchdog: BUG: soft lockup - CPU#8 stuck for 33s! [kworker/8:1:134] The test system was running a workload with hot unplug happening in parallel. Then kemleak decided to disable itself due to its inability to allocate more kmemleak objects. The debug kernel has its CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE set to 40,000. The soft lockup happened in kmemleak_do_cleanup() when the existing kmemleak objects were being removed and deleted one-by-one in a loop via a workqueue. In this particular case, there are at least 40,000 objects that need to be processed and given the slowness of a debug kernel and the fact that a raw_spinlock has to be acquired and released in __delete_object(), it could take a while to properly handle all these objects. As kmemleak has been disabled in this case, the object removal and deletion process can be further optimized as locking isn't really needed. However, it is probably not worth the effort to optimize for such an edge case that should rarely happen. So the simple solution is to call cond_resched() at periodic interval in the iteration loop to avoid soft lockup. Link: https://lkml.kernel.org/r/20250728190248.605750-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e0333455c738..84265983f239 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -2184,6 +2184,7 @@ static const struct file_operations kmemleak_fops = { static void __kmemleak_do_cleanup(void) { struct kmemleak_object *object, *tmp; + unsigned int cnt = 0; /* * Kmemleak has already been disabled, no need for RCU list traversal @@ -2192,6 +2193,10 @@ static void __kmemleak_do_cleanup(void) list_for_each_entry_safe(object, tmp, &object_list, object_list) { __remove_object(object); __delete_object(object); + + /* Call cond_resched() once per 64 iterations to avoid soft lockup */ + if (!(++cnt & 0x3f)) + cond_resched(); } } From 366a4532d96fc357998465133db34d34edb79e4c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 5 Aug 2025 11:54:47 +0800 Subject: [PATCH 0032/1307] mm: fix the race between collapse and PT_RECLAIM under per-vma lock The check_pmd_still_valid() call during collapse is currently only protected by the mmap_lock in write mode, which was sufficient when pt_reclaim always ran under mmap_lock in read mode. However, since madvise_dontneed can now execute under a per-VMA lock, this assumption is no longer valid. As a result, a race condition can occur between collapse and PT_RECLAIM, potentially leading to a kernel panic. [ 38.151897] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] SMP KASI [ 38.153519] KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] [ 38.154605] CPU: 0 UID: 0 PID: 721 Comm: repro Not tainted 6.16.0-next-20250801-next-2025080 #1 PREEMPT(voluntary) [ 38.155929] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org4 [ 38.157418] RIP: 0010:kasan_byte_accessible+0x15/0x30 [ 38.158125] Code: 03 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 48 b8 00 00 00 00 00 fc0 [ 38.160461] RSP: 0018:ffff88800feef678 EFLAGS: 00010286 [ 38.161220] RAX: dffffc0000000000 RBX: 0000000000000001 RCX: 1ffffffff0dde60c [ 38.162232] RDX: 0000000000000000 RSI: ffffffff85da1e18 RDI: dffffc0000000003 [ 38.163176] RBP: ffff88800feef698 R08: 0000000000000001 R09: 0000000000000000 [ 38.164195] R10: 0000000000000000 R11: ffff888016a8ba58 R12: 0000000000000018 [ 38.165189] R13: 0000000000000018 R14: ffffffff85da1e18 R15: 0000000000000000 [ 38.166100] FS: 0000000000000000(0000) GS:ffff8880e3b40000(0000) knlGS:0000000000000000 [ 38.167137] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 38.167891] CR2: 00007f97fadfe504 CR3: 0000000007088005 CR4: 0000000000770ef0 [ 38.168812] PKRU: 55555554 [ 38.169275] Call Trace: [ 38.169647] [ 38.169975] ? __kasan_check_byte+0x19/0x50 [ 38.170581] lock_acquire+0xea/0x310 [ 38.171083] ? rcu_is_watching+0x19/0xc0 [ 38.171615] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.172343] ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30 [ 38.173130] _raw_spin_lock+0x38/0x50 [ 38.173707] ? __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174390] __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174987] ? __pfx___pte_offset_map_lock+0x10/0x10 [ 38.175724] ? __pfx_pud_val+0x10/0x10 [ 38.176308] ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30 [ 38.177183] unmap_page_range+0xb60/0x43e0 [ 38.177824] ? __pfx_unmap_page_range+0x10/0x10 [ 38.178485] ? mas_next_slot+0x133a/0x1a50 [ 38.179079] unmap_single_vma.constprop.0+0x15b/0x250 [ 38.179830] unmap_vmas+0x1fa/0x460 [ 38.180373] ? __pfx_unmap_vmas+0x10/0x10 [ 38.180994] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.181877] exit_mmap+0x1a2/0xb40 [ 38.182396] ? lock_release+0x14f/0x2c0 [ 38.182929] ? __pfx_exit_mmap+0x10/0x10 [ 38.183474] ? __pfx___mutex_unlock_slowpath+0x10/0x10 [ 38.184188] ? mutex_unlock+0x16/0x20 [ 38.184704] mmput+0x132/0x370 [ 38.185208] do_exit+0x7e7/0x28c0 [ 38.185682] ? __this_cpu_preempt_check+0x21/0x30 [ 38.186328] ? do_group_exit+0x1d8/0x2c0 [ 38.186873] ? __pfx_do_exit+0x10/0x10 [ 38.187401] ? __this_cpu_preempt_check+0x21/0x30 [ 38.188036] ? _raw_spin_unlock_irq+0x2c/0x60 [ 38.188634] ? lockdep_hardirqs_on+0x89/0x110 [ 38.189313] do_group_exit+0xe4/0x2c0 [ 38.189831] __x64_sys_exit_group+0x4d/0x60 [ 38.190413] x64_sys_call+0x2174/0x2180 [ 38.190935] do_syscall_64+0x6d/0x2e0 [ 38.191449] entry_SYSCALL_64_after_hwframe+0x76/0x7e This patch moves the vma_start_write() call to precede check_pmd_still_valid(), ensuring that the check is also properly protected by the per-VMA lock. Link: https://lkml.kernel.org/r/20250805035447.7958-1-21cnbao@gmail.com Fixes: a6fde7add78d ("mm: use per_vma lock for MADV_DONTNEED") Signed-off-by: Barry Song Tested-by: "Lai, Yi" Reported-by: "Lai, Yi" Closes: https://lore.kernel.org/all/aJAFrYfyzGpbm+0m@ly-workstation/ Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Qi Zheng Cc: Vlastimil Babka Cc: Jann Horn Cc: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Tangquan Zheng Cc: Lance Yang Cc: Zi Yan Cc: Baolin Wang Cc: Liam R. Howlett Cc: Nico Pache Cc: Ryan Roberts Cc: Dev Jain Signed-off-by: Andrew Morton --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 374a6a5193a7..6b40bdfd224c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1172,11 +1172,11 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (result != SCAN_SUCCEED) goto out_up_write; /* check if the pmd is still valid */ + vma_start_write(vma); result = check_pmd_still_valid(mm, address, pmd); if (result != SCAN_SUCCEED) goto out_up_write; - vma_start_write(vma); anon_vma_lock_write(vma->anon_vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, From 45d19b4b6c2d422771c29b83462d84afcbb33f01 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:56 +0800 Subject: [PATCH 0033/1307] mm/smaps: fix race between smaps_hugetlb_range and migration smaps_hugetlb_range() handles the pte without holdling ptl, and may be concurrenct with migration, leaing to BUG_ON in pfn_swap_entry_to_page(). The race is as follows. smaps_hugetlb_range migrate_pages huge_ptep_get remove_migration_ptes folio_unlock pfn_swap_entry_folio BUG_ON To fix it, hold ptl lock in smaps_hugetlb_range(). Link: https://lkml.kernel.org/r/20250724090958.455887-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250724090958.455887-2-tujinjiang@huawei.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3d6d8a9f13fc..55bab10bc779 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1148,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; + spinlock_t *ptl; + pte_t ptent; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + ptent = huge_ptep_get(walk->mm, addr, pte); if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; @@ -1170,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } + spin_unlock(ptl); return 0; } #else From aa5a10b070690225317ed4d85413d144abfff750 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:57 +0800 Subject: [PATCH 0034/1307] fs/proc/task_mmu: hold PTL in pagemap_hugetlb_range and gather_hugetlb_stats Hold PTL in pagemap_hugetlb_range() and gather_hugetlb_stats() to avoid operating on stale page, as pagemap_pmd_range() and gather_pte_stats() have done. Link: https://lkml.kernel.org/r/20250724090958.455887-3-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 55bab10bc779..ee1e4ccd33bd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2021,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; + spinlock_t *ptl; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep); pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2054,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, err = add_to_pagemap(&pme, pm); if (err) - return err; + break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } + spin_unlock(ptl); cond_resched(); return err; @@ -3132,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); + pte_t huge_pte; struct numa_maps *md; struct page *page; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + huge_pte = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(huge_pte)) - return 0; + goto out; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); +out: + spin_unlock(ptl); return 0; } From 034d319c8899e8c5c0a35c6692c7fc7e8c12c374 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Tue, 29 Jul 2025 04:27:11 +0530 Subject: [PATCH 0035/1307] scsi: ufs: core: Fix interrupt handling for MCQ Mode Commit 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") introduced a regression where the UFS interrupt status register (IS) was not cleared in ufshcd_intr() when operating in MCQ mode. As a result, the IS register remained uncleared. This led to a persistent issue during UIC interrupts: ufshcd_is_auto_hibern8_error() consistently returned true because the UFSHCD_UIC_HIBERN8_MASK bit was set, while the active command was neither UIC_CMD_DME_HIBER_ENTER nor UIC_CMD_DME_HIBER_EXIT. This caused continuous auto hibern8 enter errors and device failed to boot. To fix this, ensure that the interrupt status register is properly cleared in the ufshcd_intr() function for both MCQ mode with ESI enabled. [ 4.553226] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: Auto Hibern8 Enter failed - status: 0x00000040, upmcrs: 0x00000001 [ 4.553229] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: saved_err 0x40 saved_uic_err 0x0 [ 4.553311] host_regs: 00000000: d5c7033f 20e0071f 00000400 00000000 [ 4.553312] host_regs: 00000010: 01000000 00010217 00000c96 00000000 [ 4.553314] host_regs: 00000020: 00000440 00170ef5 00000000 00000000 [ 4.553316] host_regs: 00000030: 0000010f 00000001 00000000 00000000 [ 4.553317] host_regs: 00000040: 00000000 00000000 00000000 00000000 [ 4.553319] host_regs: 00000050: fffdf000 0000000f 00000000 00000000 [ 4.553320] host_regs: 00000060: 00000001 80000000 00000000 00000000 [ 4.553322] host_regs: 00000070: fffde000 0000000f 00000000 00000000 [ 4.553323] host_regs: 00000080: 00000001 00000000 00000000 00000000 [ 4.553325] host_regs: 00000090: 00000002 d0020000 00000000 01930200 Fixes: 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") Co-developed-by: Palash Kambar Signed-off-by: Palash Kambar Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250728225711.29273-1-quic_nitirawa@quicinc.com Tested-by: Neil Armstrong # on SM8650-QRD Reviewed-by: Bart Van Assche Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5442bb8540b5..baf28bd748cc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7133,14 +7133,19 @@ static irqreturn_t ufshcd_threaded_intr(int irq, void *__hba) static irqreturn_t ufshcd_intr(int irq, void *__hba) { struct ufs_hba *hba = __hba; + u32 intr_status, enabled_intr_status; /* Move interrupt handling to thread when MCQ & ESI are not enabled */ if (!hba->mcq_enabled || !hba->mcq_esi_enabled) return IRQ_WAKE_THREAD; + intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); + enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + + ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); + /* Directly handle interrupts since MCQ ESI handlers does the hard job */ - return ufshcd_sl_intr(hba, ufshcd_readl(hba, REG_INTERRUPT_STATUS) & - ufshcd_readl(hba, REG_INTERRUPT_ENABLE)); + return ufshcd_sl_intr(hba, enabled_intr_status); } static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) From a59976116a01dad1c72460f9ed700bf4b3fdbebd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 31 Jul 2025 13:33:11 +0200 Subject: [PATCH 0036/1307] scsi: lpfc: Fix wrong function reference in a comment Function scsi_host_remove() doesn't exist, the actual function name is scsi_remove_host(). Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/20250731133311.52034cc4@endymion Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 2797aa75a689..aff6c9d5e7c2 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -666,7 +666,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport) * Take early refcount for outstanding I/O requests we schedule during * delete processing for unreg_vpi. Always keep this before * scsi_remove_host() as we can no longer obtain a reference through - * scsi_host_get() after scsi_host_remove as shost is set to SHOST_DEL. + * scsi_host_get() after scsi_remove_host as shost is set to SHOST_DEL. */ if (!scsi_host_get(shost)) return VPORT_INVAL; From eea6cafb5890db488fce1c69d05464214616d800 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 1 Aug 2025 18:52:02 +0000 Subject: [PATCH 0037/1307] scsi: lpfc: Remove redundant assignment to avoid memory leak Remove the redundant assignment if kzalloc() succeeds to avoid memory leak. Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20250801185202.42631-1-jiashengjiangcool@gmail.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 2c7d876c64c7..47cffdf2a8ac 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -6289,7 +6289,6 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) } phba->nvmeio_trc_on = 1; phba->nvmeio_trc_output_idx = 0; - phba->nvmeio_trc = NULL; } else { nvmeio_off: phba->nvmeio_trc_size = 0; From 7ec2bd6cd2d0ce6d6224519f895cb932ed5af667 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 4 Aug 2025 14:01:54 +0800 Subject: [PATCH 0038/1307] scsi: ufs: mediatek: Fix out-of-bounds access in MCQ IRQ mapping Address a potential out-of-bounds access issue when accessing 'host->mcq_intr_info[q_index]'. The value of 'q_index' might exceed the valid array bounds if 'q_index == nr'. Correct condition to 'q_index >= nr' to prevent accessing invalid memory. Fixes: 66e26a4b8a77 ("scsi: ufs: host: mediatek: Set IRQ affinity policy for MCQ mode") Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20250804060249.1387057-1-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 86ae73b89d4d..f902ce08c95a 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -818,7 +818,7 @@ static u32 ufs_mtk_mcq_get_irq(struct ufs_hba *hba, unsigned int cpu) unsigned int q_index; q_index = map->mq_map[cpu]; - if (q_index > nr) { + if (q_index >= nr) { dev_err(hba->dev, "hwq index %d exceed %d\n", q_index, nr); return MTK_MCQ_INVALID_IRQ; From 72fc388d8bc0b49fd038477b74618cc15ce18b56 Mon Sep 17 00:00:00 2001 From: Waqar Hameed Date: Tue, 5 Aug 2025 11:33:36 +0200 Subject: [PATCH 0039/1307] scsi: ufs: core: Remove error print for devm_add_action_or_reset() When devm_add_action_or_reset() fails, it is due to a failed memory allocation and will thus return -ENOMEM. dev_err_probe() doesn't do anything when error is -ENOMEM. Therefore, remove the useless call to dev_err_probe() when devm_add_action_or_reset() fails, and just return the value instead. Signed-off-by: Waqar Hameed Link: https://lore.kernel.org/r/pndtt2mkt8v.a.out@axis.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index baf28bd748cc..2e1fa8cf83f5 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10515,8 +10515,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = devm_add_action_or_reset(dev, ufshcd_devres_release, host); if (err) - return dev_err_probe(dev, err, - "failed to add ufshcd dealloc action\n"); + return err; host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); From 42e42562c9cfcdacf000f1b42284a4fad24f8546 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:05:43 +0200 Subject: [PATCH 0040/1307] xfrm: flush all states in xfrm_state_fini While reverting commit f75a2804da39 ("xfrm: destroy xfrm_state synchronously on net exit path"), I incorrectly changed xfrm_state_flush's "proto" argument back to IPSEC_PROTO_ANY. This reverts some of the changes in commit dbb2483b2a46 ("xfrm: clean up xfrm protocol checks"), and leads to some states not being removed when we exit the netns. Pass 0 instead of IPSEC_PROTO_ANY from both xfrm_state_fini xfrm6_tunnel_net_exit, so that xfrm_state_flush deletes all states. Fixes: 2a198bbec691 ("Revert "xfrm: destroy xfrm_state synchronously on net exit path"") Reported-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6641a61fe0e2e89ae8c5 Tested-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5120a763da0d..0a0eeaed0591 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,7 +334,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); xfrm_flush_gc(); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77db3b5fe4ac..78fcbb89cf32 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -3297,7 +3297,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); From f8f6e72fe28595969829d63db93ecaa56a0c2811 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:50 +0300 Subject: [PATCH 0041/1307] drm/omap: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. The patch is based on the driver parts of the patchset at Link: below, which missed converting the omap driver. Due to the absence of this change in the patchset at Link:, after the Fixed: commit below, omap_framebuffer_init() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which lead to the !fb->format WARN() in drm_framebuffer_init() and causing framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes. (Tomi) Cc: Ville Syrjälä Cc: Tomi Valkeinen Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/98b3a62c-91ff-4f91-a58b-e1265f84180b@sirena.org.uk Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Tested-by: Mark Brown Tested-by: Linux Kernel Functional Testing Acked-by: Alex Deucher Reviewed-by: Tomi Valkeinen Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-2-imre.deak@intel.com --- drivers/gpu/drm/omapdrm/omap_fb.c | 23 ++++++++++------------- drivers/gpu/drm/omapdrm/omap_fb.h | 2 ++ drivers/gpu/drm/omapdrm/omap_fbdev.c | 5 ++++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 30c81e2e5d6b..bb3105556f19 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -351,7 +351,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, } } - fb = omap_framebuffer_init(dev, mode_cmd, bos); + fb = omap_framebuffer_init(dev, info, mode_cmd, bos); if (IS_ERR(fb)) goto error; @@ -365,9 +365,9 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, } struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - const struct drm_format_info *format = NULL; struct omap_framebuffer *omap_fb = NULL; struct drm_framebuffer *fb = NULL; unsigned int pitch = mode_cmd->pitches[0]; @@ -377,15 +377,12 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, dev, mode_cmd, mode_cmd->width, mode_cmd->height, (char *)&mode_cmd->pixel_format); - format = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - for (i = 0; i < ARRAY_SIZE(formats); i++) { if (formats[i] == mode_cmd->pixel_format) break; } - if (!format || i == ARRAY_SIZE(formats)) { + if (i == ARRAY_SIZE(formats)) { dev_dbg(dev->dev, "unsupported pixel format: %4.4s\n", (char *)&mode_cmd->pixel_format); ret = -EINVAL; @@ -399,7 +396,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, } fb = &omap_fb->base; - omap_fb->format = format; + omap_fb->format = info; mutex_init(&omap_fb->lock); /* @@ -407,23 +404,23 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, * that the two planes of multiplane formats need the same number of * bytes per pixel. */ - if (format->num_planes == 2 && pitch != mode_cmd->pitches[1]) { + if (info->num_planes == 2 && pitch != mode_cmd->pitches[1]) { dev_dbg(dev->dev, "pitches differ between planes 0 and 1\n"); ret = -EINVAL; goto fail; } - if (pitch % format->cpp[0]) { + if (pitch % info->cpp[0]) { dev_dbg(dev->dev, "buffer pitch (%u bytes) is not a multiple of pixel size (%u bytes)\n", - pitch, format->cpp[0]); + pitch, info->cpp[0]); ret = -EINVAL; goto fail; } - for (i = 0; i < format->num_planes; i++) { + for (i = 0; i < info->num_planes; i++) { struct plane *plane = &omap_fb->planes[i]; - unsigned int vsub = i == 0 ? 1 : format->vsub; + unsigned int vsub = i == 0 ? 1 : info->vsub; unsigned int size; size = pitch * mode_cmd->height / vsub; @@ -440,7 +437,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, plane->dma_addr = 0; } - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &omap_framebuffer_funcs); if (ret) { diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h index 0873f953cf1d..e6010302a22b 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.h +++ b/drivers/gpu/drm/omapdrm/omap_fb.h @@ -13,6 +13,7 @@ struct drm_connector; struct drm_device; struct drm_file; struct drm_framebuffer; +struct drm_format_info; struct drm_gem_object; struct drm_mode_fb_cmd2; struct drm_plane_state; @@ -23,6 +24,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, struct drm_file *file, const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); int omap_framebuffer_pin(struct drm_framebuffer *fb); void omap_framebuffer_unpin(struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 7b6396890681..948af7ec1130 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -197,7 +197,10 @@ int omap_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, goto fail; } - fb = omap_framebuffer_init(dev, &mode_cmd, &bo); + fb = omap_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &bo); if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy From d2b524c9064301471e8ffe4ffd85ab8870966aa4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:51 +0300 Subject: [PATCH 0042/1307] drm/nouveau: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. The patch is based on the driver parts of the patchset at Link: below, which missed converting the nouveau driver. Due to the absence of this change in the patchset at Link:, after the Fixed: commit below, nouveau_framebuffer_new() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which lead to the !fb->format WARN() in drm_framebuffer_init() and causing framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes. (Tomi) Cc: Ville Syrjälä Cc: Lyude Paul Cc: Danilo Krummrich Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Tomi Valkeinen Cc: nouveau@lists.freedesktop.org Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Acked-by: Alex Deucher Acked-by: Danilo Krummrich Reviewed-by: James Jones Tested-by: Linux Kernel Functional Testing Tested-by: James Jones Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-3-imre.deak@intel.com --- drivers/gpu/drm/nouveau/nouveau_display.c | 9 +++------ drivers/gpu/drm/nouveau/nouveau_display.h | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index e1e542126310..805d0a87aa54 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -253,6 +253,7 @@ nouveau_check_bl_size(struct nouveau_drm *drm, struct nouveau_bo *nvbo, int nouveau_framebuffer_new(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb) @@ -260,7 +261,6 @@ nouveau_framebuffer_new(struct drm_device *dev, struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct drm_framebuffer *fb; - const struct drm_format_info *info; unsigned int height, i; uint32_t tile_mode; uint8_t kind; @@ -295,9 +295,6 @@ nouveau_framebuffer_new(struct drm_device *dev, kind = nvbo->kind; } - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - for (i = 0; i < info->num_planes; i++) { height = drm_format_info_plane_height(info, mode_cmd->height, @@ -321,7 +318,7 @@ nouveau_framebuffer_new(struct drm_device *dev, if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL))) return -ENOMEM; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); fb->obj[0] = gem; ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs); @@ -344,7 +341,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, if (!gem) return ERR_PTR(-ENOENT); - ret = nouveau_framebuffer_new(dev, mode_cmd, gem, &fb); + ret = nouveau_framebuffer_new(dev, info, mode_cmd, gem, &fb); if (ret == 0) return fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index e45f211501f6..470e0910d484 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -8,8 +8,11 @@ #include +struct drm_format_info; + int nouveau_framebuffer_new(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb); From c0a8e4443d768e5c86ddb52a3a744a151e7b72b0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 5 Aug 2025 20:57:52 +0300 Subject: [PATCH 0043/1307] drm/radeon: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. The patch is based on the driver parts of the patchset at Link: below, which missed converting the radeon driver. Due to the absence of this change in the patchset at Link:, after the Fixed: commit below, radeon_framebuffer_init() -> drm_helper_mode_fill_fb_struct() set drm_framebuffer::format incorrectly to NULL, which lead to the !fb->format WARN() in drm_framebuffer_init() and causing framebuffer creation to fail. This patch fixes both of these issues. v2: Amend the commit log mentioning the functional issues the patch fixes. (Tomi) Cc: Ville Syrjälä Cc: Alex Deucher Cc: Christian König Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: amd-gfx@lists.freedesktop.org Cc: Tomi Valkeinen Fixes: 41ab92d35ccd ("drm: Make passing of format info to drm_helper_mode_fill_fb_struct() mandatory") Link: https://lore.kernel.org/all/20250701090722.13645-1-ville.syrjala@linux.intel.com Acked-by: Alex Deucher Tested-by: Linux Kernel Functional Testing Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250805175752.690504-4-imre.deak@intel.com --- drivers/gpu/drm/radeon/radeon_display.c | 5 +++-- drivers/gpu/drm/radeon/radeon_fbdev.c | 11 ++++++----- drivers/gpu/drm/radeon/radeon_mode.h | 2 ++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index b4bf5dfeea2d..4dc77c398617 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1297,12 +1297,13 @@ static const struct drm_framebuffer_funcs radeon_fb_funcs = { int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; fb->obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { fb->obj[0] = NULL; @@ -1341,7 +1342,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj); + ret = radeon_framebuffer_init(dev, fb, info, mode_cmd, obj); if (ret) { kfree(fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index e3a481bbee7b..dc81b0c2dbff 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -53,10 +53,10 @@ static void radeon_fbdev_destroy_pinned_object(struct drm_gem_object *gobj) } static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **gobj_p) { - const struct drm_format_info *info; struct radeon_device *rdev = fb_helper->dev->dev_private; struct drm_gem_object *gobj = NULL; struct radeon_bo *rbo = NULL; @@ -67,8 +67,6 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd->pixel_format, - mode_cmd->modifier[0]); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -206,6 +204,7 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct radeon_device *rdev = fb_helper->dev->dev_private; + const struct drm_format_info *format_info; struct drm_mode_fb_cmd2 mode_cmd = { }; struct fb_info *info; struct drm_gem_object *gobj; @@ -224,7 +223,9 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - ret = radeon_fbdev_create_pinned_object(fb_helper, &mode_cmd, &gobj); + format_info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd.pixel_format, + mode_cmd.modifier[0]); + ret = radeon_fbdev_create_pinned_object(fb_helper, format_info, &mode_cmd, &gobj); if (ret) { DRM_ERROR("failed to create fbcon object %d\n", ret); return ret; @@ -236,7 +237,7 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, format_info, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3102f6c2d055..9e34da2cacef 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -40,6 +40,7 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; +struct drm_format_info; struct edid; struct drm_edid; @@ -890,6 +891,7 @@ extern void radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on); int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *rfb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); From a4f8e70d75dd11ab1a01894893e0b03f1d0b61fd Mon Sep 17 00:00:00 2001 From: Tianyu Xu Date: Tue, 5 Aug 2025 09:54:03 +0800 Subject: [PATCH 0044/1307] spi: spi-mem: add spi_mem_adjust_op_freq() in spi_mem_supports_op() The function spi_mem_adjust_op_freq() within spi_mem_exec_op() adjusts the op->max_freq, which will informs the SPI controller of the maximum frequency for each operation. This adjustment is based on combined information from the SPI device and the board's wiring conditions. Similarly, spi_mem_supports_op() will check the capabilities of the SPI controller. It also requires the combined information before it can accurately determine whether the SPI controller supports a given operation. Signed-off-by: Tianyu Xu Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20250805015403.43928-1-tianyxu@cisco.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index dfa8ab1ec80f..a8f14c608d2d 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -265,6 +265,9 @@ static bool spi_mem_internal_supports_op(struct spi_mem *mem, */ bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { + /* Make sure the operation frequency is correct before going futher */ + spi_mem_adjust_op_freq(mem, (struct spi_mem_op *)op); + if (spi_mem_check_op(op)) return false; From bee47cb026e762841f3faece47b51f985e215edb Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 29 Jul 2025 12:40:20 -0400 Subject: [PATCH 0045/1307] sunrpc: fix handling of server side tls alerts Scott Mayhew discovered a security exploit in NFS over TLS in tls_alert_recv() due to its assumption it can read data from the msg iterator's kvec.. kTLS implementation splits TLS non-data record payload between the control message buffer (which includes the type such as TLS aler or TLS cipher change) and the rest of the payload (say TLS alert's level/description) which goes into the msg payload buffer. This patch proposes to rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed msg buffer and read in the control message such as a TLS alert. Msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Reported-by: Scott Mayhew Fixes: 5e052dda121e ("SUNRPC: Recognize control messages in server-side TCP socket code") Suggested-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 46c156b121db..e2c5e0e626f9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ + int ret; + struct socket *sock = svsk->sk_sock; + ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1018,7 +1045,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; From 7b306dfa326f70114312b320d083b21fa9481e1e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 29 Jul 2025 13:41:29 -0500 Subject: [PATCH 0046/1307] x86/sev: Evict cache lines during SNP memory validation An SNP cache coherency vulnerability requires a cache line eviction mitigation when validating memory after a page state change to private. The specific mitigation is to touch the first and last byte of each 4K page that is being validated. There is no need to perform the mitigation when performing a page state change to shared and rescinding validation. CPUID bit Fn8000001F_EBX[31] defines the COHERENCY_SFW_NO CPUID bit that, when set, indicates that the software mitigation for this vulnerability is not needed. Implement the mitigation and invoke it when validating memory (making it private) and the COHERENCY_SFW_NO bit is not set, indicating the SNP guest is vulnerable. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Acked-by: Thomas Gleixner --- arch/x86/boot/cpuflags.c | 13 +++++++++++++ arch/x86/boot/startup/sev-shared.c | 7 +++++++ arch/x86/coco/sev/core.c | 21 +++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/sev.h | 19 +++++++++++++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 6 files changed, 62 insertions(+) diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 916bac09b464..63e037e94e4c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,5 +106,18 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } + + if (max_amd_level >= 0x8000001f) { + u32 ebx; + + /* + * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in + * the virtualization flags entry (word 8) and set by + * scattered.c, so the bit needs to be explicitly set. + */ + cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); + if (ebx & BIT(31)) + set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); + } } } diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..ac7dfd21ddd4 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -810,6 +810,13 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, if (ret) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); } + + /* + * If validating memory (making it private) and affected by the + * cache-coherency vulnerability, perform the cache eviction mitigation. + */ + if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + sev_evict_cache((void *)vaddr, 1); } /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..400a6ab75d45 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -358,10 +358,31 @@ static void svsm_pval_pages(struct snp_psc_desc *desc) static void pvalidate_pages(struct snp_psc_desc *desc) { + struct psc_entry *e; + unsigned int i; + if (snp_vmpl) svsm_pval_pages(desc); else pval_pages(desc); + + /* + * If not affected by the cache-coherency vulnerability there is no need + * to perform the cache eviction mitigation. + */ + if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) + return; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + /* + * If validating memory (making it private) perform the cache + * eviction mitigation. + */ + if (e->operation == SNP_PAGE_STATE_PRIVATE) + sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); + } } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..06fc0479a23f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 89075ff19afa..02236962fdb1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -619,6 +619,24 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -634,6 +652,7 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} +static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b4a1f6732a3a..6b868afb26c3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -48,6 +48,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, From 234d1eff5d4987024be9d40ac07b918a5ae8db1a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:25 +0200 Subject: [PATCH 0047/1307] xfrm: restore GSO for SW crypto Commit 49431af6c4ef incorrectly assumes that the GSO path is only used by HW offload, but it's also useful for SW crypto. This patch re-enables GSO for SW crypto. It's not an exact revert to preserve the other changes made to xfrm_dev_offload_ok afterwards, but it reverts all of its effects. Fixes: 49431af6c4ef ("xfrm: rely on XFRM offload") Signed-off-by: Sabrina Dubroca Reviewed-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d2819baea414..1f88472aaac0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -415,10 +415,12 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct net_device *dev = x->xso.dev; bool check_tunnel_size; - if (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED) + if (!x->type_offload || + (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; - if ((dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { + if ((!dev || dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; @@ -430,6 +432,9 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return false; ok: + if (!dev) + return true; + check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (x->props.family) { From 65f079a6c446a939eefe71e6d5957d5d6365fcf9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:26 +0200 Subject: [PATCH 0048/1307] xfrm: bring back device check in validate_xmit_xfrm This is partial revert of commit d53dda291bbd993a29b84d358d282076e3d01506. This change causes traffic using GSO with SW crypto running through a NIC capable of HW offload to no longer get segmented during validate_xmit_xfrm, and is unrelated to the bonding use case mentioned in the commit. Fixes: d53dda291bbd ("xfrm: Remove unneeded device check from validate_xmit_xfrm") Signed-off-by: Sabrina Dubroca Reviewed-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1f88472aaac0..c7a1f080d2de 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -155,7 +155,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(xmit_xfrm_check_overflow(skb))) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ From 1118aaa3b35157777890fffab91d8c1da841b20b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:27 +0200 Subject: [PATCH 0049/1307] udp: also consider secpath when evaluating ipsec use for checksumming Commit b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") tried to fix checksumming in UFO when the packets are going through IPsec, so that we can't rely on offloads because the UDP header and payload will be encrypted. But when doing a TCP test over VXLAN going through IPsec transport mode with GSO enabled (esp4_offload module loaded), I'm seeing broken UDP checksums on the encap after successful decryption. The skbs get to udp4_ufo_fragment/__skb_udp_tunnel_segment via __dev_queue_xmit -> validate_xmit_skb -> skb_gso_segment and at this point we've already dropped the dst (unless the device sets IFF_XMIT_DST_RELEASE, which is not common), so need_ipsec is false and we proceed with checksum offload. Make need_ipsec also check the secpath, which is not dropped on this callpath. Fixes: b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 5128e2a5b00a..b1f3fd302e9d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -217,7 +217,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + need_ipsec = (skb_dst(skb) && dst_xfrm(skb_dst(skb))) || skb_sec_path(skb); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && !need_ipsec && From f752adfaf5f7d796007f9c1a867b9bdccc15cc2c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 25 Jul 2025 14:31:34 +0200 Subject: [PATCH 0050/1307] MAINTAINERS: resurrect my netfilter maintainer entry This reverts commit b5048d27872a9734d142540ea23c3e897e47e05c. Its been more than a year, hope my motivation lasts a bit longer than last time :-) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b968bc6959d1..cd9415702b28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17313,6 +17313,7 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso M: Jozsef Kadlecsik +M: Florian Westphal L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org S: Maintained From 25a8b88f000c33a1d580c317e93e40b953dc2fa5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Jul 2025 23:45:32 +0200 Subject: [PATCH 0051/1307] netfilter: add back NETFILTER_XTABLES dependencies Some Kconfig symbols were changed to depend on the 'bool' symbol NETFILTER_XTABLES_LEGACY, which means they can now be set to built-in when the xtables code itself is in a loadable module: x86_64-linux-ld: vmlinux.o: in function `arpt_unregister_table_pre_exit': (.text+0x1831987): undefined reference to `xt_find_table' x86_64-linux-ld: vmlinux.o: in function `get_info.constprop.0': arp_tables.c:(.text+0x1831aab): undefined reference to `xt_request_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x1831bea): undefined reference to `xt_table_unlock' x86_64-linux-ld: vmlinux.o: in function `do_arpt_get_ctl': arp_tables.c:(.text+0x183205d): undefined reference to `xt_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x18320c1): undefined reference to `xt_table_unlock' x86_64-linux-ld: arp_tables.c:(.text+0x183219a): undefined reference to `xt_recseq' Change these to depend on both NETFILTER_XTABLES and NETFILTER_XTABLES_LEGACY. Fixes: 9fce66583f06 ("netfilter: Exclude LEGACY TABLES on PREEMPT_RT.") Signed-off-by: Arnd Bergmann Acked-by: Florian Westphal Tested-by: Breno Leitao Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/Kconfig | 1 + net/ipv4/netfilter/Kconfig | 3 +++ net/ipv6/netfilter/Kconfig | 1 + 3 files changed, 5 insertions(+) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 60f28e4fb5c0..4fd5a6ea26b4 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -43,6 +43,7 @@ config NF_CONNTRACK_BRIDGE config BRIDGE_NF_EBTABLES_LEGACY tristate "Legacy EBTABLES support" depends on BRIDGE && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help Legacy ebtables packet/frame classifier. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2c438b140e88..7dc9772fe2d8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -14,6 +14,7 @@ config NF_DEFRAG_IPV4 config IP_NF_IPTABLES_LEGACY tristate "Legacy IP tables support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help iptables is a legacy packet classifier. @@ -326,6 +327,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "Legacy ARPTABLES support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help arptables is a legacy packet classifier. @@ -343,6 +345,7 @@ config IP_NF_ARPFILTER select IP_NF_ARPTABLES select NETFILTER_FAMILY_ARP depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 276860f65baa..81daf82ddc2d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,6 +10,7 @@ menu "IPv6: Netfilter Configuration" config IP6_NF_IPTABLES_LEGACY tristate "Legacy IP6 tables support" depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help ip6tables is a legacy packet classifier. From de788b2e6227462b6dcd0e07474e72c089008f74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:08 +0200 Subject: [PATCH 0052/1307] netfilter: ctnetlink: fix refcount leak on table dump There is a reference count leak in ctnetlink_dump_table(): if (res < 0) { nf_conntrack_get(&ct->ct_general); // HERE cb->args[1] = (unsigned long)ct; ... While its very unlikely, its possible that ct == last. If this happens, then the refcount of ct was already incremented. This 2nd increment is never undone. This prevents the conntrack object from being released, which in turn keeps prevents cnet->count from dropping back to 0. This will then block the netns dismantle (or conntrack rmmod) as nf_conntrack_cleanup_net_list() will wait forever. This can be reproduced by running conntrack_resize.sh selftest in a loop. It takes ~20 minutes for me on a preemptible kernel on average before I see a runaway kworker spinning in nf_conntrack_cleanup_net_list. One fix would to change this to: if (res < 0) { if (ct != last) nf_conntrack_get(&ct->ct_general); But this reference counting isn't needed in the first place. We can just store a cookie value instead. A followup patch will do the same for ctnetlink_exp_dump_table, it looks to me as if this has the same problem and like ctnetlink_dump_table, we only need a 'skip hint', not the actual object so we can apply the same cookie strategy there as well. Fixes: d205dc40798d ("[NETFILTER]: ctnetlink: fix deadlock in table dumping") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 486d52b45fe5..f403acd82437 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -884,8 +884,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) static int ctnetlink_done(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); kfree(cb->data); return 0; } @@ -1208,19 +1206,26 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) return 0; } +static unsigned long ctnetlink_get_id(const struct nf_conn *ct) +{ + unsigned long id = nf_ct_get_id(ct); + + return id ? id : 1; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); - struct nf_conn *ct, *last; + unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; + struct nf_conn *ct; int res, i; spinlock_t *lockp; - last = (struct nf_conn *)cb->args[1]; i = 0; local_bh_disable(); @@ -1257,7 +1262,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (ct != last) + if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } @@ -1270,8 +1275,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; + cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } @@ -1284,12 +1288,10 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: local_bh_enable(); - if (last) { + if (last_id) { /* nf ct hash resize happened, now clear the leftover. */ - if ((struct nf_conn *)cb->args[1] == last) + if (cb->args[1] == last_id) cb->args[1] = 0; - - nf_ct_put(last); } while (i) { From 1492e3dcb2be3aa46d1963da96aa9593e4e4db5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:09 +0200 Subject: [PATCH 0053/1307] netfilter: ctnetlink: remove refcounting in expectation dumpers Same pattern as previous patch: do not keep the expectation object alive via refcount, only store a cookie value and then use that as the skip hint for dump resumption. AFAICS this has the same issue as the one resolved in the conntrack dumper, when we do if (!refcount_inc_not_zero(&exp->use)) to increment the refcount, there is a chance that exp == last, which causes a double-increment of the refcount and subsequent memory leak. Fixes: cf6994c2b981 ("[NETFILTER]: nf_conntrack_netlink: sync expectation dumping with conntrack table dumping") Fixes: e844a928431f ("netfilter: ctnetlink: allow to dump expectation per master conntrack") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++---------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f403acd82437..50fd6809380f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3170,23 +3170,27 @@ ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) return 0; } #endif -static int ctnetlink_exp_done(struct netlink_callback *cb) + +static unsigned long ctnetlink_exp_id(const struct nf_conntrack_expect *exp) { - if (cb->args[1]) - nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); - return 0; + unsigned long id = (unsigned long)exp; + + id += nf_ct_get_id(exp->master); + id += exp->class; + + return id ? id : 1; } static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], @@ -3198,7 +3202,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3207,9 +3211,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3220,32 +3222,30 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } static int ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nf_conn *ct = cb->data; struct nf_conn_help *help = nfct_help(ct); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; if (cb->args[0]) return 0; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; + restart: hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3253,9 +3253,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3266,9 +3264,6 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = 1; out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } @@ -3287,7 +3282,6 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, - .done = ctnetlink_exp_done, }; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, @@ -3337,7 +3331,6 @@ static int ctnetlink_get_expect(struct sk_buff *skb, else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } From c8a7c2c608180f3b4e51dc958b3861242dcdd76d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Aug 2025 12:10:41 +0200 Subject: [PATCH 0054/1307] netfilter: nft_set_pipapo: don't return bogus extension pointer Dan Carpenter says: Commit 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") [..] leads to the following Smatch static checker warning: net/netfilter/nft_set_pipapo_avx2.c:1269 nft_pipapo_avx2_lookup() error: uninitialized symbol 'ext'. Fix this by initing ext to NULL and set it only once we've found a match. Fixes: 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/netfilter-devel/aJBzc3V5wk-yPOnH@stanley.mountain/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index db5d367e43c4..2f090e253caf 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1150,12 +1150,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - const struct nft_set_ext *ext; unsigned long *res, *fill; bool map_index; int i; @@ -1246,13 +1246,13 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, goto out; if (last) { - ext = &f->mt[ret].e->ext; - if (unlikely(nft_set_elem_expired(ext) || - !nft_set_elem_active(ext, genmask))) { - ext = NULL; - goto next_match; - } + const struct nft_set_ext *e = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(e) || + !nft_set_elem_active(e, genmask))) + goto next_match; + + ext = e; goto out; } From f54186df806fb1e9cb262d553f4ff942f9467cf1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Aug 2025 13:35:15 +0300 Subject: [PATCH 0055/1307] netfilter: conntrack: clean up returns in nf_conntrack_log_invalid_sysctl() Smatch complains that these look like error paths with missing error codes, especially the one where we return if nf_log_is_registered() is true: net/netfilter/nf_conntrack_standalone.c:575 nf_conntrack_log_invalid_sysctl() warn: missing error code? 'ret' In fact, all these return zero deliberately. Change them to return a literal instead which helps readability as well as silencing the warning. Fixes: e89a68046687 ("netfilter: load nf_log_syslog on enabling nf_conntrack_log_invalid") Signed-off-by: Dan Carpenter Acked-by: Lance Yang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9b8b10a85233..1f14ef0436c6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -567,16 +567,16 @@ nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write, return ret; if (*(u8 *)table->data == 0) - return ret; + return 0; /* Load nf_log_syslog only if no logger is currently registered */ for (i = 0; i < NFPROTO_NUMPROTO; i++) { if (nf_log_is_registered(i)) - return ret; + return 0; } request_module("%s", "nf_log_syslog"); - return ret; + return 0; } static struct ctl_table_header *nf_ct_netfilter_header; From 1dee968d22eaeb3eede70df513ab3f8dd1712e3e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Aug 2025 12:02:42 +0200 Subject: [PATCH 0056/1307] netfilter: nft_socket: remove WARN_ON_ONCE with huge level value syzbot managed to reach this WARN_ON_ONCE by passing a huge level value, remove it. WARNING: CPU: 0 PID: 5853 at net/netfilter/nft_socket.c:220 nft_socket_init+0x2f4/0x3d0 net/netfilter/nft_socket.c:220 Reported-by: syzbot+a225fea35d7baf8dbdc3@syzkaller.appspotmail.com Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 35d0409b0095..36affbb697c2 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -217,7 +217,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, level += err; /* Implies a giant cgroup tree */ - if (WARN_ON_ONCE(level > 255)) + if (level > 255) return -EOPNOTSUPP; priv->level = level; From af357a6a3b7d685e7aa621c6fb1d4ed6c349ec9e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 7 Aug 2025 12:07:42 +0200 Subject: [PATCH 0057/1307] spi: spi-fsl-lpspi: Clamp too high speed_hz Currently the driver is not able to handle the case that a SPI device specifies a higher spi-max-frequency than half of per-clk: per-clk should be at least two times of transfer speed Fix this by clamping to the max possible value and use the minimum SCK period of 2 cycles. Fixes: 77736a98b859 ("spi: lpspi: add the error info of transfer speed setting") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20250807100742.9917-1-wahrenst@gmx.net Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 67d4000c3cef..313e444a34f3 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -330,13 +330,11 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) } if (config.speed_hz > perclk_rate / 2) { - dev_err(fsl_lpspi->dev, - "per-clk should be at least two times of transfer speed"); - return -EINVAL; + div = 2; + } else { + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); } - div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale <= prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv >= 0 && scldiv < 256) { From 13d0fe84a214658254a7412b2b46ec1507dc51f0 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Tue, 5 Aug 2025 18:05:42 +0200 Subject: [PATCH 0058/1307] spi: spi-qpic-snand: fix calculating of ECC OOB regions' properties The OOB layout used by the driver has two distinct regions which contains hardware specific ECC data, yet the qcom_spi_ooblayout_ecc() function sets the same offset and length values for both regions which is clearly wrong. Change the code to calculate the correct values for both regions. For reference, the following table shows the computed offset and length values for various OOB size/ECC strength configurations: +-----------------+-----------------+ |before the change| after the change| +-------+----------+--------+--------+--------+--------+--------+ | OOB | ECC | region | region | region | region | region | | size | strength | index | offset | length | offset | length | +-------+----------+--------+--------+--------+--------+--------+ | 128 | 8 | 0 | 113 | 15 | 0 | 49 | | | | 1 | 113 | 15 | 65 | 63 | +-------+----------+--------+--------+--------+--------+--------+ | 128 | 4 | 0 | 117 | 11 | 0 | 37 | | | | 1 | 117 | 11 | 53 | 75 | +-------+----------+--------+--------+--------+--------+--------+ | 64 | 4 | 0 | 53 | 11 | 0 | 37 | | | | 1 | 53 | 11 | 53 | 11 | +-------+----------+--------+--------+--------+--------+--------+ Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20250805-qpic-snand-oob-ecc-fix-v2-1-e6f811c70d6f@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 5216d60e01aa..7b76d2c82a52 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -210,13 +210,21 @@ static int qcom_spi_ooblayout_ecc(struct mtd_info *mtd, int section, struct qcom_nand_controller *snandc = nand_to_qcom_snand(nand); struct qpic_ecc *qecc = snandc->qspi->ecc; - if (section > 1) - return -ERANGE; + switch (section) { + case 0: + oobregion->offset = 0; + oobregion->length = qecc->bytes * (qecc->steps - 1) + + qecc->bbm_size; + return 0; + case 1: + oobregion->offset = qecc->bytes * (qecc->steps - 1) + + qecc->bbm_size + + qecc->steps * 4; + oobregion->length = mtd->oobsize - oobregion->offset; + return 0; + } - oobregion->length = qecc->ecc_bytes_hw + qecc->spare_bytes; - oobregion->offset = mtd->oobsize - oobregion->length; - - return 0; + return -ERANGE; } static int qcom_spi_ooblayout_free(struct mtd_info *mtd, int section, From ad580dfa388fabb52af033e3f8cc5d04be985e54 Mon Sep 17 00:00:00 2001 From: Leo Martins Date: Mon, 21 Jul 2025 10:49:16 -0700 Subject: [PATCH 0059/1307] btrfs: fix subpage deadlock in try_release_subpage_extent_buffer() There is a potential deadlock that can happen in try_release_subpage_extent_buffer() because the irq-safe xarray spin lock fs_info->buffer_tree is being acquired before the irq-unsafe eb->refs_lock. This leads to the potential race: // T1 (random eb->refs user) // T2 (release folio) spin_lock(&eb->refs_lock); // interrupt end_bbio_meta_write() btrfs_meta_folio_clear_writeback() btree_release_folio() folio_test_writeback() //false try_release_extent_buffer() try_release_subpage_extent_buffer() xa_lock_irq(&fs_info->buffer_tree) spin_lock(&eb->refs_lock); // blocked; held by T1 buffer_tree_clear_mark() xas_lock_irqsave() // blocked; held by T2 I believe that the spin lock can safely be replaced by an rcu_read_lock. The xa_for_each loop does not need the spin lock as it's already internally protected by the rcu_read_lock. The extent buffer is also protected by the rcu_read_lock so it won't be freed before we take the eb->refs_lock and check the ref count. The rcu_read_lock is taken and released every iteration, just like the spin lock, which means we're not protected against concurrent insertions into the xarray. This is fine because we rely on folio->private to detect if there are any ebs remaining in the folio. There is already some precedent for this with find_extent_buffer_nolock, which loads an extent buffer from the xarray with only rcu_read_lock. lockdep warning: ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N ----------------------------------------------------- kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560 and this task is already holding: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 which would create a new lock dependency: (&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3} but this new dependency connects a HARDIRQ-irq-safe lock: (&buffer_xa_class){-.-.}-{3:3} ... which became HARDIRQ-irq-safe at: lock_acquire+0x178/0x358 _raw_spin_lock_irqsave+0x60/0x88 buffer_tree_clear_mark+0xc4/0x160 end_bbio_meta_write+0x238/0x398 btrfs_bio_end_io+0x1f8/0x330 btrfs_orig_write_end_io+0x1c4/0x2c0 bio_endio+0x63c/0x678 blk_update_request+0x1c4/0xa00 blk_mq_end_request+0x54/0x88 virtblk_request_done+0x124/0x1d0 blk_mq_complete_request+0x84/0xa0 virtblk_done+0x130/0x238 vring_interrupt+0x130/0x288 __handle_irq_event_percpu+0x1e8/0x708 handle_irq_event+0x98/0x1b0 handle_fasteoi_irq+0x264/0x7c0 generic_handle_domain_irq+0xa4/0x108 gic_handle_irq+0x7c/0x1a0 do_interrupt_handler+0xe4/0x148 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 _raw_spin_unlock_irq+0x38/0x70 __run_timer_base+0xdc/0x5e0 run_timer_softirq+0xa0/0x138 handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0 ____do_softirq.llvm.17674514681856217165+0x18/0x28 call_on_irq_stack+0x24/0x30 __irq_exit_rcu+0x164/0x430 irq_exit_rcu+0x18/0x88 el1_interrupt+0x34/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 arch_local_irq_enable+0x4/0x8 do_idle+0x1a0/0x3b8 cpu_startup_entry+0x60/0x80 rest_init+0x204/0x228 start_kernel+0x394/0x3f0 __primary_switched+0x8c/0x8958 to a HARDIRQ-irq-unsafe lock: (&eb->refs_lock){+.+.}-{3:3} ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0x178/0x358 _raw_spin_lock+0x4c/0x68 free_extent_buffer_stale+0x2c/0x170 btrfs_read_sys_array+0x1b0/0x338 open_ctree+0xeb0/0x1df8 btrfs_get_tree+0xb60/0x1110 vfs_get_tree+0x8c/0x250 fc_mount+0x20/0x98 btrfs_get_tree+0x4a4/0x1110 vfs_get_tree+0x8c/0x250 do_new_mount+0x1e0/0x6c0 path_mount+0x4ec/0xa58 __arm64_sys_mount+0x370/0x490 invoke_syscall+0x6c/0x208 el0_svc_common+0x14c/0x1b8 do_el0_svc+0x4c/0x60 el0_svc+0x4c/0x160 el0t_64_sync_handler+0x70/0x100 el0t_64_sync+0x168/0x170 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&eb->refs_lock); local_irq_disable(); lock(&buffer_xa_class); lock(&eb->refs_lock); lock(&buffer_xa_class); *** DEADLOCK *** 2 locks held by kswapd0/66: #0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50 #1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text=Multi%2Dlock%20dependency%20rules%3A Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray") CC: stable@vger.kernel.org # 6.16+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Leo Martins Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 835b0deef9bb..f23d75986947 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio) unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1; int ret; - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) { /* * The same as try_release_extent_buffer(), to ensure the eb * won't disappear out from under us. */ spin_lock(&eb->refs_lock); + rcu_read_unlock(); + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); continue; } @@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ - xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); } - xa_unlock_irq(&fs_info->buffer_tree); + rcu_read_unlock(); /* * Finally to check if we have cleared folio private, as if we have @@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) ret = 0; spin_unlock(&folio->mapping->i_private_lock); return ret; - } int try_release_extent_buffer(struct folio *folio) From 15fc0bec883c95007a4901fe75f247bd0ca21651 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 07:56:48 +0930 Subject: [PATCH 0060/1307] btrfs: make btrfs_cleanup_ordered_extents() support large folios When hitting a large folio, btrfs_cleanup_ordered_extents() will get the same large folio multiple times, and clearing the same range again and again. Thankfully this is not causing anything wrong, just inefficiency. This is caused by the fact that we're iterating folios using the old page index, thus can hit the same large folio again and again. Enhance it by increasing @index to the index of the folio end, and only increase @index by 1 if we failed to grab a folio. Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b77dd22b8cdb..a2de289e662b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, while (index <= end_index) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); - index++; - if (IS_ERR(folio)) + if (IS_ERR(folio)) { + index++; continue; + } + index = folio_end(folio) >> PAGE_SHIFT; /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle From deaf895212da74635a7f0a420e1ecf8f5eca1fe5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 15:01:39 +0930 Subject: [PATCH 0061/1307] btrfs: fix wrong length parameter for btrfs_cleanup_ordered_extents() Inside nocow_one_range(), if the checksum cloning for data reloc inode failed, we call btrfs_cleanup_ordered_extents() to cleanup the just allocated ordered extents. But unlike extent_clear_unlock_delalloc(), btrfs_cleanup_ordered_extents() requires a length, not an inclusive end bytenr. This can be problematic, as the @end is normally way larger than @len. This means btrfs_cleanup_ordered_extents() can be called on folios out of the correct range, and if the out-of-range folio is under writeback, we can incorrectly clear the ordered flag of the folio, and trigger the DEBUG_WARN() inside btrfs_writepage_cow_fixup(). Fix the wrong parameter with correct length instead. Fixes: 94f6c5c17e52 ("btrfs: move ordered extent cleanup to where they are allocated") CC: stable@vger.kernel.org # 6.15+ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2de289e662b..d740910e071a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2015,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio * cleaered by the caller. */ if (ret < 0) - btrfs_cleanup_ordered_extents(inode, file_pos, end); + btrfs_cleanup_ordered_extents(inode, file_pos, len); return ret; } From fc5799986fbca957e2e3c0480027f249951b7bcf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 16 Jul 2025 11:41:21 +0100 Subject: [PATCH 0062/1307] btrfs: error on missing block group when unaccounting log tree extent buffers Currently we only log an error message if we can't find the block group for a log tree extent buffer when unaccounting it (while freeing a log tree). A missing block group means something is seriously wrong and we end up leaking space from the metadata space info. So return -ENOENT in case we don't find the block group. CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2186e87fb61b..69e11557fd13 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2605,14 +2605,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* * Correctly adjust the reserved bytes occupied by a log tree extent buffer */ -static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) +static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { btrfs_err(fs_info, "unable to find block group for %llu", start); - return; + return -ENOENT; } spin_lock(&cache->space_info->lock); @@ -2623,27 +2623,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); + + return 0; } static int clean_log_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *eb) { - int ret; - btrfs_tree_lock(eb); btrfs_clear_buffer_dirty(trans, eb); wait_on_extent_buffer_writeback(eb); btrfs_tree_unlock(eb); - if (trans) { - ret = btrfs_pin_reserved_extent(trans, eb); - if (ret) - return ret; - } else { - unaccount_log_buffer(eb->fs_info, eb->start); - } + if (trans) + return btrfs_pin_reserved_extent(trans, eb); - return 0; + return unaccount_log_buffer(eb->fs_info, eb->start); } static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, From 4289b494ac553e74e86fed1c66b2bf9530bc1082 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 25 Jul 2025 20:33:25 +0930 Subject: [PATCH 0063/1307] btrfs: do not allow relocation of partially dropped subvolumes [BUG] There is an internal report that balance triggered transaction abort, with the following call trace: item 85 key (594509824 169 0) itemoff 12599 itemsize 33 extent refs 1 gen 197740 flags 2 ref#0: tree block backref root 7 item 86 key (594558976 169 0) itemoff 12566 itemsize 33 extent refs 1 gen 197522 flags 2 ref#0: tree block backref root 7 ... BTRFS error (device loop0): extent item not found for insert, bytenr 594526208 num_bytes 16384 parent 449921024 root_objectid 934 owner 1 offset 0 BTRFS error (device loop0): failed to run delayed ref for logical 594526208 num_bytes 16384 type 182 action 1 ref_mod 1: -117 ------------[ cut here ]------------ BTRFS: Transaction aborted (error -117) WARNING: CPU: 1 PID: 6963 at ../fs/btrfs/extent-tree.c:2168 btrfs_run_delayed_refs+0xfa/0x110 [btrfs] And btrfs check doesn't report anything wrong related to the extent tree. [CAUSE] The cause is a little complex, firstly the extent tree indeed doesn't have the backref for 594526208. The extent tree only have the following two backrefs around that bytenr on-disk: item 65 key (594509824 METADATA_ITEM 0) itemoff 13880 itemsize 33 refs 1 gen 197740 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE item 66 key (594558976 METADATA_ITEM 0) itemoff 13847 itemsize 33 refs 1 gen 197522 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE But the such missing backref item is not an corruption on disk, as the offending delayed ref belongs to subvolume 934, and that subvolume is being dropped: item 0 key (934 ROOT_ITEM 198229) itemoff 15844 itemsize 439 generation 198229 root_dirid 256 bytenr 10741039104 byte_limit 0 bytes_used 345571328 last_snapshot 198229 flags 0x1000000000001(RDONLY) refs 0 drop_progress key (206324 EXTENT_DATA 2711650304) drop_level 2 level 2 generation_v2 198229 And that offending tree block 594526208 is inside the dropped range of that subvolume. That explains why there is no backref item for that bytenr and why btrfs check is not reporting anything wrong. But this also shows another problem, as btrfs will do all the orphan subvolume cleanup at a read-write mount. So half-dropped subvolume should not exist after an RW mount, and balance itself is also exclusive to subvolume cleanup, meaning we shouldn't hit a subvolume half-dropped during relocation. The root cause is, there is no orphan item for this subvolume. In fact there are 5 subvolumes from around 2021 that have the same problem. It looks like the original report has some older kernels running, and caused those zombie subvolumes. Thankfully upstream commit 8d488a8c7ba2 ("btrfs: fix subvolume/snapshot deletion not triggered on mount") has long fixed the bug. [ENHANCEMENT] For repairing such old fs, btrfs-progs will be enhanced. Considering how delayed the problem will show up (at run delayed ref time) and at that time we have to abort transaction already, it is too late. Instead here we reject any half-dropped subvolume for reloc tree at the earliest time, preventing confusion and extra time wasted on debugging similar bugs. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e58151933844..7256f6748c8f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -602,6 +602,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, if (btrfs_root_id(root) == objectid) { u64 commit_root_gen; + /* + * Relocation will wait for cleaner thread, and any half-dropped + * subvolume will be fully cleaned up at mount time. + * So here we shouldn't hit a subvolume with non-zero drop_progress. + * + * If this isn't the case, error out since it can make us attempt to + * drop references for extents that were already dropped before. + */ + if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) { + struct btrfs_key cpu_key; + + btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); + btrfs_err(fs_info, + "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", + objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + ret = -EUCLEAN; + goto fail; + } + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); From 3a931e9b39c7ff8066657042f5f00d3b7e6ad315 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:52 +0900 Subject: [PATCH 0064/1307] btrfs: zoned: do not select metadata BG as finish target We call btrfs_zone_finish_one_bg() to zone finish one block group and make room to activate another block group. Currently, we can choose a metadata block group as a target. But, as we reserve an active metadata block group, we no longer want to select a metadata block group. So, skip it in the loop. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 245e813ecd78..db11b5b5f0e6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2650,7 +2650,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || - (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || + !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; From 7b632596188e1973c6b3ac1c9f8252f735e1039f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 30 Jul 2025 09:29:23 -0700 Subject: [PATCH 0065/1307] btrfs: fix iteration bug in __qgroup_excl_accounting() __qgroup_excl_accounting() uses the qgroup iterator machinery to update the account of one qgroups usage for all its parent hierarchy, when we either add or remove a relation and have only exclusive usage. However, there is a small bug there: we loop with an extra iteration temporary qgroup called `cur` but never actually refer to that in the body of the loop. As a result, we redundantly account the same usage to the first qgroup in the list. This can be reproduced in the following way: mkfs.btrfs -f -O squota mount btrfs subvol create /sv dd if=/dev/zero of=/sv/f bs=1M count=1 sync btrfs qgroup create 1/100 btrfs qgroup create 2/200 btrfs qgroup assign 1/100 2/200 btrfs qgroup assign 0/256 1/100 btrfs qgroup show and the broken result is (note the 2MiB on 1/100 and 0Mib on 2/100): Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 2.03MiB 2.03MiB 2/100<1 member qgroup> 2/100 0.00B 0.00B <0 member qgroups> With this fix, which simply re-uses `qgroup` as the iteration variable, we see the expected result: Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 1.02MiB 1.02MiB 2/100<1 member qgroup> 2/100 1.02MiB 1.02MiB <0 member qgroups> The existing fstests did not exercise two layer inheritance so this bug was missed. I intend to add that testing there, as well. Fixes: a0bdc04b0732 ("btrfs: qgroup: use qgroup_iterator in __qgroup_excl_accounting()") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1a5972178b3a..ccaa9a3cf1ce 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,7 +1453,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *src, int sign) { struct btrfs_qgroup *qgroup; - struct btrfs_qgroup *cur; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; int ret = 0; @@ -1463,7 +1462,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, goto out; qgroup_iterator_add(&qgroup_list, qgroup); - list_for_each_entry(cur, &qgroup_list, iterator) { + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; From 08c5b422807435cdb79bee60da84262102e5f26a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 23 Jul 2025 13:28:23 -0700 Subject: [PATCH 0066/1307] drm/msm: Defer fd_install in VM_BIND ioctl Avoid fd_install() until there are no more potential error paths, to avoid put_unused_fd() after the fd is made visible to userspace. Fixes: 2e6a8a1fe2b2 ("drm/msm: Add VM_BIND ioctl") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/665365/ --- drivers/gpu/drm/msm/msm_gem_vma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 3cd8562a5109..dc54c693b28d 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -1460,12 +1460,8 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) if (args->flags & MSM_VM_BIND_FENCE_FD_OUT) { sync_file = sync_file_create(job->fence); - if (!sync_file) { + if (!sync_file) ret = -ENOMEM; - } else { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } } if (ret) @@ -1494,10 +1490,14 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) { - put_unused_fd(out_fence_fd); + if (ret) { + if (out_fence_fd >= 0) + put_unused_fd(out_fence_fd); if (sync_file) fput(sync_file->file); + } else if (sync_file) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } if (!IS_ERR_OR_NULL(job)) { From d02d50cb062737f2b0c689fa24ef8b86f14756e5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 Jul 2025 15:29:05 +0100 Subject: [PATCH 0067/1307] drm/msm: Fix dereference of pointer minor before null check Currently the pointer minor is being dereferenced before it is null checked, leading to a potential null pointer dereference issue. Fix this by dereferencing the pointer only after it has been null checked. Also Replace minor->dev with dev. Fixes: 4f89cf40d01e ("drm/msm: bail out late_init_minor() if it is not a GPU device") Signed-off-by: Colin Ian King Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/666259/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_debugfs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index bbda865addae..97dc70876442 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -325,25 +325,28 @@ static struct drm_info_list msm_debugfs_list[] = { static int late_init_minor(struct drm_minor *minor) { - struct drm_device *dev = minor->dev; - struct msm_drm_private *priv = dev->dev_private; + struct drm_device *dev; + struct msm_drm_private *priv; int ret; if (!minor) return 0; + dev = minor->dev; + priv = dev->dev_private; + if (!priv->gpu_pdev) return 0; ret = msm_rd_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install rd debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install rd debugfs\n"); return ret; } ret = msm_perf_debugfs_init(minor); if (ret) { - DRM_DEV_ERROR(minor->dev->dev, "could not install perf debugfs\n"); + DRM_DEV_ERROR(dev->dev, "could not install perf debugfs\n"); return ret; } From 4a00bf1fd5add1e0da37009cba5b5ffb4de255d9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 10:59:58 -0700 Subject: [PATCH 0068/1307] drm/msm: Add missing "location"s to devcoredump This is needed to properly interpret some of the sections. v2: Fix missing \n Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666651/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index faca2a0243ab..8420cbe129c0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1796,6 +1796,7 @@ static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj, print_name(p, " - type: ", a7xx_statetype_names[block->statetype]); print_name(p, " - pipe: ", a7xx_pipe_names[block->pipeid]); + drm_printf(p, " - location: %d\n", block->location); for (i = 0; i < block->num_sps; i++) { drm_printf(p, " - sp: %d\n", i); @@ -1873,6 +1874,7 @@ static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, print_name(p, " - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]); print_name(p, " - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]); drm_printf(p, " - context: %d\n", dbgahb->context_id); + drm_printf(p, " - location: %d\n", dbgahb->location_id); a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4); } } From 9466b45c19f6646787d6249116e52a9c4382e6ad Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 10:59:59 -0700 Subject: [PATCH 0069/1307] drm/msm: Fix section names and sizes The section names randomly appended _DATA or _ADDR in many cases, and/or didn't match the reg names. Fix them so crashdec can properly resolve the section names back to reg names. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666654/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 38 +++++++++---------- .../drm/msm/adreno/adreno_gen7_9_0_snapshot.h | 24 ++++++------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index 95d93ac6812a..1c18499b60bb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -419,47 +419,47 @@ static const struct a6xx_indexed_registers a6xx_indexed_reglist[] = { REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a6xx_get_cp_roq_size}, }; static const struct a6xx_indexed_registers a7xx_indexed_reglist[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, - REG_A6XX_CP_SQE_STAT_DATA, 0x33, NULL }, + REG_A6XX_CP_SQE_STAT_DATA, 0x40, NULL }, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, - REG_A7XX_CP_BV_SQE_STAT_DATA, 0x33, NULL }, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, + REG_A7XX_CP_BV_SQE_STAT_DATA, 0x40, NULL }, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, - REG_A7XX_CP_SQE_AC_STAT_DATA, 0x33, NULL }, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, + REG_A7XX_CP_SQE_AC_STAT_DATA, 0x40, NULL }, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x100, NULL }, - { "CP_SQE_AC_UCODE_DBG_ADDR", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x8000, NULL }, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { "CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x40, NULL }, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0, a7xx_get_cp_roq_size }, }; static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = { - "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060, NULL, }; static const struct a6xx_indexed_registers a7xx_cp_bv_mempool_indexed[] = { - { "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, - REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2100, NULL }, - { "CP_BV_MEMPOOL", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, - REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2100, NULL }, + { "CP_MEM_POOL_DBG", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2200, NULL }, + { "CP_BV_MEM_POOL_DBG", REG_A7XX_CP_BV_MEM_POOL_DBG_ADDR, + REG_A7XX_CP_BV_MEM_POOL_DBG_DATA, 0x2200, NULL }, }; #define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index e02cabb39f19..b1f8bbf1d843 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -1299,29 +1299,29 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { REG_A6XX_CP_SQE_STAT_DATA, 0x00040}, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, REG_A6XX_CP_DRAW_STATE_DATA, 0x00200}, - { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + { "CP_ROQ_DBG", REG_A6XX_CP_ROQ_DBG_ADDR, REG_A6XX_CP_ROQ_DBG_DATA, 0x00800}, - { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + { "CP_SQE_UCODE_DBG", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_DRAW_STATE_ADDR", REG_A7XX_CP_BV_DRAW_STATE_ADDR, + { "CP_BV_DRAW_STATE", REG_A7XX_CP_BV_DRAW_STATE_ADDR, REG_A7XX_CP_BV_DRAW_STATE_DATA, 0x00200}, - { "CP_BV_ROQ_DBG_ADDR", REG_A7XX_CP_BV_ROQ_DBG_ADDR, + { "CP_BV_ROQ_DBG", REG_A7XX_CP_BV_ROQ_DBG_ADDR, REG_A7XX_CP_BV_ROQ_DBG_DATA, 0x00800}, - { "CP_BV_SQE_UCODE_DBG_ADDR", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, + { "CP_BV_SQE_UCODE_DBG", REG_A7XX_CP_BV_SQE_UCODE_DBG_ADDR, REG_A7XX_CP_BV_SQE_UCODE_DBG_DATA, 0x08000}, - { "CP_BV_SQE_STAT_ADDR", REG_A7XX_CP_BV_SQE_STAT_ADDR, + { "CP_BV_SQE_STAT", REG_A7XX_CP_BV_SQE_STAT_ADDR, REG_A7XX_CP_BV_SQE_STAT_DATA, 0x00040}, - { "CP_RESOURCE_TBL", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, + { "CP_RESOURCE_TABLE_DBG", REG_A7XX_CP_RESOURCE_TABLE_DBG_ADDR, REG_A7XX_CP_RESOURCE_TABLE_DBG_DATA, 0x04100}, - { "CP_LPAC_DRAW_STATE_ADDR", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, + { "CP_LPAC_DRAW_STATE", REG_A7XX_CP_LPAC_DRAW_STATE_ADDR, REG_A7XX_CP_LPAC_DRAW_STATE_DATA, 0x00200}, - { "CP_LPAC_ROQ", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, + { "CP_LPAC_ROQ_DBG", REG_A7XX_CP_LPAC_ROQ_DBG_ADDR, REG_A7XX_CP_LPAC_ROQ_DBG_DATA, 0x00200}, - { "CP_SQE_AC_UCODE_DBG_ADDR", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, + { "CP_SQE_AC_UCODE_DBG", REG_A7XX_CP_SQE_AC_UCODE_DBG_ADDR, REG_A7XX_CP_SQE_AC_UCODE_DBG_DATA, 0x08000}, - { "CP_SQE_AC_STAT_ADDR", REG_A7XX_CP_SQE_AC_STAT_ADDR, + { "CP_SQE_AC_STAT", REG_A7XX_CP_SQE_AC_STAT_ADDR, REG_A7XX_CP_SQE_AC_STAT_DATA, 0x00040}, - { "CP_LPAC_FIFO_DBG_ADDR", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, + { "CP_LPAC_FIFO_DBG", REG_A7XX_CP_LPAC_FIFO_DBG_ADDR, REG_A7XX_CP_LPAC_FIFO_DBG_DATA, 0x00040}, { "CP_AQE_ROQ_0", REG_A7XX_CP_AQE_ROQ_DBG_ADDR_0, REG_A7XX_CP_AQE_ROQ_DBG_DATA_0, 0x00100}, From a506578d8909e7e6f0d545af9850ccd4318bf6cf Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:00 -0700 Subject: [PATCH 0070/1307] drm/msm: Fix order of selector programming in cluster snapshot Program the selector _after_ selecting the aperture. This aligns with the downstream driver, and fixes a case where we were failing to capture ctx0 regs (and presumably what we thought were ctx1 regs were actually ctx0). Suggested-by: Akhil P Oommen Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666655/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 8420cbe129c0..b4c8583dd6ca 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -759,15 +759,15 @@ static void a7xx_get_cluster(struct msm_gpu *gpu, size_t datasize; int i, regcount = 0; - /* Some clusters need a selector register to be programmed too */ - if (cluster->sel) - in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); - in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD, A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) | A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) | A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id)); + /* Some clusters need a selector register to be programmed too */ + if (cluster->sel) + in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val); + for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) { int count = RANGE(cluster->regs, i); From 2f2cc939ad672361ca81fcb27d76dc8154b17a1c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:01 -0700 Subject: [PATCH 0071/1307] drm/msm: Constify snapshot tables A bit of divergence from the downstream driver from which these headers were imported. But no need for these tables not to be const. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666656/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 +- drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h | 8 ++++---- drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h | 8 ++++---- drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index b4c8583dd6ca..7fc450ab8c12 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -11,7 +11,7 @@ static const unsigned int *gen7_0_0_external_core_regs[] __always_unused; static const unsigned int *gen7_2_0_external_core_regs[] __always_unused; static const unsigned int *gen7_9_0_external_core_regs[] __always_unused; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused; static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused; #include "adreno_gen7_0_0_snapshot.h" diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h index cb66ece6606b..afcc7498983f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h @@ -81,7 +81,7 @@ static const u32 gen7_0_0_debugbus_blocks[] = { A7XX_DBGBUS_USPTP_7, }; -static struct gen7_shader_block gen7_0_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_0_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 4, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ -695,7 +695,7 @@ static const struct gen7_sel_reg gen7_0_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_0_0_clusters[] = { +static const struct gen7_cluster_registers gen7_0_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_0_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -764,7 +764,7 @@ static struct gen7_cluster_registers gen7_0_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -914,7 +914,7 @@ static const u32 gen7_0_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_0_0_reg_list[] = { +static const struct gen7_reg_list gen7_0_0_reg_list[] = { { gen7_0_0_gpu_registers, NULL }, { gen7_0_0_cx_misc_registers, NULL }, { gen7_0_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h index 6f8ad50f32ce..6569f12bf12f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h @@ -95,7 +95,7 @@ static const u32 gen7_2_0_debugbus_blocks[] = { A7XX_DBGBUS_CCHE_2, }; -static struct gen7_shader_block gen7_2_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_2_0_shader_blocks[] = { {A7XX_TP0_TMO_DATA, 0x200, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_SMO_DATA, 0x80, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, {A7XX_TP0_MIPMAP_BASE_DATA, 0x3c0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP}, @@ -489,7 +489,7 @@ static const struct gen7_sel_reg gen7_2_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_2_0_clusters[] = { +static const struct gen7_cluster_registers gen7_2_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_2_0_noncontext_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -558,7 +558,7 @@ static struct gen7_cluster_registers gen7_2_0_clusters[] = { gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 }, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -737,7 +737,7 @@ static const u32 gen7_2_0_dpm_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_registers), 8)); -static struct gen7_reg_list gen7_2_0_reg_list[] = { +static const struct gen7_reg_list gen7_2_0_reg_list[] = { { gen7_2_0_gpu_registers, NULL }, { gen7_2_0_cx_misc_registers, NULL }, { gen7_2_0_dpm_registers, NULL }, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h index b1f8bbf1d843..0956dfca1f05 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h @@ -117,7 +117,7 @@ static const u32 gen7_9_0_cx_debugbus_blocks[] = { A7XX_DBGBUS_GBIF_CX, }; -static struct gen7_shader_block gen7_9_0_shader_blocks[] = { +static const struct gen7_shader_block gen7_9_0_shader_blocks[] = { { A7XX_TP0_TMO_DATA, 0x0200, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, { A7XX_TP0_SMO_DATA, 0x0080, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, { A7XX_TP0_MIPMAP_BASE_DATA, 0x03C0, 6, 2, A7XX_PIPE_BR, A7XX_USPTP }, @@ -1116,7 +1116,7 @@ static const struct gen7_sel_reg gen7_9_0_rb_rbp_sel = { .val = 0x9, }; -static struct gen7_cluster_registers gen7_9_0_clusters[] = { +static const struct gen7_cluster_registers gen7_9_0_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_PIPE_BR, STATE_NON_CONTEXT, gen7_9_0_non_context_pipe_br_registers, }, { A7XX_CLUSTER_NONE, A7XX_PIPE_BV, STATE_NON_CONTEXT, @@ -1185,7 +1185,7 @@ static struct gen7_cluster_registers gen7_9_0_clusters[] = { gen7_9_0_vpc_pipe_bv_cluster_vpc_ps_registers, }, }; -static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { +static const struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, gen7_9_0_non_context_sp_pipe_br_hlsq_state_registers, 0xae00}, { A7XX_CLUSTER_NONE, A7XX_SP_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_SP_TOP, @@ -1294,7 +1294,7 @@ static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] = { gen7_9_0_tpl1_pipe_br_cluster_sp_ps_usptp_registers, 0xb000}, }; -static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { +static const struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { { "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR, REG_A6XX_CP_SQE_STAT_DATA, 0x00040}, { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, @@ -1337,7 +1337,7 @@ static struct a6xx_indexed_registers gen7_9_0_cp_indexed_reg_list[] = { REG_A7XX_CP_AQE_STAT_DATA_1, 0x00040}, }; -static struct gen7_reg_list gen7_9_0_reg_list[] = { +static const struct gen7_reg_list gen7_9_0_reg_list[] = { { gen7_9_0_gpu_registers, NULL}, { gen7_9_0_cx_misc_registers, NULL}, { gen7_9_0_cx_dbgc_registers, NULL}, From 13ed0a1af263b56a5ebbf38ab7163cbc9dcb009e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:02 -0700 Subject: [PATCH 0072/1307] drm/msm: Fix a7xx debugbus read The bitfield positions changed in a7xx. v2: Don't open-code the bitfield building v3: Also fix cx_debugbus Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666659/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 32 ++++++++++++++----- drivers/gpu/drm/msm/registers/adreno/a6xx.xml | 14 +++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 7fc450ab8c12..75f93213e114 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -174,8 +174,15 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu, static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | - A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A7XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } else { + reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + } gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -198,11 +205,18 @@ static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, readl((ptr) + ((offset) << 2)) /* read a value from the CX debug bus */ -static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset, +static int cx_debugbus_read(struct msm_gpu *gpu, void __iomem *cxdbg, u32 block, u32 offset, u32 *data) { - u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | - A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + u32 reg; + + if (to_adreno_gpu(gpu)->info->family >= ADRENO_7XX_GEN1) { + reg = A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A7XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } else { + reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + } cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); @@ -315,7 +329,8 @@ static void a6xx_get_debugbus_block(struct msm_gpu *gpu, ptr += debugbus_read(gpu, block->id, i, ptr); } -static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, +static void a6xx_get_cx_debugbus_block(struct msm_gpu *gpu, + void __iomem *cxdbg, struct a6xx_gpu_state *a6xx_state, const struct a6xx_debugbus_block *block, struct a6xx_gpu_state_obj *obj) @@ -330,7 +345,7 @@ static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, obj->handle = block; for (ptr = obj->data, i = 0; i < block->count; i++) - ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); + ptr += cx_debugbus_read(gpu, cxdbg, block->id, i, ptr); } static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu, @@ -526,7 +541,8 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu, int i; for (i = 0; i < nr_cx_debugbus_blocks; i++) - a6xx_get_cx_debugbus_block(cxdbg, + a6xx_get_cx_debugbus_block(gpu, + cxdbg, a6xx_state, &cx_debugbus_blocks[i], &a6xx_state->cx_debugbus[i]); diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index d860fd94feae..86fab2750ba7 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -594,10 +594,14 @@ by a particular renderpass/blit. - + + + + + @@ -3796,6 +3800,14 @@ by a particular renderpass/blit. + + + + + + + + From a814ba2d7b847cff15565bbab781df89e190619c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:03 -0700 Subject: [PATCH 0073/1307] drm/msm: Fix debugbus snapshot We weren't setting the # of captured debugbus blocks. Reported-by: Connor Abbott Suggested-by: Connor Abbott Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666660/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 75f93213e114..d5d1271fce61 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -438,8 +438,9 @@ static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu, a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]], &a6xx_state->debugbus[i + debugbus_blocks_count]); } - } + a6xx_state->nr_debugbus = total_debugbus_blocks; + } } static void a6xx_get_debugbus(struct msm_gpu *gpu, From e9621ef610c4a600678da5d8020d4a0dfe686faa Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 1 Aug 2025 11:00:04 -0700 Subject: [PATCH 0074/1307] drm/msm: Fix a7xx TPL1 cluster snapshot Later gens have both a PIPE_BR and PIPE_NONE section. The snapshot tool seems to expect this for x1-85 as well. I guess this was just a bug in downstream kgsl, which went unnoticed? Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/666662/ --- drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h | 11 +++++++++-- drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h index afcc7498983f..04b49d385f9d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h @@ -668,12 +668,19 @@ static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = { }; static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8)); -/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ -static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_NONE */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_none_registers[] = { 0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c, 0x0b60f, 0x0b621, 0x0b630, 0x0b633, UINT_MAX, UINT_MAX, }; +static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_none_registers), 8)); + +/* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_BR */ +static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = { + 0x0b600, 0x0b600, + UINT_MAX, UINT_MAX, +}; static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8)); /* Block: TPl1 Cluster: noncontext Pipeline: A7XX_PIPE_LPAC */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h index 6569f12bf12f..772652eb61f3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h @@ -573,6 +573,8 @@ static const struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = { gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 }, { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_BR, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 }, + { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_NONE, 0, A7XX_USPTP, + gen7_0_0_tpl1_noncontext_pipe_none_registers, 0xb600 }, { A7XX_CLUSTER_NONE, A7XX_TP0_NCTX_REG, A7XX_PIPE_LPAC, 0, A7XX_USPTP, gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 }, { A7XX_CLUSTER_SP_PS, A7XX_SP_CTX0_3D_CPS_REG, A7XX_PIPE_BR, 0, A7XX_HLSQ_STATE, From ba3afadeb81ebb92ab23546fbd2ec7d9dfe216f8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 5 Aug 2025 09:43:08 -0700 Subject: [PATCH 0075/1307] drm/msm: Fix a few comments Fix a couple comments which had become (partially) obsolete or incorrect with the gpuvm conversion. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/667237/ --- drivers/gpu/drm/msm/msm_gem.h | 2 +- drivers/gpu/drm/msm/msm_gem_vma.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 88239da1cd72..751c3b4965bc 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -100,7 +100,7 @@ struct msm_gem_vm { * * Only used for kernel managed VMs, unused for user managed VMs. * - * Protected by @mm_lock. + * Protected by vm lock. See msm_gem_lock_vm_and_obj(), for ex. */ struct drm_mm mm; diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index dc54c693b28d..d1f5bb2e0a16 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -319,13 +319,10 @@ msm_gem_vma_map(struct drm_gpuva *vma, int prot, struct sg_table *sgt) mutex_lock(&vm->mmu_lock); /* - * NOTE: iommu/io-pgtable can allocate pages, so we cannot hold + * NOTE: if not using pgtable preallocation, we cannot hold * a lock across map/unmap which is also used in the job_run() * path, as this can cause deadlock in job_run() vs shrinker/ * reclaim. - * - * Revisit this if we can come up with a scheme to pre-alloc pages - * for the pgtable in map/unmap ops. */ ret = vm_map_op(vm, &(struct msm_vm_map_op){ .iova = vma->va.addr, From fe2f3b1c702f0e02906419c662ca9446cc789354 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 5 Aug 2025 09:44:31 -0700 Subject: [PATCH 0076/1307] drm/msm: Handle in-place remaps Detect and handle the special case of a MAP op simply updating the vma flags of an existing vma, and skip the pgtable updates. This allows turnip to set the MSM_VMA_DUMP flag on an existing mapping without requiring additional synchronization against commands running on the GPU. Signed-off-by: Rob Clark Tested-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/667238/ --- drivers/gpu/drm/msm/msm_gem_vma.c | 41 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index d1f5bb2e0a16..00d0f3b7ba32 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -451,6 +451,8 @@ msm_gem_vm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) struct op_arg { unsigned flags; struct msm_vm_bind_job *job; + const struct msm_vm_bind_op *op; + bool kept; }; static void @@ -472,14 +474,18 @@ vma_from_op(struct op_arg *arg, struct drm_gpuva_op_map *op) } static int -msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gem_object *obj = op->map.gem.obj; struct drm_gpuva *vma; struct sg_table *sgt; unsigned prot; + if (arg->kept) + return 0; + vma = vma_from_op(arg, &op->map); if (WARN_ON(IS_ERR(vma))) return PTR_ERR(vma); @@ -599,15 +605,41 @@ msm_gem_vm_sm_step_remap(struct drm_gpuva_op *op, void *arg) } static int -msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *arg) +msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *_arg) { - struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job; + struct op_arg *arg = _arg; + struct msm_vm_bind_job *job = arg->job; struct drm_gpuva *vma = op->unmap.va; struct msm_gem_vma *msm_vma = to_msm_vma(vma); vm_dbg("%p:%p:%p: %016llx %016llx", vma->vm, vma, vma->gem.obj, vma->va.addr, vma->va.range); + /* + * Detect in-place remap. Turnip does this to change the vma flags, + * in particular MSM_VMA_DUMP. In this case we want to avoid actually + * touching the page tables, as that would require synchronization + * against SUBMIT jobs running on the GPU. + */ + if (op->unmap.keep && + (arg->op->op == MSM_VM_BIND_OP_MAP) && + (vma->gem.obj == arg->op->obj) && + (vma->gem.offset == arg->op->obj_offset) && + (vma->va.addr == arg->op->iova) && + (vma->va.range == arg->op->range)) { + /* We are only expecting a single in-place unmap+map cb pair: */ + WARN_ON(arg->kept); + + /* Leave the existing VMA in place, but signal that to the map cb: */ + arg->kept = true; + + /* Only flags are changing, so update that in-place: */ + unsigned orig_flags = vma->flags & (DRM_GPUVA_USERBITS - 1); + vma->flags = orig_flags | arg->flags; + + return 0; + } + if (!msm_vma->mapped) goto out_close; @@ -1268,6 +1300,7 @@ vm_bind_job_prepare(struct msm_vm_bind_job *job) const struct msm_vm_bind_op *op = &job->ops[i]; struct op_arg arg = { .job = job, + .op = op, }; switch (op->op) { From 42464c51ccccb6343a932a7ea8bc9181e589f270 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Aug 2025 09:10:58 -0400 Subject: [PATCH 0077/1307] drm/msm: Fix objtool warning in submit_lock_objects() Split the vmbind case into a separate helper function submit_lock_objects_vmbind() to fix objtool warning: drivers/gpu/drm/msm/msm.o: warning: objtool: submit_lock_objects+0x451: sibling call from callable instruction with modified stack frame The drm_exec_until_all_locked() macro uses computed gotos internally for its retry loop. Having return statements inside this macro, or immediately after it in certain code paths, confuses objtool's static analysis of stack frames, causing it to incorrectly flag tail call optimizations. Fixes: 92395af63a99 ("drm/msm: Add VM_BIND submitqueue") Signed-off-by: Sasha Levin Patchwork: https://patchwork.freedesktop.org/patch/667539/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 53 +++++++++++++++------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index bfea19baf6d9..3ab3b27134f9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -271,32 +271,37 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, return ret; } +static int submit_lock_objects_vmbind(struct msm_gem_submit *submit) +{ + unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES; + struct drm_exec *exec = &submit->exec; + int ret = 0; + + drm_exec_init(&submit->exec, flags, submit->nr_bos); + + drm_exec_until_all_locked (&submit->exec) { + ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; + + ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) + break; + } + + return ret; +} + /* This is where we make sure all the bo's are reserved and pin'd: */ static int submit_lock_objects(struct msm_gem_submit *submit) { unsigned flags = DRM_EXEC_INTERRUPTIBLE_WAIT; - struct drm_exec *exec = &submit->exec; - int ret; + int ret = 0; - if (msm_context_is_vmbind(submit->queue->ctx)) { - flags |= DRM_EXEC_IGNORE_DUPLICATES; - - drm_exec_init(&submit->exec, flags, submit->nr_bos); - - drm_exec_until_all_locked (&submit->exec) { - ret = drm_gpuvm_prepare_vm(submit->vm, exec, 1); - drm_exec_retry_on_contention(exec); - if (ret) - return ret; - - ret = drm_gpuvm_prepare_objects(submit->vm, exec, 1); - drm_exec_retry_on_contention(exec); - if (ret) - return ret; - } - - return 0; - } + if (msm_context_is_vmbind(submit->queue->ctx)) + return submit_lock_objects_vmbind(submit); drm_exec_init(&submit->exec, flags, submit->nr_bos); @@ -305,17 +310,17 @@ static int submit_lock_objects(struct msm_gem_submit *submit) drm_gpuvm_resv_obj(submit->vm)); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; for (unsigned i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; ret = drm_exec_prepare_obj(&submit->exec, obj, 1); drm_exec_retry_on_contention(&submit->exec); if (ret) - return ret; + break; } } - return 0; + return ret; } static int submit_fence_sync(struct msm_gem_submit *submit) From 700d6868fee2d384cbd821d31db3a245d218223c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Aug 2025 09:21:28 +0200 Subject: [PATCH 0078/1307] kvm: arm64: use BUG() instead of BUG_ON(1) The BUG_ON() macro adds a little bit of complexity over BUG(), and in some cases this ends up confusing the compiler's control flow analysis in a way that results in a warning. This one now shows up with clang-21: arch/arm64/kvm/vgic/vgic-mmio.c:1094:3: error: variable 'len' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] 1094 | BUG_ON(1); Change both instances of BUG_ON(1) to a plain BUG() in the arm64 kvm code, to avoid the false-positive warning. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250807072132.4170088-1-arnd@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/list_debug.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index 46a2d4f2b3c6..baa6260f88dc 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -17,7 +17,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) bool corruption = unlikely(condition); \ if (corruption) { \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ - BUG_ON(1); \ + BUG(); \ } else \ WARN_ON(1); \ } \ diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e416e433baff..a573b1f0c6cb 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -1091,7 +1091,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, len = vgic_v3_init_dist_iodev(io_device); break; default: - BUG_ON(1); + BUG(); } io_device->base_addr = dist_base_address; From eaa43934b412f0381598e308b6a25d6c9a5dce2d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 7 Aug 2025 13:01:31 +0100 Subject: [PATCH 0079/1307] KVM: arm64: Handle AIDR_EL1 and REVIDR_EL1 in host for protected VMs Since commit 17efc1acee62 ("arm64: Expose AIDR_EL1 via sysfs"), AIDR_EL1 is read early during boot. Therefore, a guest running as a protected VM will fail to boot because when it attempts to access AIDR_EL1, access to that register is restricted in pKVM for protected guests. Similar to how MIDR_EL1 is handled by the host for protected VMs, let the host handle accesses to AIDR_EL1 as well as REVIDR_EL1. However note that, unlike MIDR_EL1, AIDR_EL1 and REVIDR_EL1 are trapped by HCR_EL2.TID1. Therefore, explicitly mark them as handled by the host for protected VMs. TID1 is always set in pKVM, because it needs to restrict access to SMIDR_EL1, which is also trapped by that bit. Reported-by: Will Deacon Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20250807120133.871892-2-tabba@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 1ddd9ed3cbb3..bbd60013cf9e 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -372,6 +372,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Debug and Trace Registers are restricted. */ + /* Group 1 ID registers */ + HOST_HANDLED(SYS_REVIDR_EL1), + /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ AARCH32(SYS_ID_PFR0_EL1), @@ -460,6 +463,7 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_AIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), HOST_HANDLED(SYS_CTR_EL0), From 798eb597870064bff28d8a41cb5197725f7dc6f2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 7 Aug 2025 13:01:32 +0100 Subject: [PATCH 0080/1307] KVM: arm64: Sync protected guest VBAR_EL1 on injecting an undef exception In pKVM, a race condition can occur if a guest updates its VBAR_EL1 register and, before a vCPU exit synchronizes this change, the hypervisor needs to inject an undefined exception into a protected guest. In this scenario, the vCPU still holds the stale VBAR_EL1 value from before the guest's update. When pKVM injects the exception, it ends up using the stale value. Explicitly read the live value of VBAR_EL1 from the guest and update the vCPU value immediately before pending the exception. This ensures the vCPU's value is the same as the guest's and that the exception will be handled at the correct address upon resuming the guest. Reported-by: Keir Fraser Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20250807120133.871892-3-tabba@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index bbd60013cf9e..71d2fc97f004 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -253,6 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); From f1edb159200ad1ef2fc518c7673cb9559a6f8edd Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 7 Aug 2025 13:01:33 +0100 Subject: [PATCH 0081/1307] arm64: vgic-v2: Fix guest endianness check in hVHE mode In hVHE when running at the hypervisor, SCTLR_EL1 refers to the hypervisor's System Control Register rather than the guest's. Make sure to access the guest's register to determine its endianness. Reported-by: Will Deacon Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20250807120133.871892-4-tabba@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 87a54375bd6e..78579b31a420 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -20,7 +20,7 @@ static bool __is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); - return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); + return !!(read_sysreg_el1(SYS_SCTLR) & SCTLR_ELx_EE); } /* From 85acc29f90e0183997dea27277057c9aec2769aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Aug 2025 18:13:41 +0100 Subject: [PATCH 0082/1307] KVM: arm64: selftest: Add standalone test checking for KVM's own UUID Tinkering with UUIDs is a perilious task, and the KVM UUID gets broken at times. In order to spot this early enough, add a selftest that will shout if the expected value isn't found. Signed-off-by: Marc Zyngier Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250721130558.50823-1-jackabt.amazon@gmail.com Link: https://lore.kernel.org/r/20250806171341.1521210-1-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/arm64/kvm-uuid.c | 70 ++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 tools/testing/selftests/kvm/arm64/kvm-uuid.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index ce817a975e50..e1eb1ba238a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -167,6 +167,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..af9581b860f1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include +#include +#include +#include + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} From c875503a9b9082928d7d3fc60b5400d16fbfae4e Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:56 +0800 Subject: [PATCH 0083/1307] net: hibmcge: fix rtnl deadlock issue Currently, the hibmcge netdev acquires the rtnl_lock in pci_error_handlers.reset_prepare() and releases it in pci_error_handlers.reset_done(). However, in the PCI framework: pci_reset_bus - __pci_reset_slot - pci_slot_save_and_disable_locked - pci_dev_save_and_disable - err_handler->reset_prepare(dev); In pci_slot_save_and_disable_locked(): list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot!= slot) continue; pci_dev_save_and_disable(dev); if (dev->subordinate) pci_bus_save_and_disable_locked(dev->subordinate); } This will iterate through all devices under the current bus and execute err_handler->reset_prepare(), causing two devices of the hibmcge driver to sequentially request the rtnl_lock, leading to a deadlock. Since the driver now executes netif_device_detach() before the reset process, it will not concurrently with other netdev APIs, so there is no need to hold the rtnl_lock now. Therefore, this patch removes the rtnl_lock during the reset process and adjusts the position of HBG_NIC_STATE_RESETTING to ensure that multiple resets are not executed concurrently. Fixes: 3f5a61f6d504f ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 503cfbfb4a8a..83cf75bf7a17 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -53,9 +53,11 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) { int ret; - ASSERT_RTNL(); + if (test_and_set_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return -EBUSY; if (netif_running(priv->netdev)) { + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_warn(&priv->pdev->dev, "failed to reset because port is up\n"); return -EBUSY; @@ -64,7 +66,6 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) netif_device_detach(priv->netdev); priv->reset_type = type; - set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); ret = hbg_hw_event_notify(priv, HBG_HW_EVENT_RESET); if (ret) { @@ -84,29 +85,26 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) type != priv->reset_type) return 0; - ASSERT_RTNL(); - - clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); ret = hbg_rebuild(priv); if (ret) { priv->stats.reset_fail_cnt++; set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_err(&priv->pdev->dev, "failed to rebuild after reset\n"); return ret; } netif_device_attach(priv->netdev); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_info(&priv->pdev->dev, "reset done\n"); return ret; } -/* must be protected by rtnl lock */ int hbg_reset(struct hbg_priv *priv) { int ret; - ASSERT_RTNL(); ret = hbg_reset_prepare(priv, HBG_RESET_TYPE_FUNCTION); if (ret) return ret; @@ -171,7 +169,6 @@ static void hbg_pci_err_reset_prepare(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); - rtnl_lock(); hbg_reset_prepare(priv, HBG_RESET_TYPE_FLR); } @@ -181,7 +178,6 @@ static void hbg_pci_err_reset_done(struct pci_dev *pdev) struct hbg_priv *priv = netdev_priv(netdev); hbg_reset_done(priv, HBG_RESET_TYPE_FLR); - rtnl_unlock(); } static const struct pci_error_handlers hbg_pci_err_handler = { From 7004b26f0b64331143eb0b312e77a357a11427ce Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:57 +0800 Subject: [PATCH 0084/1307] net: hibmcge: fix the division by zero issue When the network port is down, the queue is released, and ring->len is 0. In debugfs, hbg_get_queue_used_num() will be called, which may lead to a division by zero issue. This patch adds a check, if ring->len is 0, hbg_get_queue_used_num() directly returns 0. Fixes: 40735e7543f9 ("net: hibmcge: Implement .ndo_start_xmit function") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h index 2883a5899ae2..8b6110599e10 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h @@ -29,7 +29,12 @@ static inline bool hbg_fifo_is_full(struct hbg_priv *priv, enum hbg_dir dir) static inline u32 hbg_get_queue_used_num(struct hbg_ring *ring) { - return (ring->ntu + ring->len - ring->ntc) % ring->len; + u32 len = READ_ONCE(ring->len); + + if (!len) + return 0; + + return (READ_ONCE(ring->ntu) + len - READ_ONCE(ring->ntc)) % len; } netdev_tx_t hbg_net_start_xmit(struct sk_buff *skb, struct net_device *netdev); From 62c50180ffda01468e640ac14925503796f255e2 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:58 +0800 Subject: [PATCH 0085/1307] net: hibmcge: fix the np_link_fail error reporting issue Currently, after modifying device port mode, the np_link_ok state is immediately checked. At this point, the device may not yet ready, leading to the querying of an intermediate state. This patch will poll to check if np_link is ok after modifying device port mode, and only report np_link_fail upon timeout. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 8cca8316ba40..d0aa0661ecd4 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -12,6 +12,8 @@ #define HBG_HW_EVENT_WAIT_TIMEOUT_US (2 * 1000 * 1000) #define HBG_HW_EVENT_WAIT_INTERVAL_US (10 * 1000) +#define HBG_MAC_LINK_WAIT_TIMEOUT_US (500 * 1000) +#define HBG_MAC_LINK_WAIT_INTERVAL_US (5 * 1000) /* little endian or big endian. * ctrl means packet description, data means skb packet data */ @@ -228,6 +230,9 @@ void hbg_hw_fill_buffer(struct hbg_priv *priv, u32 buffer_dma_addr) void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) { + u32 link_status; + int ret; + hbg_hw_mac_enable(priv, HBG_STATUS_DISABLE); hbg_reg_write_field(priv, HBG_REG_PORT_MODE_ADDR, @@ -239,8 +244,14 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); - if (!hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, - HBG_REG_AN_NEG_STATE_NP_LINK_OK_B)) + /* wait MAC link up */ + ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, + link_status, + FIELD_GET(HBG_REG_AN_NEG_STATE_NP_LINK_OK_B, + link_status), + HBG_MAC_LINK_WAIT_INTERVAL_US, + HBG_MAC_LINK_WAIT_TIMEOUT_US); + if (ret) hbg_np_link_fail_task_schedule(priv); } From 06feac15406f4f66f4c0c6ea60b10d44775d4133 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 5 Aug 2025 23:08:12 +0530 Subject: [PATCH 0086/1307] net: ti: icssg-prueth: Fix emac link speed handling When link settings are changed emac->speed is populated by emac_adjust_link(). The link speed and other settings are then written into the DRAM. However if both ports are brought down after this and brought up again or if the operating mode is changed and a firmware reload is needed, the DRAM is cleared by icssg_config(). As a result the link settings are lost. Fix this by calling emac_adjust_link() after icssg_config(). This re populates the settings in the DRAM after a new firmware load. Fixes: 9facce84f406 ("net: ti: icssg-prueth: Fix firmware load sequence.") Signed-off-by: MD Danish Anwar Reviewed-by: Andrew Lunn Message-ID: <20250805173812.2183161-1-danishanwar@ti.com> Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 2b973d6e2341..6c7d776ae4ee 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -50,6 +50,8 @@ /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) +static void emac_adjust_link(struct net_device *ndev); + static int emac_get_tx_ts(struct prueth_emac *emac, struct emac_tx_ts_response *rsp) { @@ -229,6 +231,10 @@ static int prueth_emac_common_start(struct prueth *prueth) ret = icssg_config(prueth, emac, slice); if (ret) goto disable_class; + + mutex_lock(&emac->ndev->phydev->lock); + emac_adjust_link(emac->ndev); + mutex_unlock(&emac->ndev->phydev->lock); } ret = prueth_emac_start(prueth); From 64fdaa94bfe0cca3a0f4b2dd922486c5f59fe678 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Aug 2025 17:36:54 -0700 Subject: [PATCH 0087/1307] net: page_pool: allow enabling recycling late, fix false positive warning Page pool can have pages "directly" (locklessly) recycled to it, if the NAPI that owns the page pool is scheduled to run on the same CPU. To make this safe we check that the NAPI is disabled while we destroy the page pool. In most cases NAPI and page pool lifetimes are tied together so this happens naturally. The queue API expects the following order of calls: -> mem_alloc alloc new pp -> stop napi_disable -> start napi_enable -> mem_free free old pp Here we allocate the page pool in ->mem_alloc and free in ->mem_free. But the NAPIs are only stopped between ->stop and ->start. We created page_pool_disable_direct_recycling() to safely shut down the recycling in ->stop. This way the page_pool_destroy() call in ->mem_free doesn't have to worry about recycling any more. Unfortunately, the page_pool_disable_direct_recycling() is not enough to deal with failures which necessitate freeing the _new_ page pool. If we hit a failure in ->mem_alloc or ->stop the new page pool has to be freed while the NAPI is active (assuming driver attaches the page pool to an existing NAPI instance and doesn't reallocate NAPIs). Freeing the new page pool is technically safe because it hasn't been used for any packets, yet, so there can be no recycling. But the check in napi_assert_will_not_race() has no way of knowing that. We could check if page pool is empty but that'd make the check much less likely to trigger during development. Add page_pool_enable_direct_recycling(), pairing with page_pool_disable_direct_recycling(). It will allow us to create the new page pools in "disabled" state and only enable recycling when we know the reconfig operation will not fail. Coincidentally it will also let us re-enable the recycling for the old pool, if the reconfig failed: -> mem_alloc (new) -> stop (old) # disables direct recycling for old -> start (new) # fail!! -> start (old) # go back to old pp but direct recycling is lost :( -> mem_free (new) The new helper is idempotent to make the life easier for drivers, which can operate in HDS mode and support zero-copy Rx. The driver can call the helper twice whether there are two pools or it has multiple references to a single pool. Fixes: 40eca00ae605 ("bnxt_en: unlink page pool when stopping Rx queue") Tested-by: David Wei Link: https://patch.msgid.link/20250805003654.2944974-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++- include/net/page_pool/types.h | 2 ++ net/core/page_pool.c | 29 +++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5578ddcb465d..76a4c5ae8000 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3819,7 +3819,6 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, if (BNXT_RX_PAGE_MODE(bp)) pp.pool_size += bp->rx_ring_size / rx_size_fac; pp.nid = numa_node; - pp.napi = &rxr->bnapi->napi; pp.netdev = bp->dev; pp.dev = &bp->pdev->dev; pp.dma_dir = bp->rx_dir; @@ -3851,6 +3850,12 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, return PTR_ERR(pool); } +static void bnxt_enable_rx_page_pool(struct bnxt_rx_ring_info *rxr) +{ + page_pool_enable_direct_recycling(rxr->head_pool, &rxr->bnapi->napi); + page_pool_enable_direct_recycling(rxr->page_pool, &rxr->bnapi->napi); +} + static int bnxt_alloc_rx_agg_bmap(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { u16 mem_size; @@ -3889,6 +3894,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node); if (rc) return rc; + bnxt_enable_rx_page_pool(rxr); rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i, 0); if (rc < 0) @@ -16031,6 +16037,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) goto err_reset; } + bnxt_enable_rx_page_pool(rxr); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 431b593de709..1509a536cb85 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -265,6 +265,8 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi); void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 05e2e22a8f7c..343a6cac21e3 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1201,6 +1201,35 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } +/** + * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI + * @pool: page pool to modify + * @napi: NAPI instance to associate the page pool with + * + * Associate a page pool with a NAPI instance for lockless page recycling. + * This is useful when a new page pool has to be added to a NAPI instance + * without disabling that NAPI instance, to mark the point at which control + * path "hands over" the page pool to the NAPI instance. In most cases driver + * can simply set the @napi field in struct page_pool_params, and does not + * have to call this helper. + * + * The function is idempotent, but does not implement any refcounting. + * Single page_pool_disable_direct_recycling() will disable recycling, + * no matter how many times enable was called. + */ +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi) +{ + if (READ_ONCE(pool->p.napi) == napi) + return; + WARN_ON(!napi || pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, napi); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_enable_direct_recycling); + void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. From 5f1d1d14db7dabce9c815e7d7cd351f8d58b8585 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 5 Aug 2025 07:23:18 -0700 Subject: [PATCH 0088/1307] net: ti: icss-iep: Fix incorrect type for return value in extts_enable() The variable ret in icss_iep_extts_enable() was incorrectly declared as u32, while the function returns int and may return negative error codes. This will cause sign extension issues and incorrect error propagation. Update ret to be int to fix error handling. This change corrects the declaration to avoid potential type mismatch. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250805142323.1949406-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icss_iep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 50bfbc2779e4..d8c9fe1d98c4 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -621,7 +621,8 @@ static int icss_iep_pps_enable(struct icss_iep *iep, int on) static int icss_iep_extts_enable(struct icss_iep *iep, u32 index, int on) { - u32 val, cap, ret = 0; + u32 val, cap; + int ret = 0; mutex_lock(&iep->ptp_clk_mutex); From aba0c94f61ec05315fa7815d21aefa4c87f6a9f4 Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Thu, 7 Aug 2025 04:18:11 +0000 Subject: [PATCH 0089/1307] vsock: Do not allow binding to VMADDR_PORT_ANY It is possible for a vsock to autobind to VMADDR_PORT_ANY. This can cause a use-after-free when a connection is made to the bound socket. The socket returned by accept() also has port VMADDR_PORT_ANY but is not on the list of unbound sockets. Binding it will result in an extra refcount decrement similar to the one fixed in fcdd2242c023 (vsock: Keep the binding until socket destruction). Modify the check in __vsock_bind_connectible() to also prevent binding to VMADDR_PORT_ANY. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Budimir Markovic Signed-off-by: Budimir Markovic Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20250807041811.678-1-markovicbudimir@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ead6a3c14b87..bebb355f3ffe 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -689,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; From fd60d8a086191fe33c2d719732d2482052fa6805 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 7 Aug 2025 15:40:11 -0400 Subject: [PATCH 0090/1307] sctp: linearize cloned gso packets in sctp_rcv A cloned head skb still shares these frag skbs in fraglist with the original head skb. It's not safe to access these frag skbs. syzbot reported two use-of-uninitialized-memory bugs caused by this: BUG: KMSAN: uninit-value in sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_assoc_bh_rcv+0x1a7/0xc50 net/sctp/associola.c:998 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x397/0xdb0 net/sctp/input.c:331 sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1122 __release_sock+0x1da/0x330 net/core/sock.c:3106 release_sock+0x6b/0x250 net/core/sock.c:3660 sctp_wait_for_connect+0x487/0x820 net/sctp/socket.c:9360 sctp_sendmsg_to_asoc+0x1ec1/0x1f00 net/sctp/socket.c:1885 sctp_sendmsg+0x32b9/0x4a80 net/sctp/socket.c:2031 inet_sendmsg+0x25a/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:718 [inline] and BUG: KMSAN: uninit-value in sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_inq_push+0x2a3/0x350 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x3c7/0xda0 net/sctp/input.c:331 sk_backlog_rcv+0x142/0x420 include/net/sock.h:1148 __release_sock+0x1d3/0x330 net/core/sock.c:3213 release_sock+0x6b/0x270 net/core/sock.c:3767 sctp_wait_for_connect+0x458/0x820 net/sctp/socket.c:9367 sctp_sendmsg_to_asoc+0x223a/0x2260 net/sctp/socket.c:1886 sctp_sendmsg+0x3910/0x49f0 net/sctp/socket.c:2032 inet_sendmsg+0x269/0x2a0 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] This patch fixes it by linearizing cloned gso packets in sctp_rcv(). Fixes: 90017accff61 ("sctp: Add GSO support") Reported-by: syzbot+773e51afe420baaf0e2b@syzkaller.appspotmail.com Reported-by: syzbot+70a42f45e76bede082be@syzkaller.appspotmail.com Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/dd7dc337b99876d4132d0961f776913719f7d225.1754595611.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2dc2666988fb..7e99894778d4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -117,7 +117,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!is_gso && skb_linearize(skb)) || + if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; From 829f45f9d992019b49f08ab425ca11288b084aed Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Wed, 6 Aug 2025 17:54:53 -0700 Subject: [PATCH 0091/1307] net: dsa: microchip: Fix KSZ8863 reset problem ksz8873_valid_regs[] was added for register access for KSZ8863/KSZ8873 switches, but the reset register is not in the list so ksz8_reset_switch() does not take any effect. Replace regmap_update_bits() using ksz_regmap_8 with ksz_rmw8() so that an error message will be given if the register is not defined. A side effect of not resetting the switch is the static MAC table is not cleared. Further additions to the table will show write error as there are only 8 entries in the table. Fixes: d0dec3333040 ("net: dsa: microchip: Add register access control for KSZ8873 chip") Signed-off-by: Tristram Ha Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250807005453.8306-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8.c | 20 +++++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8.c b/drivers/net/dsa/microchip/ksz8.c index 76e490070e9c..c354abdafc1b 100644 --- a/drivers/net/dsa/microchip/ksz8.c +++ b/drivers/net/dsa/microchip/ksz8.c @@ -36,15 +36,14 @@ static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0); + ksz_rmw8(dev, addr, bits, set ? bits : 0); } static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), - dev->dev_ops->get_port_addr(port, offset), - bits, set ? bits : 0); + ksz_rmw8(dev, dev->dev_ops->get_port_addr(port, offset), bits, + set ? bits : 0); } /** @@ -1955,16 +1954,19 @@ int ksz8_setup(struct dsa_switch *ds) ksz_cfg(dev, S_LINK_AGING_CTRL, SW_LINK_AUTO_AGING, true); /* Enable aggressive back off algorithm in half duplex mode. */ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_1, - SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + if (ret) + return ret; /* * Make sure unicast VLAN boundary is set as default and * enable no excessive collision drop. */ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_2, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + ret = ksz_rmw8(dev, REG_SW_CTRL_2, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + if (ret) + return ret; ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_REPLACE_VID, false); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7292bfe2f7ca..4cb14288ff0f 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1447,6 +1447,7 @@ static const struct regmap_range ksz8873_valid_regs[] = { regmap_reg_range(0x3f, 0x3f), /* advanced control registers */ + regmap_reg_range(0x43, 0x43), regmap_reg_range(0x60, 0x6f), regmap_reg_range(0x70, 0x75), regmap_reg_range(0x76, 0x78), From 53898ebabe843bfa7baea9dae152797d5d0563c9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:25 -0700 Subject: [PATCH 0092/1307] net: lapbether: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding lapbeth for ops-locked devices. xsk_notifier+0xa4/0x280 net/xdp/xsk.c:1645 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] unregister_netdevice_many_notify+0xf9d/0x2700 net/core/dev.c:12077 unregister_netdevice_many net/core/dev.c:12140 [inline] unregister_netdevice_queue+0x305/0x3f0 net/core/dev.c:11984 register_netdevice+0x18f1/0x2270 net/core/dev.c:11149 lapbeth_new_device drivers/net/wan/lapbether.c:420 [inline] lapbeth_device_event+0x5b1/0xbe0 drivers/net/wan/lapbether.c:462 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] __dev_notify_flags+0x12c/0x2e0 net/core/dev.c:9497 netif_change_flags+0x108/0x160 net/core/dev.c:9526 dev_change_flags+0xba/0x250 net/core/dev_api.c:68 devinet_ioctl+0x11d5/0x1f50 net/ipv4/devinet.c:1200 inet_ioctl+0x3a7/0x3f0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e67ea9c235b13b4f0020@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e67ea9c235b13b4f0020 Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250806213726.1383379-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 995a7207bdf8..f357a7ac70ac 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -81,7 +81,7 @@ static struct lapbethdev *lapbeth_get_x25_dev(struct net_device *dev) static __inline__ int dev_is_ethdev(struct net_device *dev) { - return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From c64237960819aee1766d03f446ae6de94b1e3f73 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:26 -0700 Subject: [PATCH 0093/1307] hamradio: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding hamradio for ops-locked devices. xsk_notifier+0x89/0x230 net/xdp/xsk.c:1664 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] unregister_netdevice_many_notify+0x14d7/0x1ff0 net/core/dev.c:12156 unregister_netdevice_many net/core/dev.c:12219 [inline] unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12063 register_netdevice+0x1689/0x1ae0 net/core/dev.c:11241 bpq_new_device drivers/net/hamradio/bpqether.c:481 [inline] bpq_device_event+0x491/0x600 drivers/net/hamradio/bpqether.c:523 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] __dev_notify_flags+0x18d/0x2e0 net/core/dev.c:-1 netif_change_flags+0xe8/0x1a0 net/core/dev.c:9608 dev_change_flags+0x130/0x260 net/core/dev_api.c:68 devinet_ioctl+0xbb4/0x1b50 net/ipv4/devinet.c:1200 inet_ioctl+0x3c0/0x4c0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e6300f66a999a6612477@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e6300f66a999a6612477 Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250806213726.1383379-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 0e0fe32d2da4..045c5177262e 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -138,7 +138,7 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) static inline int dev_is_ethdev(struct net_device *dev) { - return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From 33caa208dba6fa639e8a92fd0c8320b652e5550c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 6 Aug 2025 13:21:51 -0700 Subject: [PATCH 0094/1307] hv_netvsc: Fix panic during namespace deletion with VF The existing code move the VF NIC to new namespace when NETDEV_REGISTER is received on netvsc NIC. During deletion of the namespace, default_device_exit_batch() >> default_device_exit_net() is called. When netvsc NIC is moved back and registered to the default namespace, it automatically brings VF NIC back to the default namespace. This will cause the default_device_exit_net() >> for_each_netdev_safe loop unable to detect the list end, and hit NULL ptr: [ 231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0 [ 231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 231.450246] #PF: supervisor read access in kernel mode [ 231.450579] #PF: error_code(0x0000) - not-present page [ 231.450916] PGD 17b8a8067 P4D 0 [ 231.451163] Oops: Oops: 0000 [#1] SMP NOPTI [ 231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY [ 231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 231.452692] Workqueue: netns cleanup_net [ 231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0 [ 231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00 [ 231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246 [ 231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb [ 231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564 [ 231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000 [ 231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340 [ 231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340 [ 231.457161] FS: 0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000 [ 231.457707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0 [ 231.458434] Call Trace: [ 231.458600] [ 231.458777] ops_undo_list+0x100/0x220 [ 231.459015] cleanup_net+0x1b8/0x300 [ 231.459285] process_one_work+0x184/0x340 To fix it, move the ns change to a workqueue, and take rtnl_lock to avoid changing the netdev list when default_device_exit_net() is using it. Cc: stable@vger.kernel.org Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event") Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1754511711-11188-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cb6f5482d203..7397c693f984 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1061,6 +1061,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1075,6 +1076,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f44753756358..39c892e46cb0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2522,6 +2522,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2666,6 +2667,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2707,6 +2710,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2800,6 +2804,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first @@ -2810,10 +2835,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } From bb8aeaa3191b617c6faf8ae937252e059673b7ea Mon Sep 17 00:00:00 2001 From: Fanhua Li Date: Mon, 28 Jul 2025 19:50:27 +0800 Subject: [PATCH 0095/1307] drm/nouveau/nvif: Fix potential memory leak in nvif_vmm_ctor(). When the nvif_vmm_type is invalid, we will return error directly without freeing the args in nvif_vmm_ctor(), which leading a memory leak. Fix it by setting the ret -EINVAL and goto done. Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202312040659.4pJpMafN-lkp@intel.com/ Fixes: 6b252cf42281 ("drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm") Signed-off-by: Fanhua Li Link: https://lore.kernel.org/r/20250728115027.50878-1-lifanhua5@huawei.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvif/vmm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 99296f03371a..07c1ebc2a941 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -219,7 +219,8 @@ nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, case RAW: args->type = NVIF_VMM_V0_TYPE_RAW; break; default: WARN_ON(1); - return -EINVAL; + ret = -EINVAL; + goto done; } memcpy(args->data, argv, argc); From 01c2afe7358385a5381835293dfb6901f11b1691 Mon Sep 17 00:00:00 2001 From: Madhur Kumar Date: Fri, 8 Aug 2025 13:08:40 +0530 Subject: [PATCH 0096/1307] drm/nouveau: fix typos in comments Fixed three spelling mistakes in nouveau_exec.c comments: - alloctor -> allocator - exectued -> executed - depent -> depend No functional changes. Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI") Signed-off-by: Madhur Kumar Link: https://lore.kernel.org/r/20250808073840.376764-1-madhurkumar004@gmail.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 41b7c608c905..46294134f294 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -60,14 +60,14 @@ * virtual address in the GPU's VA space there is no guarantee that the actual * mappings are created in the GPU's MMU. If the given memory is swapped out * at the time the bind operation is executed the kernel will stash the mapping - * details into it's internal alloctor and create the actual MMU mappings once + * details into it's internal allocator and create the actual MMU mappings once * the memory is swapped back in. While this is transparent for userspace, it is * guaranteed that all the backing memory is swapped back in and all the memory * mappings, as requested by userspace previously, are actually mapped once the * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job. * * A VM_BIND job can be executed either synchronously or asynchronously. If - * exectued asynchronously, userspace may provide a list of syncobjs this job + * executed asynchronously, userspace may provide a list of syncobjs this job * will wait for and/or a list of syncobj the kernel will signal once the * VM_BIND job finished execution. If executed synchronously the ioctl will * block until the bind job is finished. For synchronous jobs the kernel will @@ -82,7 +82,7 @@ * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have * an up to date view of the VA space. However, the actual mappings might still * be pending. Hence, EXEC jobs require to have the particular fences - of - * the corresponding VM_BIND jobs they depent on - attached to them. + * the corresponding VM_BIND jobs they depend on - attached to them. */ static int From 65f97cc81b0adc5f49cf6cff5d874be0058e3f41 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:28 -0400 Subject: [PATCH 0097/1307] cgroup/cpuset: Use static_branch_enable_cpuslocked() on cpusets_insane_config_key The following lockdep splat was observed. [ 812.359086] ============================================ [ 812.359089] WARNING: possible recursive locking detected [ 812.359097] -------------------------------------------- [ 812.359100] runtest.sh/30042 is trying to acquire lock: [ 812.359105] ffffffffa7f27420 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0xe/0x20 [ 812.359131] [ 812.359131] but task is already holding lock: [ 812.359134] ffffffffa7f27420 (cpu_hotplug_lock){++++}-{0:0}, at: cpuset_write_resmask+0x98/0xa70 : [ 812.359267] Call Trace: [ 812.359272] [ 812.359367] cpus_read_lock+0x3c/0xe0 [ 812.359382] static_key_enable+0xe/0x20 [ 812.359389] check_insane_mems_config.part.0+0x11/0x30 [ 812.359398] cpuset_write_resmask+0x9f2/0xa70 [ 812.359411] cgroup_file_write+0x1c7/0x660 [ 812.359467] kernfs_fop_write_iter+0x358/0x530 [ 812.359479] vfs_write+0xabe/0x1250 [ 812.359529] ksys_write+0xf9/0x1d0 [ 812.359558] do_syscall_64+0x5f/0xe0 Since commit d74b27d63a8b ("cgroup/cpuset: Change cpuset_rwsem and hotplug lock order"), the ordering of cpu hotplug lock and cpuset_mutex had been reversed. That patch correctly used the cpuslocked version of the static branch API to enable cpusets_pre_enable_key and cpusets_enabled_key, but it didn't do the same for cpusets_insane_config_key. The cpusets_insane_config_key can be enabled in the check_insane_mems_config() which is called from update_nodemask() or cpuset_hotplug_update_tasks() with both cpu hotplug lock and cpuset_mutex held. Deadlock can happen with a pending hotplug event that tries to acquire the cpu hotplug write lock which will block further cpus_read_lock() attempt from check_insane_mems_config(). Fix that by switching to use static_branch_enable_cpuslocked(). Fixes: d74b27d63a8b ("cgroup/cpuset: Change cpuset_rwsem and hotplug lock order") Signed-off-by: Waiman Long Reviewed-by: Juri Lelli Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f74d04429a29..bf149246e001 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -280,7 +280,7 @@ static inline void check_insane_mems_config(nodemask_t *nodes) { if (!cpusets_insane_config() && movable_only_nodes(nodes)) { - static_branch_enable(&cpusets_insane_config_key); + static_branch_enable_cpuslocked(&cpusets_insane_config_key); pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n" "Cpuset allocations might fail even with a lot of memory available.\n", nodemask_pr_args(nodes)); From 150e298ae0ccbecff2357a72fbabd80f8849ea6e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:29 -0400 Subject: [PATCH 0098/1307] cgroup/cpuset: Fix a partition error with CPU hotplug It was found during testing that an invalid leaf partition with an empty effective exclusive CPU list can become a valid empty partition with no CPU afer an offline/online operation of an unrelated CPU. An empty partition root is allowed in the special case that it has no task in its cgroup and has distributed out all its CPUs to its child partitions. That is certainly not the case here. The problem is in the cpumask_subsets() test in the hotplug case (update with no new mask) of update_parent_effective_cpumask() as it also returns true if the effective exclusive CPU list is empty. Fix that by addding the cpumask_empty() test to root out this exception case. Also add the cpumask_empty() test in cpuset_hotplug_update_tasks() to avoid calling update_parent_effective_cpumask() for this special case. Fixes: 0c7f293efc87 ("cgroup/cpuset: Add cpuset.cpus.exclusive.effective for v2") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index bf149246e001..d993e058a663 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1843,7 +1843,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, if (is_partition_valid(cs)) adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); - } else if (is_partition_invalid(cs) && + } else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) && cpumask_subset(xcpus, parent->effective_xcpus)) { struct cgroup_subsys_state *css; struct cpuset *child; @@ -3870,9 +3870,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) partcmd = partcmd_invalidate; /* * On the other hand, an invalid partition root may be transitioned - * back to a regular one. + * back to a regular one with a non-empty effective xcpus. */ - else if (is_partition_valid(parent) && is_partition_invalid(cs)) + else if (is_partition_valid(parent) && is_partition_invalid(cs) && + !cpumask_empty(cs->effective_xcpus)) partcmd = partcmd_update; if (partcmd >= 0) { From 87eba5bc5ab1d99e31c9d3b2c386187da94a5ab1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2025 13:24:30 -0400 Subject: [PATCH 0099/1307] cgroup/cpuset: Remove the unnecessary css_get/put() in cpuset_partition_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The css_get/put() calls in cpuset_partition_write() are unnecessary as an active reference of the kernfs node will be taken which will prevent its removal and guarantee the existence of the css. Only the online check is needed. Signed-off-by: Waiman Long Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d993e058a663..27adb04df675 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3358,14 +3358,12 @@ static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, else return -EINVAL; - css_get(&cs->css); cpus_read_lock(); mutex_lock(&cpuset_mutex); if (is_cpuset_online(cs)) retval = update_prstate(cs, val); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); - css_put(&cs->css); return retval ?: nbytes; } From eea51c6e3f6675b795f6439eaa960eb2948d6905 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 6 Aug 2025 17:33:50 -0700 Subject: [PATCH 0100/1307] cgroup: avoid null de-ref in css_rstat_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit css_rstat_exit() may be called asynchronously in scenarios where preceding calls to css_rstat_init() have not completed. One such example is this sequence below: css_create(...) { ... init_and_link_css(css, ...); err = percpu_ref_init(...); if (err) goto err_free_css; err = cgroup_idr_alloc(...); if (err) goto err_free_css; err = css_rstat_init(css, ...); if (err) goto err_free_css; ... err_free_css: INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); } If any of the three goto jumps are taken, async cleanup will begin and css_rstat_exit() will be invoked on an uninitialized css->rstat_cpu. Avoid accessing the unitialized field by returning early in css_rstat_exit() if this is the case. Signed-off-by: JP Kobryn Suggested-by: Michal Koutný Fixes: 5da3bfa029d68 ("cgroup: use separate rstat trees for each subsystem") Cc: stable@vger.kernel.org # v6.16 Reported-by: syzbot+8d052e8b99e40bc625ed@syzkaller.appspotmail.com Acked-by: Shakeel Butt Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 981e2f77ad4e..a198e40c799b 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -479,6 +479,9 @@ void css_rstat_exit(struct cgroup_subsys_state *css) if (!css_uses_rstat(css)) return; + if (!css->rstat_cpu) + return; + css_rstat_flush(css); /* sanity check */ From eb5ca9094a18fb98777bf4814ea84c93bf7c271d Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sun, 27 Jul 2025 12:59:06 +0200 Subject: [PATCH 0101/1307] mm/vmscan: fix inverted polarity in lru_gen_seq_show() Commit a7694ff11aa9 ("vmscan: don't bother with debugfs_real_fops()") started using debugfs_get_aux_num() to distinguish between the RW "lru_gen" and the RO "lru_gen_full" file [1]. Willy reported the inverted polarity [2] and Al fixed it up in [3]. However, the patch in [1] was applied. Hence, fix this up accordingly. Cc: Alexander Viro Cc: Matthew Wilcox Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20250704040720.GP1880847@ZenIV/ [1] Link: https://lore.kernel.org/all/aGZu3Z730FQtqxsE@casper.infradead.org/ [2] Link: https://lore.kernel.org/all/20250704040720.GP1880847@ZenIV/ [3] Fixes: a7694ff11aa9 ("vmscan: don't bother with debugfs_real_fops()") Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250727105937.7480-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- mm/vmscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7de11524a936..a48aec8bfd92 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5772,9 +5772,9 @@ static int __init init_lru_gen(void) if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) pr_err("lru_gen: failed to create sysfs group\n"); - debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, 1, + debugfs_create_file_aux_num("lru_gen", 0644, NULL, NULL, false, &lru_gen_rw_fops); - debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, 0, + debugfs_create_file_aux_num("lru_gen_full", 0444, NULL, NULL, true, &lru_gen_ro_fops); return 0; From 0af1561b2d60bab2a2b00720a5c7b292ecc549ec Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 8 Aug 2025 12:20:17 -0300 Subject: [PATCH 0102/1307] smb: client: fix race with concurrent opens in unlink(2) According to some logs reported by customers, CIFS client might end up reporting unlinked files as existing in stat(2) due to concurrent opens racing with unlink(2). Besides sending the removal request to the server, the unlink process could involve closing any deferred close as well as marking all existing open handles as deleted to prevent them from deferring closes, which increases the race window for potential concurrent opens. Fix this by unhashing the dentry in cifs_unlink() to prevent any subsequent opens. Any open attempts, while we're still unlinking, will block on parent's i_rwsem. Reported-by: Jay Shin Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: Al Viro Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 75be4b46bc6f..cf9060f0fc08 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1943,15 +1943,24 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *tcon; + __u32 dosattr = 0, origattr = 0; struct TCP_Server_Info *server; struct iattr *attrs = NULL; - __u32 dosattr = 0, origattr = 0; + bool rehash = false; cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry); if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* Unhash dentry in advance to prevent any concurrent opens */ + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) { + __d_drop(dentry); + rehash = true; + } + spin_unlock(&dentry->d_lock); + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2003,7 +2012,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) cifs_drop_nlink(inode); } } else if (rc == -ENOENT) { - d_drop(dentry); + if (simple_positive(dentry)) + d_delete(dentry); } else if (rc == -EBUSY) { if (server->ops->rename_pending_delete) { rc = server->ops->rename_pending_delete(full_path, @@ -2056,6 +2066,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) kfree(attrs); free_xid(xid); cifs_put_tlink(tlink); + if (rehash) + d_rehash(dentry); return rc; } From d84291fc7453df7881a970716f8256273aca5747 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 8 Aug 2025 11:43:29 -0300 Subject: [PATCH 0103/1307] smb: client: fix race with concurrent opens in rename(2) Besides sending the rename request to the server, the rename process also involves closing any deferred close, waiting for outstanding I/O to complete as well as marking all existing open handles as deleted to prevent them from deferring closes, which increases the race window for potential concurrent opens on the target file. Fix this by unhashing the dentry in advance to prevent any concurrent opens on the target. Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: Al Viro Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index cf9060f0fc08..fe453a4b3dc8 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2474,6 +2474,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *tcon; + bool rehash = false; unsigned int xid; int rc, tmprc; int retry_count = 0; @@ -2489,6 +2490,17 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* + * Prevent any concurrent opens on the target by unhashing the dentry. + * VFS already unhashes the target when renaming directories. + */ + if (d_is_positive(target_dentry) && !d_is_dir(target_dentry)) { + if (!d_unhashed(target_dentry)) { + d_drop(target_dentry); + rehash = true; + } + } + tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -2530,6 +2542,8 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, } } + if (!rc) + rehash = false; /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. */ @@ -2588,12 +2602,16 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (!rc) + rehash = false; } /* force revalidate to go get info when needed */ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; cifs_rename_exit: + if (rehash) + d_rehash(target_dentry); kfree(info_buf_source); free_dentry_path(page2); free_dentry_path(page1); From 0e270f32975fd21874185ba53653630dd40bf560 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 7 Aug 2025 10:03:18 +0800 Subject: [PATCH 0104/1307] ASoC: fsl_sai: replace regmap_write with regmap_update_bits Use the regmap_write() for software reset in fsl_sai_config_disable would cause the FSL_SAI_CSR_BCE bit to be cleared. Refer to commit 197c53c8ecb34 ("ASoC: fsl_sai: Don't disable bitclock for i.MX8MP") FSL_SAI_CSR_BCE should not be cleared. So need to use regmap_update_bits() instead of regmap_write() for these bit operations. Fixes: dc78f7e59169d ("ASoC: fsl_sai: Force a software reset when starting in consumer mode") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250807020318.2143219-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index c313b654236c..d0367b21f775 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -809,9 +809,9 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * are running concurrently. */ /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR, 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, @@ -930,11 +930,11 @@ static int fsl_sai_dai_probe(struct snd_soc_dai *cpu_dai) unsigned int ofs = sai->soc_data->reg_offset; /* Software Reset for both Tx and Rx */ - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), 0); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, 0); regmap_update_bits(sai->regmap, FSL_SAI_TCR1(ofs), FSL_SAI_CR1_RFW_MASK(sai->soc_data->fifo_depth), @@ -1824,11 +1824,11 @@ static int fsl_sai_runtime_resume(struct device *dev) regcache_cache_only(sai->regmap, false); regcache_mark_dirty(sai->regmap); - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, FSL_SAI_CSR_SR); usleep_range(1000, 2000); - regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), 0); - regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), 0); + regmap_update_bits(sai->regmap, FSL_SAI_TCSR(ofs), FSL_SAI_CSR_SR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_RCSR(ofs), FSL_SAI_CSR_SR, 0); ret = regcache_sync(sai->regmap); if (ret) From 43e0da37d5cfb23eec6aeee9422f84d86621ce2b Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 6 Aug 2025 15:00:30 +0100 Subject: [PATCH 0105/1307] ASoC: codecs: tx-macro: correct tx_macro_component_drv name We already have a component driver named "RX-MACRO", which is lpass-rx-macro.c. The tx macro component driver's name should be "TX-MACRO" accordingly. Fix it. Cc: Srinivas Kandagatla Signed-off-by: Alexey Klimov Reviewed-by: Neil Armstrong Link: https://patch.msgid.link/20250806140030.691477-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 40d79bee4584..1da34cb3505f 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2229,7 +2229,7 @@ static int tx_macro_register_mclk_output(struct tx_macro *tx) } static const struct snd_soc_component_driver tx_macro_component_drv = { - .name = "RX-MACRO", + .name = "TX-MACRO", .probe = tx_macro_component_probe, .controls = tx_macro_snd_controls, .num_controls = ARRAY_SIZE(tx_macro_snd_controls), From 7cdadac0d2b3614d04651be7104a89a1998efec0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Aug 2025 11:53:24 +0100 Subject: [PATCH 0106/1307] ASoC: codec: sma1307: replace spelling mistake with new error message There is a spelling mistake in a failure message, replace the message with something a little more meaningful. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250808105324.829883-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/sma1307.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/sma1307.c b/sound/soc/codecs/sma1307.c index b3d401ada176..6a601e7134ea 100644 --- a/sound/soc/codecs/sma1307.c +++ b/sound/soc/codecs/sma1307.c @@ -1749,7 +1749,7 @@ static void sma1307_setting_loaded(struct sma1307_priv *sma1307, const char *fil sma1307->set.header_size * sizeof(int)); if ((sma1307->set.checksum >> 8) != SMA1307_SETTING_CHECKSUM) { - dev_err(sma1307->dev, "%s: failed by dismatch \"%s\"\n", + dev_err(sma1307->dev, "%s: checksum failed \"%s\"\n", __func__, setting_file); sma1307->set.status = false; return; From f13ab498726bb6c636d6c5cd8c7df911444316dc Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 23:34:52 +0000 Subject: [PATCH 0107/1307] ASoC: generic: tidyup standardized ASoC menu for generic commit acc84d15e45393fb ("ASoC: generic: Standardize ASoC menu") standardized ASoC generic menu. Then, it moved generic menu position under SoC group. It should be kept generic position. Tidyup it. Suggested-by: Geert Uytterhoeven Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87v7n0c9d0.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index bf362bfca456..ce74818bd715 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -111,7 +111,6 @@ source "sound/soc/bcm/Kconfig" source "sound/soc/cirrus/Kconfig" source "sound/soc/dwc/Kconfig" source "sound/soc/fsl/Kconfig" -source "sound/soc/generic/Kconfig" source "sound/soc/google/Kconfig" source "sound/soc/hisilicon/Kconfig" source "sound/soc/jz4740/Kconfig" @@ -149,5 +148,8 @@ source "sound/soc/codecs/Kconfig" source "sound/soc/sdw_utils/Kconfig" +# generic frame-work +source "sound/soc/generic/Kconfig" + endif # SND_SOC From 633e391d45bda3fc848d26bee6bbe57ef2935713 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 8 Aug 2025 13:57:06 +0800 Subject: [PATCH 0108/1307] ASoC: rt721: fix FU33 Boost Volume control not working This patch fixed FU33 Boost Volume control not working. Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20250808055706.1110766-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt721-sdca.c | 2 ++ sound/soc/codecs/rt721-sdca.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index f6f7c2ffde1c..a4bd29d7220b 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -278,6 +278,8 @@ static void rt721_sdca_jack_preset(struct rt721_sdca_priv *rt721) RT721_ENT_FLOAT_CTL1, 0x4040); rt_sdca_index_write(rt721->mbq_regmap, RT721_HDA_SDCA_FLOAT, RT721_ENT_FLOAT_CTL4, 0x1201); + rt_sdca_index_write(rt721->mbq_regmap, RT721_BOOST_CTRL, + RT721_BST_4CH_TOP_GATING_CTRL1, 0x002a); regmap_write(rt721->regmap, 0x2f58, 0x07); } diff --git a/sound/soc/codecs/rt721-sdca.h b/sound/soc/codecs/rt721-sdca.h index 0a82c107b19a..71fac9cd8739 100644 --- a/sound/soc/codecs/rt721-sdca.h +++ b/sound/soc/codecs/rt721-sdca.h @@ -56,6 +56,7 @@ struct rt721_sdca_dmic_kctrl_priv { #define RT721_CBJ_CTRL 0x0a #define RT721_CAP_PORT_CTRL 0x0c #define RT721_CLASD_AMP_CTRL 0x0d +#define RT721_BOOST_CTRL 0x0f #define RT721_VENDOR_REG 0x20 #define RT721_RC_CALIB_CTRL 0x40 #define RT721_VENDOR_EQ_L 0x53 @@ -93,6 +94,9 @@ struct rt721_sdca_dmic_kctrl_priv { /* Index (NID:0dh) */ #define RT721_CLASD_AMP_2CH_CAL 0x14 +/* Index (NID:0fh) */ +#define RT721_BST_4CH_TOP_GATING_CTRL1 0x05 + /* Index (NID:20h) */ #define RT721_JD_PRODUCT_NUM 0x00 #define RT721_ANALOG_BIAS_CTL3 0x04 From f48d7a1b0bf11d16d8c9f77a5b9c80a82272f625 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 7 Aug 2025 17:24:32 +0800 Subject: [PATCH 0109/1307] ASoC: rt1320: fix random cycle mute issue This patch fixed the random cycle mute issue that occurs during long-time playback. Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20250807092432.997989-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1320-sdw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c index b13d7a99bf63..dcddc28e8856 100644 --- a/sound/soc/codecs/rt1320-sdw.c +++ b/sound/soc/codecs/rt1320-sdw.c @@ -109,6 +109,7 @@ static const struct reg_sequence rt1320_blind_write[] = { { 0x0000d540, 0x01 }, { 0xd172, 0x2a }, { 0xc5d6, 0x01 }, + { 0xd478, 0xff }, }; static const struct reg_sequence rt1320_vc_blind_write[] = { @@ -159,7 +160,7 @@ static const struct reg_sequence rt1320_vc_blind_write[] = { { 0xd471, 0x3a }, { 0xd474, 0x11 }, { 0xd475, 0x32 }, - { 0xd478, 0x64 }, + { 0xd478, 0xff }, { 0xd479, 0x20 }, { 0xd47a, 0x10 }, { 0xd47c, 0xff }, From b11f2a9745401d9ccc51c91b5482044d2ea936e8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Aug 2025 11:49:43 +0100 Subject: [PATCH 0110/1307] ASoC: tas2781: Fix spelling mistake "dismatch" -> "mismatch" There is a spelling mistake (or neologism of dis and match) in a dev_err message. Fix it. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250808104943.829668-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 9f4d965a1335..8e7e45c046b8 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -1480,7 +1480,7 @@ static ssize_t acoustic_ctl_write(struct file *file, return PTR_ERR(src); if (src[0] > max_pkg_len && src[0] != count) { - dev_err(priv->dev, "pkg(%u), max(%u), count(%u) dismatch.\n", + dev_err(priv->dev, "pkg(%u), max(%u), count(%u) mismatch.\n", src[0], max_pkg_len, (unsigned int)count); ret = 0; goto exit; From c6993c4cb91803fceb82d6b5e0ec5e0aec2d0ad6 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 4 Aug 2025 16:20:31 +0800 Subject: [PATCH 0111/1307] erofs: Fallback to normal access if DAX is not supported on extra device If using multiple devices, we should check if the extra device support DAX instead of checking the primary device when deciding if to use DAX to access a file. If an extra device does not support DAX we should fallback to normal access otherwise the data on that device will be inaccessible. Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Jacky Cao Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250804082030.3667257-2-Yuezhang.Mo@sony.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1020aa60771..8c7a5985b4ee 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -174,6 +174,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), &dif->dax_part_off, NULL, NULL); + if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) { + erofs_info(sb, "DAX unsupported by %s. Turning off DAX.", + dif->path); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } else if (!S_ISREG(file_inode(file)->i_mode)) { fput(file); return -EINVAL; @@ -210,8 +215,13 @@ static int erofs_scan_devices(struct super_block *sb, ondisk_extradevs, sbi->devs->extra_devices); return -EINVAL; } - if (!ondisk_extradevs) + if (!ondisk_extradevs) { + if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) { + erofs_info(sb, "DAX unsupported by block device. Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } return 0; + } if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb)) sbi->devs->flatdev = true; @@ -338,7 +348,6 @@ static int erofs_read_superblock(struct super_block *sb) if (ret < 0) goto out; - /* handle multiple devices */ ret = erofs_scan_devices(sb, dsb); if (erofs_sb_has_48bit(sbi)) @@ -671,14 +680,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return invalfc(fc, "cannot use fsoffset in fscache mode"); } - if (test_opt(&sbi->opt, DAX_ALWAYS)) { - if (!sbi->dif0.dax_dev) { - errorfc(fc, "DAX unsupported by block device. Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); - } else if (sbi->blkszbits != PAGE_SHIFT) { - errorfc(fc, "unsupported blocksize for DAX"); - clear_opt(&sbi->opt, DAX_ALWAYS); - } + if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) { + erofs_info(sb, "unsupported blocksize for DAX"); + clear_opt(&sbi->opt, DAX_ALWAYS); } sb->s_time_gran = 1; From 74da24f0ac9b8aabfb8d7feeba6c32ddff3065e0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Jul 2025 14:44:49 +0200 Subject: [PATCH 0112/1307] erofs: Do not select tristate symbols from bool symbols The EROFS filesystem has many configurable options, controlled through boolean Kconfig symbols. When enabled, these options may need to enable additional library functionality elsewhere. Currently this is done by selecting the symbol for the additional functionality. However, if EROFS_FS itself is modular, and the target symbol is a tristate symbol, the additional functionality is always forced built-in. Selecting tristate symbols from a tristate symbol does keep modular transitivity. Hence fix this by moving selects of tristate symbols to the main EROFS_FS symbol. Signed-off-by: Geert Uytterhoeven Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/da1b899e511145dd43fd2d398f64b2e03c6a39e7.1753879351.git.geert+renesas@glider.be Signed-off-by: Gao Xiang --- fs/erofs/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 7b26efc271ee..d81f3318417d 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,18 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CACHEFILES if EROFS_FS_ONDEMAND select CRC32 + select CRYPTO if EROFS_FS_ZIP_ACCEL + select CRYPTO_DEFLATE if EROFS_FS_ZIP_ACCEL select FS_IOMAP + select LZ4_DECOMPRESS if EROFS_FS_ZIP + select NETFS_SUPPORT if EROFS_FS_ONDEMAND + select XXHASH if EROFS_FS_XATTR + select XZ_DEC if EROFS_FS_ZIP_LZMA + select XZ_DEC_MICROLZMA if EROFS_FS_ZIP_LZMA + select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE + select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline @@ -38,7 +48,6 @@ config EROFS_FS_DEBUG config EROFS_FS_XATTR bool "EROFS extended attributes" depends on EROFS_FS - select XXHASH default y help Extended attributes are name:value pairs associated with inodes by @@ -94,7 +103,6 @@ config EROFS_FS_BACKED_BY_FILE config EROFS_FS_ZIP bool "EROFS Data Compression Support" depends on EROFS_FS - select LZ4_DECOMPRESS default y help Enable transparent compression support for EROFS file systems. @@ -104,8 +112,6 @@ config EROFS_FS_ZIP config EROFS_FS_ZIP_LZMA bool "EROFS LZMA compressed data support" depends on EROFS_FS_ZIP - select XZ_DEC - select XZ_DEC_MICROLZMA help Saying Y here includes support for reading EROFS file systems containing LZMA compressed data, specifically called microLZMA. It @@ -117,7 +123,6 @@ config EROFS_FS_ZIP_LZMA config EROFS_FS_ZIP_DEFLATE bool "EROFS DEFLATE compressed data support" depends on EROFS_FS_ZIP - select ZLIB_INFLATE help Saying Y here includes support for reading EROFS file systems containing DEFLATE compressed data. It gives better compression @@ -132,7 +137,6 @@ config EROFS_FS_ZIP_DEFLATE config EROFS_FS_ZIP_ZSTD bool "EROFS Zstandard compressed data support" depends on EROFS_FS_ZIP - select ZSTD_DECOMPRESS help Saying Y here includes support for reading EROFS file systems containing Zstandard compressed data. It gives better compression @@ -147,8 +151,6 @@ config EROFS_FS_ZIP_ZSTD config EROFS_FS_ZIP_ACCEL bool "EROFS hardware decompression support" depends on EROFS_FS_ZIP - select CRYPTO - select CRYPTO_DEFLATE help Saying Y here includes hardware accelerator support for reading EROFS file systems containing compressed data. It gives better @@ -163,9 +165,7 @@ config EROFS_FS_ZIP_ACCEL config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support (deprecated)" depends on EROFS_FS - select NETFS_SUPPORT select FSCACHE - select CACHEFILES select CACHEFILES_ONDEMAND help This permits EROFS to use fscache-backed data blobs with on-demand From c99fab6e80b76422741d34aafc2f930a482afbdd Mon Sep 17 00:00:00 2001 From: Junli Liu Date: Tue, 5 Aug 2025 09:19:58 +0800 Subject: [PATCH 0113/1307] erofs: fix atomic context detection when !CONFIG_DEBUG_LOCK_ALLOC Since EROFS handles decompression in non-atomic contexts due to uncontrollable decompression latencies and vmap() usage, it tries to detect atomic contexts and only kicks off a kworker on demand in order to reduce unnecessary scheduling overhead. However, the current approach is insufficient and can lead to sleeping function calls in invalid contexts, causing kernel warnings and potential system instability. See the stacktrace [1] and previous discussion [2]. The current implementation only checks rcu_read_lock_any_held(), which behaves inconsistently across different kernel configurations: - When CONFIG_DEBUG_LOCK_ALLOC is enabled: correctly detects RCU critical sections by checking rcu_lock_map - When CONFIG_DEBUG_LOCK_ALLOC is disabled: compiles to "!preemptible()", which only checks preempt_count and misses RCU critical sections This patch introduces z_erofs_in_atomic() to provide comprehensive atomic context detection: 1. Check RCU preemption depth when CONFIG_PREEMPTION is enabled, as RCU critical sections may not affect preempt_count but still require atomic handling 2. Always use async processing when CONFIG_PREEMPT_COUNT is disabled, as preemption state cannot be reliably determined 3. Fall back to standard preemptible() check for remaining cases The function replaces the previous complex condition check and ensures that z_erofs always uses (kthread_)work in atomic contexts to minimize scheduling overhead and prevent sleeping in invalid contexts. [1] Problem stacktrace [ 61.266692] BUG: sleeping function called from invalid context at kernel/locking/rtmutex_api.c:510 [ 61.266702] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 107, name: irq/54-ufshcd [ 61.266704] preempt_count: 0, expected: 0 [ 61.266705] RCU nest depth: 2, expected: 0 [ 61.266710] CPU: 0 UID: 0 PID: 107 Comm: irq/54-ufshcd Tainted: G W O 6.12.17 #1 [ 61.266714] Tainted: [W]=WARN, [O]=OOT_MODULE [ 61.266715] Hardware name: schumacher (DT) [ 61.266717] Call trace: [ 61.266718] dump_backtrace+0x9c/0x100 [ 61.266727] show_stack+0x20/0x38 [ 61.266728] dump_stack_lvl+0x78/0x90 [ 61.266734] dump_stack+0x18/0x28 [ 61.266736] __might_resched+0x11c/0x180 [ 61.266743] __might_sleep+0x64/0xc8 [ 61.266745] mutex_lock+0x2c/0xc0 [ 61.266748] z_erofs_decompress_queue+0xe8/0x978 [ 61.266753] z_erofs_decompress_kickoff+0xa8/0x190 [ 61.266756] z_erofs_endio+0x168/0x288 [ 61.266758] bio_endio+0x160/0x218 [ 61.266762] blk_update_request+0x244/0x458 [ 61.266766] scsi_end_request+0x38/0x278 [ 61.266770] scsi_io_completion+0x4c/0x600 [ 61.266772] scsi_finish_command+0xc8/0xe8 [ 61.266775] scsi_complete+0x88/0x148 [ 61.266777] blk_mq_complete_request+0x3c/0x58 [ 61.266780] scsi_done_internal+0xcc/0x158 [ 61.266782] scsi_done+0x1c/0x30 [ 61.266783] ufshcd_compl_one_cqe+0x12c/0x438 [ 61.266786] __ufshcd_transfer_req_compl+0x2c/0x78 [ 61.266788] ufshcd_poll+0xf4/0x210 [ 61.266789] ufshcd_transfer_req_compl+0x50/0x88 [ 61.266791] ufshcd_intr+0x21c/0x7c8 [ 61.266792] irq_forced_thread_fn+0x44/0xd8 [ 61.266796] irq_thread+0x1a4/0x358 [ 61.266799] kthread+0x12c/0x138 [ 61.266802] ret_from_fork+0x10/0x20 [2] https://lore.kernel.org/r/58b661d0-0ebb-4b45-a10d-c5927fb791cd@paulmck-laptop Signed-off-by: Junli Liu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250805011957.911186-1-liujunli@lixiang.com [ Gao Xiang: Use the original trace in v1. ] Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 792f20888a8f..2d73297003d2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1432,6 +1432,16 @@ static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) } #endif +/* Use (kthread_)work in atomic contexts to minimize scheduling overhead */ +static inline bool z_erofs_in_atomic(void) +{ + if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth()) + return true; + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return true; + return !preemptible(); +} + static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { @@ -1446,8 +1456,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, if (atomic_add_return(bios, &io->pending_bios)) return; - /* Use (kthread_)work and sync decompression for atomic contexts only */ - if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) { + if (z_erofs_in_atomic()) { #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD struct kthread_worker *worker; From 0b96d9bed324a1c1b7d02bfb9596351ef178428d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 7 Aug 2025 16:20:19 +0800 Subject: [PATCH 0114/1307] erofs: fix block count report when 48-bit layout is on Fix incorrect shift order when combining the 48-bit block count. Fixes: 2e1473d5195f ("erofs: implement 48-bit block addressing for unencoded inodes") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250807082019.3093539-1-hsiangkao@linux.alibaba.com --- fs/erofs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 8c7a5985b4ee..1b529ace4db0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -323,8 +323,8 @@ static int erofs_read_superblock(struct super_block *sb) sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); - sbi->dif0.blocks = (sbi->dif0.blocks << 32) | - le16_to_cpu(dsb->rb.blocks_hi); + sbi->dif0.blocks = sbi->dif0.blocks | + ((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32); } else { sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } From 25daf9af0ac1bf12490b723b5efaf8dcc85980bc Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 30 Jul 2025 15:51:51 -0500 Subject: [PATCH 0115/1307] soc: qcom: mdt_loader: Deal with zero e_shentsize Firmware that doesn't provide section headers leave both e_shentsize and e_shnum 0, which obvious isn't compatible with the newly introduced stricter checks. Make the section-related checks conditional on either of these values being non-zero. Fixes: 9f9967fed9d0 ("soc: qcom: mdt_loader: Ensure we don't read past the ELF header") Reported-by: Val Packett Closes: https://lore.kernel.org/all/ece307c3-7d65-440f-babd-88cf9705b908@packett.cool/ Reported-by: Neil Armstrong Closes: https://lore.kernel.org/all/aec9cd03-6fc2-4dc8-b937-8b7cf7bf4128@linaro.org/ Signed-off-by: Bjorn Andersson Fixes: 9f35ab0e53cc ("soc: qcom: mdt_loader: Fix error return values in mdt_header_valid()") Tested-by: Neil Armstrong # on SM8650-QRD Reviewed-by: Dmitry Baryshkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250730-mdt-loader-shentsize-zero-v1-1-04f43186229c@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/mdt_loader.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index 0ca268bdf1f8..5710ac0c07a8 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -39,12 +39,14 @@ static bool mdt_header_valid(const struct firmware *fw) if (phend > fw->size) return false; - if (ehdr->e_shentsize != sizeof(struct elf32_shdr)) - return false; + if (ehdr->e_shentsize || ehdr->e_shnum) { + if (ehdr->e_shentsize != sizeof(struct elf32_shdr)) + return false; - shend = size_add(size_mul(sizeof(struct elf32_shdr), ehdr->e_shnum), ehdr->e_shoff); - if (shend > fw->size) - return false; + shend = size_add(size_mul(sizeof(struct elf32_shdr), ehdr->e_shnum), ehdr->e_shoff); + if (shend > fw->size) + return false; + } return true; } From 61399e0c5410567ef60cb1cda34cca42903842e3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Aug 2025 19:03:22 +0200 Subject: [PATCH 0116/1307] rcu: Fix racy re-initialization of irq_work causing hangs RCU re-initializes the deferred QS irq work everytime before attempting to queue it. However there are situations where the irq work is attempted to be queued even though it is already queued. In that case re-initializing messes-up with the irq work queue that is about to be handled. The chances for that to happen are higher when the architecture doesn't support self-IPIs and irq work are then all lazy, such as with the following sequence: 1) rcu_read_unlock() is called when IRQs are disabled and there is a grace period involving blocked tasks on the node. The irq work is then initialized and queued. 2) The related tasks are unblocked and the CPU quiescent state is reported. rdp->defer_qs_iw_pending is reset to DEFER_QS_IDLE, allowing the irq work to be requeued in the future (note the previous one hasn't fired yet). 3) A new grace period starts and the node has blocked tasks. 4) rcu_read_unlock() is called when IRQs are disabled again. The irq work is re-initialized (but it's queued! and its node is cleared) and requeued. Which means it's requeued to itself. 5) The irq work finally fires with the tick. But since it was requeued to itself, it loops and hangs. Fix this with initializing the irq work only once before the CPU boots. Fixes: b41642c87716 ("rcu: Fix rcu_read_unlock() deadloop due to IRQ work") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071303.c1134cce-lkp@intel.com Signed-off-by: Frederic Weisbecker Reviewed-by: Joel Fernandes Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..8eff357b0436 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4262,6 +4262,8 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + rcu_preempt_deferred_qs_init(rdp); rcu_spawn_rnp_kthreads(rnp); rcu_spawn_cpu_nocb_kthread(cpu); ASSERT_EXCLUSIVE_WRITER(rcu_state.n_online_cpus); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index de6ca13a7b5f..b8bbe7960cda 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -488,6 +488,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_flavor_sched_clock_irq(int user); static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static bool rcu_is_callbacks_kthread(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fc14adf15cbb..4cd170b2d655 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -763,8 +763,6 @@ static void rcu_read_unlock_special(struct task_struct *t) cpu_online(rdp->cpu)) { // Get scheduler to re-evaluate and call hooks. // If !IRQ_WORK, FQS scan will eventually IPI. - rdp->defer_qs_iw = - IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); rdp->defer_qs_iw_pending = DEFER_QS_PENDING; irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu); } @@ -904,6 +902,10 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) } } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) +{ + rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); +} #else /* #ifdef CONFIG_PREEMPT_RCU */ /* @@ -1103,6 +1105,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) { } + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* From 22571172257a55c443f1a9306e963da4c6187e83 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:53 +0100 Subject: [PATCH 0117/1307] dt-bindings: dma: qcom: bam-dma: Add missing required properties num-channels and qcom,num-ees are required when there are no clocks specified in the device tree, because we have no reliable way to read them from the hardware registers if we cannot ensure the BAM hardware is up when the device is being probed. This has often been forgotten when adding new SoC device trees, so make this clear by describing this requirement in the schema. Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-7-f560889e65d8@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml index f2f87f0f545b..6493a6968bb4 100644 --- a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml @@ -92,8 +92,12 @@ required: anyOf: - required: - qcom,powered-remotely + - num-channels + - qcom,num-ees - required: - qcom,controlled-remotely + - num-channels + - qcom,num-ees - required: - clocks - clock-names From 5068b5254812433e841a40886e695633148d362d Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:54 +0100 Subject: [PATCH 0118/1307] dmaengine: qcom: bam_dma: Fix DT error handling for num-channels/ees When we don't have a clock specified in the device tree, we have no way to ensure the BAM is on. This is often the case for remotely-controlled or remotely-powered BAM instances. In this case, we need to read num-channels from the DT to have all the necessary information to complete probing. However, at the moment invalid device trees without clock and without num-channels still continue probing, because the error handling is missing return statements. The driver will then later try to read the number of channels from the registers. This is unsafe, because it relies on boot firmware and lucky timing to succeed. Unfortunately, the lack of proper error handling here has been abused for several Qualcomm SoCs upstream, causing early boot crashes in several situations [1, 2]. Avoid these early crashes by erroring out when any of the required DT properties are missing. Note that this will break some of the existing DTs upstream (mainly BAM instances related to the crypto engine). However, clearly these DTs have never been tested properly, since the error in the kernel log was just ignored. It's safer to disable the crypto engine for these broken DTBs. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Fixes: 48d163b1aa6e ("dmaengine: qcom: bam_dma: get num-channels and num-ees from dt") Signed-off-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-8-f560889e65d8@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index bbc3276992bb..2cf060174795 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -1283,13 +1283,17 @@ static int bam_dma_probe(struct platform_device *pdev) if (!bdev->bamclk) { ret = of_property_read_u32(pdev->dev.of_node, "num-channels", &bdev->num_channels); - if (ret) + if (ret) { dev_err(bdev->dev, "num-channels unspecified in dt\n"); + return ret; + } ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees", &bdev->num_ees); - if (ret) + if (ret) { dev_err(bdev->dev, "num-ees unspecified in dt\n"); + return ret; + } } ret = clk_prepare_enable(bdev->bamclk); From f18c9e79bbe65627805fff6aac3ea96b6b55b53d Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Sun, 10 Aug 2025 18:00:19 +0800 Subject: [PATCH 0119/1307] arm64: dts: rockchip: mark eeprom as read-only for Radxa E52C The eeprom on the Radxa E52C SBC contains manufacturer data such as the mac address, so it should be marked as read-only. Fixes: 9be4171219b6 ("arm64: dts: rockchip: Add Radxa E52C") Signed-off-by: Chukun Pan Link: https://lore.kernel.org/r/20250810100020.445053-2-amadeus@jmu.edu.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3582-radxa-e52c.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3582-radxa-e52c.dts b/arch/arm64/boot/dts/rockchip/rk3582-radxa-e52c.dts index e04f21d8c831..431ff77d4518 100644 --- a/arch/arm64/boot/dts/rockchip/rk3582-radxa-e52c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3582-radxa-e52c.dts @@ -250,6 +250,7 @@ eeprom@50 { compatible = "belling,bl24c16a", "atmel,24c16"; reg = <0x50>; pagesize = <16>; + read-only; vcc-supply = <&vcc_3v3_pmu>; }; }; From d1f9c497618dece06a00e0b2995ed6b38fafe6b5 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 30 Jul 2025 11:21:26 +0100 Subject: [PATCH 0120/1307] arm64: dts: rockchip: Add vcc-supply to SPI flash on rk3399-pinebook-pro As described in the pinebookpro_v2.1_mainboard_schematic.pdf page 10, he SPI Flash's VCC connector is connected to VCC_3V0 power source. This fixes the following warning: spi-nor spi1.0: supply vcc not found, using dummy regulator Fixes: 5a65505a69884 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Signed-off-by: Peter Robinson Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20250730102129.224468-1-pbrobinson@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 5a8551d9ffe4..b33a1509a8e9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -959,6 +959,7 @@ spiflash: flash@0 { reg = <0>; m25p,fast-read; spi-max-frequency = <10000000>; + vcc-supply = <&vcc_3v0>; }; }; From 0db77eccd964b11ab2b757031d1354fcc5a025ea Mon Sep 17 00:00:00 2001 From: Christopher Eby Date: Sat, 9 Aug 2025 20:00:06 -0700 Subject: [PATCH 0121/1307] ALSA: hda/realtek: Add Framework Laptop 13 (AMD Ryzen AI 300) to quirks Framework Laptop 13 (AMD Ryzen AI 300) requires the same quirk for headset detection as other Framework 13 models. Signed-off-by: Christopher Eby Cc: Link: https://patch.msgid.link/20250810030006.9060-1-kreed@kreed.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e27a36e4e92a..337e33a59de8 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7158,6 +7158,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 From b6bcbce3359619d05bf387d4f5cc3af63668dbaa Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 31 Jul 2025 13:18:32 +0100 Subject: [PATCH 0122/1307] soc/tegra: pmc: Ensure power-domains are in a known state After commit 13a4b7fb6260 ("pmdomain: core: Leave powered-on genpds on until late_initcall_sync") was applied, the Tegra210 Jetson TX1 board failed to boot. Looking into this issue, before this commit was applied, if any of the Tegra power-domains were in 'on' state when the kernel booted, they were being turned off by the genpd core before any driver had chance to request them. This was purely by luck and a consequence of the power-domains being turned off earlier during boot. After this commit was applied, any power-domains in the 'on' state are kept on for longer during boot and therefore, may never transitioned to the off state before they are requested/used. The hang on the Tegra210 Jetson TX1 is caused because devices in some power-domains are accessed without the power-domain being turned off and on, indicating that the power-domain is not in a completely on state. >From reviewing the Tegra PMC driver code, if a power-domain is in the 'on' state there is no guarantee that all the necessary clocks associated with the power-domain are on and even if they are they would not have been requested via the clock framework and so could be turned off later. Some power-domains also have a 'clamping' register that needs to be configured as well. In short, if a power-domain is already 'on' it is difficult to know if it has been configured correctly. Given that the power-domains happened to be switched off during boot previously, to ensure that they are in a good known state on boot, fix this by switching off any power-domains that are on initially when registering the power-domains with the genpd framework. Note that commit 05cfb988a4d0 ("soc/tegra: pmc: Initialise resets associated with a power partition") updated the tegra_powergate_of_get_resets() function to pass the 'off' to ensure that the resets for the power-domain are in the correct state on boot. However, now that we may power off a domain on boot, if it is on, it is better to move this logic into the tegra_powergate_add() function so that there is a single place where we are handling the initial state of the power-domain. Fixes: a38045121bf4 ("soc/tegra: pmc: Add generic PM domain support") Signed-off-by: Jon Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250731121832.213671-1-jonathanh@nvidia.com Signed-off-by: Ulf Hansson --- drivers/soc/tegra/pmc.c | 51 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 2a5f24ee858c..034a2a535a1e 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1232,7 +1232,7 @@ static int tegra_powergate_of_get_clks(struct tegra_powergate *pg, } static int tegra_powergate_of_get_resets(struct tegra_powergate *pg, - struct device_node *np, bool off) + struct device_node *np) { struct device *dev = pg->pmc->dev; int err; @@ -1247,22 +1247,6 @@ static int tegra_powergate_of_get_resets(struct tegra_powergate *pg, err = reset_control_acquire(pg->reset); if (err < 0) { pr_err("failed to acquire resets: %d\n", err); - goto out; - } - - if (off) { - err = reset_control_assert(pg->reset); - } else { - err = reset_control_deassert(pg->reset); - if (err < 0) - goto out; - - reset_control_release(pg->reset); - } - -out: - if (err) { - reset_control_release(pg->reset); reset_control_put(pg->reset); } @@ -1308,20 +1292,43 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) goto set_available; } - err = tegra_powergate_of_get_resets(pg, np, off); + err = tegra_powergate_of_get_resets(pg, np); if (err < 0) { dev_err(dev, "failed to get resets for %pOFn: %d\n", np, err); goto remove_clks; } - if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) { - if (off) - WARN_ON(tegra_powergate_power_up(pg, true)); + /* + * If the power-domain is off, then ensure the resets are asserted. + * If the power-domain is on, then power down to ensure that when is + * it turned on the power-domain, clocks and resets are all in the + * expected state. + */ + if (off) { + err = reset_control_assert(pg->reset); + if (err) { + pr_err("failed to assert resets: %d\n", err); + goto remove_resets; + } + } else { + err = tegra_powergate_power_down(pg); + if (err) { + dev_err(dev, "failed to turn off PM domain %s: %d\n", + pg->genpd.name, err); + goto remove_resets; + } + } + /* + * If PM_GENERIC_DOMAINS is not enabled, power-on + * the domain and skip the genpd registration. + */ + if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) { + WARN_ON(tegra_powergate_power_up(pg, true)); goto remove_resets; } - err = pm_genpd_init(&pg->genpd, NULL, off); + err = pm_genpd_init(&pg->genpd, NULL, true); if (err < 0) { dev_err(dev, "failed to initialise PM domain %pOFn: %d\n", np, err); From 2c223f7239f376a90d71903ec474ba887cf21d94 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Wed, 6 Aug 2025 10:24:21 -0700 Subject: [PATCH 0123/1307] of: reserved_mem: Restructure call site for dma_contiguous_early_fixup() Restructure the call site for dma_contiguous_early_fixup() to where the reserved_mem nodes are being parsed from the DT so that dma_mmu_remap[] is populated before dma_contiguous_remap() is called. Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Oreoluwa Babatunde Tested-by: William Zhang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250806172421.2748302-1-oreoluwa.babatunde@oss.qualcomm.com --- drivers/of/of_reserved_mem.c | 16 ++++++++++++---- include/linux/dma-map-ops.h | 3 +++ kernel/dma/contiguous.c | 2 -- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 77016c0cc296..7350b23cb734 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "of_private.h" @@ -175,13 +176,17 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, base = dt_mem_next_cell(dt_root_addr_cells, &prop); size = dt_mem_next_cell(dt_root_size_cells, &prop); - if (size && - early_init_dt_reserve_memory(base, size, nomap) == 0) + if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) { + /* Architecture specific contiguous memory fixup. */ + if (of_flat_dt_is_compatible(node, "shared-dma-pool") && + of_get_flat_dt_prop(node, "reusable", NULL)) + dma_contiguous_early_fixup(base, size); pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); - else + } else { pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n", uname, &base, (unsigned long)(size / SZ_1M)); + } len -= t_len; } @@ -472,7 +477,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam uname, (unsigned long)(size / SZ_1M)); return -ENOMEM; } - + /* Architecture specific contiguous memory fixup. */ + if (of_flat_dt_is_compatible(node, "shared-dma-pool") && + of_get_flat_dt_prop(node, "reusable", NULL)) + dma_contiguous_early_fixup(base, size); /* Save region in the reserved_mem array */ fdt_reserved_mem_save_node(node, uname, base, size); return 0; diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 67af8a55185d..d9b9dcba6ff7 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -483,8 +483,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) pr_err("Reserved memory: unable to setup CMA region\n"); return err; } - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) dma_contiguous_default_area = cma; From 1db9df89a213318a48d958385dc1b17b379dc32b Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sun, 3 Aug 2025 14:57:25 +0800 Subject: [PATCH 0124/1307] HID: intel-thc-hid: intel-quicki2c: Fix ACPI dsd ICRS/ISUB length The QuickI2C ACPI _DSD methods return ICRS and ISUB data with a trailing byte, making the actual length is one more byte than the structs defined. It caused stack-out-of-bounds and kernel crash: kernel: BUG: KASAN: stack-out-of-bounds in quicki2c_acpi_get_dsd_property.constprop.0+0x111/0x1b0 [intel_quicki2c] kernel: Write of size 12 at addr ffff888106d1f900 by task kworker/u33:2/75 kernel: kernel: CPU: 3 UID: 0 PID: 75 Comm: kworker/u33:2 Not tainted 6.16.0+ #3 PREEMPT(voluntary) kernel: Workqueue: async async_run_entry_fn kernel: Call Trace: kernel: kernel: dump_stack_lvl+0x76/0xa0 kernel: print_report+0xd1/0x660 kernel: ? __pfx__raw_spin_lock_irqsave+0x10/0x10 kernel: ? __kasan_slab_free+0x5d/0x80 kernel: ? kasan_addr_to_slab+0xd/0xb0 kernel: kasan_report+0xe1/0x120 kernel: ? quicki2c_acpi_get_dsd_property.constprop.0+0x111/0x1b0 [intel_quicki2c] kernel: ? quicki2c_acpi_get_dsd_property.constprop.0+0x111/0x1b0 [intel_quicki2c] kernel: kasan_check_range+0x11c/0x200 kernel: __asan_memcpy+0x3b/0x80 kernel: quicki2c_acpi_get_dsd_property.constprop.0+0x111/0x1b0 [intel_quicki2c] kernel: ? __pfx_quicki2c_acpi_get_dsd_property.constprop.0+0x10/0x10 [intel_quicki2c] kernel: quicki2c_get_acpi_resources+0x237/0x730 [intel_quicki2c] [...] kernel: kernel: kernel: The buggy address belongs to stack of task kworker/u33:2/75 kernel: and is located at offset 48 in frame: kernel: quicki2c_get_acpi_resources+0x0/0x730 [intel_quicki2c] kernel: kernel: This frame has 3 objects: kernel: [32, 36) 'hid_desc_addr' kernel: [48, 59) 'i2c_param' kernel: [80, 224) 'i2c_config' ACPI DSD methods return: \_SB.PC00.THC0.ICRS Buffer 000000003fdc947b 001 Len 0C = 0A 00 80 1A 06 00 00 00 00 00 00 00 \_SB.PC00.THC0.ISUB Buffer 00000000f2fcbdc4 001 Len 91 = 00 00 00 00 00 00 00 00 00 00 00 00 Adding reserved padding to quicki2c_subip_acpi_parameter/config. Fixes: 5282e45ccbfa9 ("HID: intel-thc-hid: intel-quicki2c: Add THC QuickI2C ACPI interfaces") Signed-off-by: Aaron Ma Reviewed-by: Even Xu Tested-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h index 93d6fa982d60..d412eafcf9ea 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-dev.h @@ -77,6 +77,7 @@ struct quicki2c_subip_acpi_parameter { u16 device_address; u64 connection_speed; u8 addressing_mode; + u8 reserved; } __packed; /** @@ -126,6 +127,7 @@ struct quicki2c_subip_acpi_config { u64 HMTD; u64 HMRD; u64 HMSL; + u8 reserved; }; /** From a7fc15ed629be89e51e09b743277c53e0a0168f5 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sun, 3 Aug 2025 14:57:26 +0800 Subject: [PATCH 0125/1307] HID: intel-thc-hid: intel-thc: Fix incorrect pointer arithmetic in I2C regs save Improper use of secondary pointer (&dev->i2c_subip_regs) caused kernel crash and out-of-bounds error: BUG: KASAN: slab-out-of-bounds in _regmap_bulk_read+0x449/0x510 Write of size 4 at addr ffff888136005dc0 by task kworker/u33:5/5107 CPU: 3 UID: 0 PID: 5107 Comm: kworker/u33:5 Not tainted 6.16.0+ #3 PREEMPT(voluntary) Workqueue: async async_run_entry_fn Call Trace: dump_stack_lvl+0x76/0xa0 print_report+0xd1/0x660 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? kasan_complete_mode_report_info+0x26/0x200 kasan_report+0xe1/0x120 ? _regmap_bulk_read+0x449/0x510 ? _regmap_bulk_read+0x449/0x510 __asan_report_store4_noabort+0x17/0x30 _regmap_bulk_read+0x449/0x510 ? __pfx__regmap_bulk_read+0x10/0x10 regmap_bulk_read+0x270/0x3d0 pio_complete+0x1ee/0x2c0 [intel_thc] ? __pfx_pio_complete+0x10/0x10 [intel_thc] ? __pfx_pio_wait+0x10/0x10 [intel_thc] ? regmap_update_bits_base+0x13b/0x1f0 thc_i2c_subip_pio_read+0x117/0x270 [intel_thc] thc_i2c_subip_regs_save+0xc2/0x140 [intel_thc] ? __pfx_thc_i2c_subip_regs_save+0x10/0x10 [intel_thc] [...] The buggy address belongs to the object at ffff888136005d00 which belongs to the cache kmalloc-rnd-12-192 of size 192 The buggy address is located 0 bytes to the right of allocated 192-byte region [ffff888136005d00, ffff888136005dc0) Replaced with direct array indexing (&dev->i2c_subip_regs[i]) to ensure safe memory access. Fixes: 4228966def884 ("HID: intel-thc-hid: intel-thc: Add THC I2C config interfaces") Signed-off-by: Aaron Ma Reviewed-by: Even Xu Tested-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-thc/intel-thc-dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/intel-thc-hid/intel-thc/intel-thc-dev.c b/drivers/hid/intel-thc-hid/intel-thc/intel-thc-dev.c index 6f2263869b20..e1cb9b117ebc 100644 --- a/drivers/hid/intel-thc-hid/intel-thc/intel-thc-dev.c +++ b/drivers/hid/intel-thc-hid/intel-thc/intel-thc-dev.c @@ -1540,7 +1540,7 @@ int thc_i2c_subip_regs_save(struct thc_device *dev) for (int i = 0; i < ARRAY_SIZE(i2c_subip_regs); i++) { ret = thc_i2c_subip_pio_read(dev, i2c_subip_regs[i], - &read_size, (u32 *)&dev->i2c_subip_regs + i); + &read_size, &dev->i2c_subip_regs[i]); if (ret < 0) return ret; } @@ -1563,7 +1563,7 @@ int thc_i2c_subip_regs_restore(struct thc_device *dev) for (int i = 0; i < ARRAY_SIZE(i2c_subip_regs); i++) { ret = thc_i2c_subip_pio_write(dev, i2c_subip_regs[i], - write_size, (u32 *)&dev->i2c_subip_regs + i); + write_size, &dev->i2c_subip_regs[i]); if (ret < 0) return ret; } From 647b3d59c768d7638dd17c78c8044178364383ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 31 Jul 2025 07:19:41 -0700 Subject: [PATCH 0126/1307] xfs: fix frozen file system assert in xfs_trans_alloc Commit 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc") move the place of the assert for a frozen file system after the sb_start_intwrite call that ensures it doesn't run on frozen file systems, and thus allows to incorrect trigger it. Fix that by moving it back to where it belongs. Fixes: 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc") Reported-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ece374d622b3..575e7028f423 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -253,8 +253,8 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); tp = __xfs_trans_alloc(mp, flags); + WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 14:19:44 +0200 Subject: [PATCH 0127/1307] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags argument to xfs_inobt_walk, which expects the XFS_IWALK* flags. Currently passing the wrong flags works for non-debug builds because the only XFS_IWALK* flag has the same encoding as the corresponding XFS_IBULK* flag, but in debug builds it can trigger an assert that no incorrect flag is passed. Instead just extra the relevant flag. Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags") Cc: # v5.19 Reported-by: cen zhang Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_itable.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c8c9b8d8309f..5116842420b2 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -447,17 +447,21 @@ xfs_inumbers( .breq = breq, }; struct xfs_trans *tp; + unsigned int iwalk_flags = 0; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; + if (breq->flags & XFS_IBULK_SAME_AG) + iwalk_flags |= XFS_IWALK_SAME_AG; + /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, + error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); From 82efde9cf2e4ce25eac96a20e36eae7c338df1e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 14:19:45 +0200 Subject: [PATCH 0128/1307] xfs: remove XFS_IBULK_SAME_AG Add a new field to struct xfs_ibulk to directly pass XFS_IWALK* flags, and thus remove the need to indirect the SAME_AG flag through XFS_IBULK*. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_ioctl.c | 2 +- fs/xfs/xfs_itable.c | 12 ++---------- fs/xfs/xfs_itable.h | 10 ++++------ 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index fe1f74a3b6a3..e1051a530a50 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -219,7 +219,7 @@ xfs_bulk_ireq_setup( else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) return -EINVAL; - breq->flags |= XFS_IBULK_SAME_AG; + breq->iwalk_flags |= XFS_IWALK_SAME_AG; /* Asking for an inode past the end of the AG? We're done! */ if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 5116842420b2..2aa37a4d2706 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -307,7 +307,6 @@ xfs_bulkstat( .breq = breq, }; struct xfs_trans *tp; - unsigned int iwalk_flags = 0; int error; if (breq->idmap != &nop_mnt_idmap) { @@ -328,10 +327,7 @@ xfs_bulkstat( * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - if (breq->flags & XFS_IBULK_SAME_AG) - iwalk_flags |= XFS_IWALK_SAME_AG; - - error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, + error = xfs_iwalk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_bulkstat_iwalk, breq->icount, &bc); xfs_trans_cancel(tp); kfree(bc.buf); @@ -447,21 +443,17 @@ xfs_inumbers( .breq = breq, }; struct xfs_trans *tp; - unsigned int iwalk_flags = 0; int error = 0; if (xfs_bulkstat_already_done(breq->mp, breq->startino)) return 0; - if (breq->flags & XFS_IBULK_SAME_AG) - iwalk_flags |= XFS_IWALK_SAME_AG; - /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ tp = xfs_trans_alloc_empty(breq->mp); - error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags, + error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->iwalk_flags, xfs_inumbers_walk, breq->icount, &ic); xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index f10e8f8f2335..2d0612f14d6e 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -13,17 +13,15 @@ struct xfs_ibulk { xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ unsigned int ocount; /* number of records returned */ - unsigned int flags; /* see XFS_IBULK_FLAG_* */ + unsigned int flags; /* XFS_IBULK_FLAG_* */ + unsigned int iwalk_flags; /* XFS_IWALK_FLAG_* */ }; -/* Only iterate within the same AG as startino */ -#define XFS_IBULK_SAME_AG (1U << 0) - /* Fill out the bs_extents64 field if set. */ -#define XFS_IBULK_NREXT64 (1U << 1) +#define XFS_IBULK_NREXT64 (1U << 0) /* Signal that we can return metadata directories. */ -#define XFS_IBULK_METADIR (1U << 2) +#define XFS_IBULK_METADIR (1U << 1) /* * Advance the user buffer pointer by one record of the given size. If the From e7fb9b71326f43bab25fb8f18c6bfebd7a628696 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:13 +0000 Subject: [PATCH 0129/1307] fs/dax: Reject IOCB_ATOMIC in dax_iomap_rw() The DAX write path does not support IOCB_ATOMIC, so reject it when set. Suggested-by: Darrick J. Wong Signed-off-by: John Garry Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/dax.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 4229513806be..20ecf652c129 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1743,6 +1743,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t done = 0; int ret; + if (WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC)) + return -EIO; + if (!iomi.len) return 0; From 68456d05eb57a5d16b4be2d3caf421bdcf2de72e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:14 +0000 Subject: [PATCH 0130/1307] xfs: disallow atomic writes on DAX Atomic writes are not currently supported for DAX, but two problems exist: - we may go down DAX write path for IOCB_ATOMIC, which does not handle IOCB_ATOMIC properly - we report non-zero atomic write limits in statx (for DAX inodes) We may want atomic writes support on DAX in future, but just disallow for now. For this, ensure when IOCB_ATOMIC is set that we check the write size versus the atomic write min and max before branching off to the DAX write path. This is not strictly required for DAX, as we should not get this far in the write path as FMODE_CAN_ATOMIC_WRITE should not be set. In addition, due to reflink being supported for DAX, we automatically get CoW-based atomic writes support being advertised. Remedy this by disallowing atomic writes for a DAX inode for both sw and hw modes. Reported-by: Darrick J. Wong Fixes: 9dffc58f2384 ("xfs: update atomic write limits") Reviewed-by: Darrick J. Wong Signed-off-by: John Garry Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_file.c | 6 +++--- fs/xfs/xfs_inode.h | 11 +++++++++++ fs/xfs/xfs_iops.c | 5 +++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 55a304cb3aef..f96fbf5c54c9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1101,9 +1101,6 @@ xfs_file_write_iter( if (xfs_is_shutdown(ip->i_mount)) return -EIO; - if (IS_DAX(inode)) - return xfs_file_dax_write(iocb, from); - if (iocb->ki_flags & IOCB_ATOMIC) { if (ocount < xfs_get_atomic_write_min(ip)) return -EINVAL; @@ -1116,6 +1113,9 @@ xfs_file_write_iter( return ret; } + if (IS_DAX(inode)) + return xfs_file_dax_write(iocb, from); + if (iocb->ki_flags & IOCB_DIRECT) { /* * Allow a directio write to fall back to a buffered diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 07fbdcc4cbf5..bd6d33557194 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -358,9 +358,20 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip) { + if (IS_DAX(VFS_IC(ip))) + return false; + return xfs_inode_buftarg(ip)->bt_awu_max > 0; } +static inline bool xfs_inode_can_sw_atomic_write(const struct xfs_inode *ip) +{ + if (IS_DAX(VFS_IC(ip))) + return false; + + return xfs_can_sw_atomic_write(ip->i_mount); +} + /* * In-core inode flags. */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 149b5460fbfd..603effabe1ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -616,7 +616,8 @@ xfs_get_atomic_write_min( * write of exactly one single fsblock if the bdev will make that * guarantee for us. */ - if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp)) + if (xfs_inode_can_hw_atomic_write(ip) || + xfs_inode_can_sw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; @@ -633,7 +634,7 @@ xfs_get_atomic_write_max( * write of exactly one single fsblock if the bdev will make that * guarantee for us. */ - if (!xfs_can_sw_atomic_write(mp)) { + if (!xfs_inode_can_sw_atomic_write(ip)) { if (xfs_inode_can_hw_atomic_write(ip)) return mp->m_sb.sb_blocksize; return 0; From 8dc5e9b037138317c1d3151a7dabe41fa171cee1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 24 Jul 2025 08:12:15 +0000 Subject: [PATCH 0131/1307] xfs: reject max_atomic_write mount option for no reflink If the FS has no reflink, then atomic writes greater than 1x block are not supported. As such, for no reflink it is pointless to accept setting max_atomic_write when it cannot be supported, so reject max_atomic_write mount option in this case. It could be still possible to accept max_atomic_write option of size 1x block if HW atomics are supported, so check for this specifically. Fixes: 4528b9052731 ("xfs: allow sysadmins to specify a maximum atomic write limit at mount time") Signed-off-by: John Garry Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2133fbaf1766..dc32c5e34d81 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -779,6 +779,25 @@ xfs_set_max_atomic_write_opt( return -EINVAL; } + if (xfs_has_reflink(mp)) + goto set_limit; + + if (new_max_fsbs == 1) { + if (mp->m_ddev_targp->bt_awu_max || + (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_awu_max)) { + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink or HW support", + new_max_bytes >> 10); + return -EINVAL; + } + } else { + xfs_warn(mp, + "cannot support atomic writes of size %lluk with no reflink support", + new_max_bytes >> 10); + return -EINVAL; + } + set_limit: error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs); if (error) { From 5d94b19f066480addfcdcb5efde66152ad5a7c0e Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Thu, 31 Jul 2025 19:07:22 +0200 Subject: [PATCH 0132/1307] xfs: fix scrub trace with null pointer in quotacheck The quotacheck doesn't initialize sc->ip. Cc: stable@vger.kernel.org # v6.8 Fixes: 21d7500929c8a0 ("xfs: improve dquot iteration for scrub") Reviewed-by: Darrick J. Wong Signed-off-by: Andrey Albershteyn Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 1e6e9c10cea2..a8187281eb96 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -479,7 +479,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class, __field(xfs_exntst_t, state) ), TP_fast_assign( - __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev; + __entry->dev = cursor->sc->mp->m_super->s_dev; __entry->dqtype = cursor->dqtype; __entry->ino = cursor->quota_ip->i_ino; __entry->cur_id = cursor->id; From f76823e3b284aae30797fded988a807eab2da246 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jul 2025 07:35:44 +0200 Subject: [PATCH 0133/1307] xfs: split xfs_zone_record_blocks xfs_zone_record_blocks not only records successfully written blocks that now back file data, but is also used for blocks speculatively written by garbage collection that were never linked to an inode and instantly become invalid. Split the latter functionality out to be easier to understand. This also make it clear that we don't need to attach the rmap inode to a transaction for the skipped blocks case as we never dirty any peristent data structure. Also make the argument order to xfs_zone_record_blocks a bit more natural. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trace.h | 1 + fs/xfs/xfs_zone_alloc.c | 42 ++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index e1794e3e3156..ac344e42846c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -455,6 +455,7 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \ xfs_extlen_t len), \ TP_ARGS(oz, rgbno, len)) DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks); +DEFINE_ZONE_ALLOC_EVENT(xfs_zone_skip_blocks); DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks); TRACE_EVENT(xfs_zone_gc_select_victim, diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 33f7eee521a8..f8bd6d741755 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -166,10 +166,9 @@ xfs_open_zone_mark_full( static void xfs_zone_record_blocks( struct xfs_trans *tp, - xfs_fsblock_t fsbno, - xfs_filblks_t len, struct xfs_open_zone *oz, - bool used) + xfs_fsblock_t fsbno, + xfs_filblks_t len) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = oz->oz_rtg; @@ -179,18 +178,37 @@ xfs_zone_record_blocks( xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); - if (used) { - rmapip->i_used_blocks += len; - ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); - } else { - xfs_add_frextents(mp, len); - } + rmapip->i_used_blocks += len; + ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) xfs_open_zone_mark_full(oz); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); } +/* + * Called for blocks that have been written to disk, but not actually linked to + * an inode, which can happen when garbage collection races with user data + * writes to a file. + */ +static void +xfs_zone_skip_blocks( + struct xfs_open_zone *oz, + xfs_filblks_t len) +{ + struct xfs_rtgroup *rtg = oz->oz_rtg; + + trace_xfs_zone_skip_blocks(oz, 0, len); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + oz->oz_written += len; + if (oz->oz_written == rtg_blocks(rtg)) + xfs_open_zone_mark_full(oz); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + + xfs_add_frextents(rtg_mount(rtg), len); +} + static int xfs_zoned_map_extent( struct xfs_trans *tp, @@ -250,8 +268,7 @@ xfs_zoned_map_extent( } } - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - true); + xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); @@ -259,8 +276,7 @@ xfs_zoned_map_extent( skip: trace_xfs_reflink_cow_remap_skip(ip, new); - xfs_zone_record_blocks(tp, new->br_startblock, new->br_blockcount, oz, - false); + xfs_zone_skip_blocks(oz, new->br_blockcount); return 0; } From d02d2c98d25793902f65803ab853b592c7a96b29 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Mon, 28 Jul 2025 18:07:15 +0800 Subject: [PATCH 0134/1307] fs: writeback: fix use-after-free in __mark_inode_dirty() An use-after-free issue occurred when __mark_inode_dirty() get the bdi_writeback that was in the progress of switching. CPU: 1 PID: 562 Comm: systemd-random- Not tainted 6.6.56-gb4403bd46a8e #1 ...... pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __mark_inode_dirty+0x124/0x418 lr : __mark_inode_dirty+0x118/0x418 sp : ffffffc08c9dbbc0 ........ Call trace: __mark_inode_dirty+0x124/0x418 generic_update_time+0x4c/0x60 file_modified+0xcc/0xd0 ext4_buffered_write_iter+0x58/0x124 ext4_file_write_iter+0x54/0x704 vfs_write+0x1c0/0x308 ksys_write+0x74/0x10c __arm64_sys_write+0x1c/0x28 invoke_syscall+0x48/0x114 el0_svc_common.constprop.0+0xc0/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x40/0xe4 el0t_64_sync_handler+0x120/0x12c el0t_64_sync+0x194/0x198 Root cause is: systemd-random-seed kworker ---------------------------------------------------------------------- ___mark_inode_dirty inode_switch_wbs_work_fn spin_lock(&inode->i_lock); inode_attach_wb locked_inode_to_wb_and_lock_list get inode->i_wb spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock) spin_lock(&inode->i_lock) inode_io_list_move_locked spin_unlock(&wb->list_lock) spin_unlock(&inode->i_lock) spin_lock(&old_wb->list_lock) inode_do_switch_wbs spin_lock(&inode->i_lock) inode->i_wb = new_wb spin_unlock(&inode->i_lock) spin_unlock(&old_wb->list_lock) wb_put_many(old_wb, nr_switched) cgwb_release old wb released wb_wakeup_delayed() accesses wb, then trigger the use-after-free issue Fix this race condition by holding inode spinlock until wb_wakeup_delayed() finished. Signed-off-by: Jiufei Xue Link: https://lore.kernel.org/20250728100715.3863241-1-jiufei.xue@samsung.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fs-writeback.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index cc57367fb641..a07b8cf73ae2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2608,10 +2608,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); - spin_unlock(&wb->list_lock); - spin_unlock(&inode->i_lock); - trace_writeback_dirty_inode_enqueue(inode); - /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread @@ -2621,6 +2617,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); + + spin_unlock(&wb->list_lock); + spin_unlock(&inode->i_lock); + trace_writeback_dirty_inode_enqueue(inode); + return; } } From 9308366f062129d52e0ee3f7a019f7dd41db33df Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:05 +1000 Subject: [PATCH 0135/1307] open_tree_attr: do not allow id-mapping changes without OPEN_TREE_CLONE As described in commit 7a54947e727b ('Merge patch series "fs: allow changing idmappings"'), open_tree_attr(2) was necessary in order to allow for a detached mount to be created and have its idmappings changed without the risk of any racing threads operating on it. For this reason, mount_setattr(2) still does not allow for id-mappings to be changed. However, there was a bug in commit 2462651ffa76 ("fs: allow changing idmappings") which allowed users to bypass this restriction by calling open_tree_attr(2) *without* OPEN_TREE_CLONE. can_idmap_mount() prevented this bug from allowing an attached mountpoint's id-mapping from being modified (thanks to an is_anon_ns() check), but this still allows for detached (but visible) mounts to have their be id-mapping changed. This risks the same UAF and locking issues as described in the merge commit, and was likely unintentional. Fixes: 2462651ffa76 ("fs: allow changing idmappings") Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-1-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index ddfd4457d338..ceb6b57e6a57 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5176,7 +5176,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, int ret; struct mount_kattr kattr = {}; - kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; + if (flags & OPEN_TREE_CLONE) + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; From 81e4b9cf365df4cde30157a85cc9f3d673946118 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:06 +1000 Subject: [PATCH 0136/1307] selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug There appear to be no other open_tree_attr(2) tests at the moment, but as a minimal solution just add some additional checks in the existing MOUNT_ATTR_IDMAP tests to make sure that open_tree_attr(2) cannot be used to bypass the tested restrictions that apply to mount_setattr(2). Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-2-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- .../mount_setattr/mount_setattr_test.c | 77 +++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index b1e4618399be..a688871a98eb 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -107,6 +107,26 @@ #endif #endif +#ifndef __NR_open_tree_attr + #if defined __alpha__ + #define __NR_open_tree_attr 577 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree_attr (467 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree_attr (467 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree_attr (467 + 5000) + #endif + #elif defined __ia64__ + #define __NR_open_tree_attr (467 + 1024) + #else + #define __NR_open_tree_attr 467 + #endif +#endif + #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } +static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size); +} + static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) ASSERT_EQ(close(open_tree_fd), 0); } +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + /** * Validate that currently changing the idmapping of an idmapped mount fails. */ @@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping) .attr_set = MOUNT_ATTR_IDMAP, }; + ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0)); + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping) AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + /* Change idmapping on a detached mount that is already idmapped. */ attr.userns_fd = get_userns_fd(0, 20000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -static bool expected_uid_gid(int dfd, const char *path, int flags, - uid_t expected_uid, gid_t expected_gid) -{ - int ret; - struct stat st; - - ret = fstatat(dfd, path, &st, flags); - if (ret < 0) - return false; - - return st.st_uid == expected_uid && st.st_gid == expected_gid; -} - TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) { int open_tree_fd = -EBADF; From 6b65028e2b51c023a816eabffea88980fdd5564e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 30 Jul 2025 12:28:41 +0200 Subject: [PATCH 0137/1307] iomap: Fix broken data integrity guarantees for O_SYNC writes Commit d279c80e0bac ("iomap: inline iomap_dio_bio_opflags()") has broken the logic in iomap_dio_bio_iter() in a way that when the device does support FUA (or has no writeback cache) and the direct IO happens to freshly allocated or unwritten extents, we will *not* issue fsync after completing direct IO O_SYNC / O_DSYNC write because the IOMAP_DIO_WRITE_THROUGH flag stays mistakenly set. Fix the problem by clearing IOMAP_DIO_WRITE_THROUGH whenever we do not perform FUA write as it was originally intended. CC: John Garry CC: Ritesh Harjani (IBM) Fixes: d279c80e0bac ("iomap: inline iomap_dio_bio_opflags()") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/20250730102840.20470-2-jack@suse.cz Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: John Garry Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 6f25d4cfea9f..b84f6af2eb4c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -363,14 +363,14 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) if (iomap->flags & IOMAP_F_SHARED) dio->flags |= IOMAP_DIO_COW; - if (iomap->flags & IOMAP_F_NEW) { + if (iomap->flags & IOMAP_F_NEW) need_zeroout = true; - } else if (iomap->type == IOMAP_MAPPED) { - if (iomap_dio_can_use_fua(iomap, dio)) - bio_opf |= REQ_FUA; - else - dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; - } + else if (iomap->type == IOMAP_MAPPED && + iomap_dio_can_use_fua(iomap, dio)) + bio_opf |= REQ_FUA; + + if (!(bio_opf & REQ_FUA)) + dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; /* * We can only do deferred completion for pure overwrites that From 542ede096e48436dbd70869640c0d88180565933 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 7 Aug 2025 10:50:15 -0700 Subject: [PATCH 0138/1307] fuse: keep inode->i_blkbits constant With fuse now using iomap for writeback handling, inode blkbits changes are problematic because iomap relies on inode->i_blkbits for its internal bitmap logic. Currently we change inode->i_blkbits in fuse to match the attr->blksize value passed in by the server. This commit keeps inode->i_blkbits constant in fuse. Any attr->blksize values passed in by the server will not update inode->i_blkbits. The client-side behavior for stat is unaffected, stat will still reflect the blocksize passed in by the server. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/20250807175015.515192-1-joannelkoong@gmail.com Fixes: ef7e7cbb32 ("fuse: use iomap for writeback") Signed-off-by: Christian Brauner --- fs/fuse/inode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ecb869e895ab..67c2318bfc42 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -289,11 +289,6 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, } } - if (attr->blksize != 0) - inode->i_blkbits = ilog2(attr->blksize); - else - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the From 2319f9d0aa644eb9666c7be903078f50ecc2eb5b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 09:49:57 +0200 Subject: [PATCH 0139/1307] selftests/coredump: Remove the read() that fails the test Resolve a conflict between commit 6a68d28066b6 ("selftests/coredump: Fix "socket_detect_userspace_client" test failure") and commit 994dc26302ed ("selftests/coredump: fix build") The first commit adds a read() to wait for write() from another thread to finish. But the second commit removes the write(). Now that the two commits are in the same tree, the read() now gets EOF and the test fails. Remove this read() so that the test passes. Signed-off-by: Nam Cao Link: https://lore.kernel.org/20250811074957.4079616-1-namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 5a5a7a5f7e1d..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (info.coredump_mask & PIDFD_COREDUMPED) goto out; - if (read(fd_coredump, &c, 1) < 1) - goto out; - exit_code = EXIT_SUCCESS; out: if (fd_peer_pidfd >= 0) From d5dd409812eca084e68208926bb629c8f708651f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 5 Jun 2025 12:38:52 +0200 Subject: [PATCH 0140/1307] drbd: Remove the open-coded page pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the network stack keeps a reference for too long, DRBD keeps references on a higher number of pages as a consequence. Fix all that by no longer relying on page reference counts dropping to an expected value. Instead, DRBD gives up its reference and lets the system handle everything else. While at it, remove the open-coded custom page pool mechanism and use the page_pool included in the kernel. Signed-off-by: Philipp Reisner Signed-off-by: Christoph Böhmwalder Tested-by: Eric Hagberg Link: https://lore.kernel.org/r/20250605103852.23029-1-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 39 +---- drivers/block/drbd/drbd_main.c | 59 ++----- drivers/block/drbd/drbd_receiver.c | 262 ++++------------------------- drivers/block/drbd/drbd_worker.c | 56 ++---- 4 files changed, 70 insertions(+), 346 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e21492981f7d..f6d6276974ee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -380,6 +380,9 @@ enum { /* this is/was a write request */ __EE_WRITE, + /* hand back using mempool_free(e, drbd_buffer_page_pool) */ + __EE_RELEASE_TO_MEMPOOL, + /* this is/was a write same request */ __EE_WRITE_SAME, @@ -402,6 +405,7 @@ enum { #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) #define EE_SUBMITTED (1<<__EE_SUBMITTED) #define EE_WRITE (1<<__EE_WRITE) +#define EE_RELEASE_TO_MEMPOOL (1<<__EE_RELEASE_TO_MEMPOOL) #define EE_WRITE_SAME (1<<__EE_WRITE_SAME) #define EE_APPLICATION (1<<__EE_APPLICATION) #define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ) @@ -858,7 +862,6 @@ struct drbd_device { struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ struct list_head done_ee; /* need to send P_WRITE_ACK */ struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ - struct list_head net_ee; /* zero-copy network send in progress */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ @@ -1329,24 +1332,6 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t drbd_request_mempool; extern mempool_t drbd_ee_mempool; -/* drbd's page pool, used to buffer data received from the peer, - * or data requested by the peer. - * - * This does not have an emergency reserve. - * - * When allocating from this pool, it first takes pages from the pool. - * Only if the pool is depleted will try to allocate from the system. - * - * The assumption is that pages taken from this pool will be processed, - * and given back, "quickly", and then can be recycled, so we can avoid - * frequent calls to alloc_page(), and still will be able to make progress even - * under memory pressure. - */ -extern struct page *drbd_pp_pool; -extern spinlock_t drbd_pp_lock; -extern int drbd_pp_vacant; -extern wait_queue_head_t drbd_pp_wait; - /* We also need a standard (emergency-reserve backed) page pool * for meta data IO (activity log, bitmap). * We can keep it global, as long as it is used as "N pages at a time". @@ -1354,6 +1339,7 @@ extern wait_queue_head_t drbd_pp_wait; */ #define DRBD_MIN_POOL_PAGES 128 extern mempool_t drbd_md_io_page_pool; +extern mempool_t drbd_buffer_page_pool; /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ @@ -1488,10 +1474,7 @@ extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, sector_t, unsigned int, unsigned int, gfp_t) __must_hold(local); -extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, - int); -#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) -#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *req); extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); @@ -1610,16 +1593,6 @@ static inline struct page *page_chain_next(struct page *page) for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) -{ - struct page *page = peer_req->pages; - page_chain_for_each(page) { - if (page_count(page) > 1) - return 1; - } - return 0; -} - static inline union drbd_state drbd_read_state(struct drbd_device *device) { struct drbd_resource *resource = device->resource; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52724b79be30..c73376886e7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -114,20 +114,10 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t drbd_request_mempool; mempool_t drbd_ee_mempool; mempool_t drbd_md_io_page_pool; +mempool_t drbd_buffer_page_pool; struct bio_set drbd_md_io_bio_set; struct bio_set drbd_io_bio_set; -/* I do not use a standard mempool, because: - 1) I want to hand out the pre-allocated objects first. - 2) I want to be able to interrupt sleeping allocation with a signal. - Note: This is a single linked list, the next pointer is the private - member of struct page. - */ -struct page *drbd_pp_pool; -DEFINE_SPINLOCK(drbd_pp_lock); -int drbd_pp_vacant; -wait_queue_head_t drbd_pp_wait; - DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); static const struct block_device_operations drbd_ops = { @@ -1611,6 +1601,7 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, struct drbd_peer_request *peer_req) { + bool use_sendpage = !(peer_req->flags & EE_RELEASE_TO_MEMPOOL); struct page *page = peer_req->pages; unsigned len = peer_req->i.size; int err; @@ -1619,8 +1610,13 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - err = _drbd_send_page(peer_device, page, 0, l, - page_chain_next(page) ? MSG_MORE : 0); + if (likely(use_sendpage)) + err = _drbd_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + else + err = _drbd_no_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + if (err) return err; len -= l; @@ -1962,7 +1958,6 @@ void drbd_init_set_defaults(struct drbd_device *device) INIT_LIST_HEAD(&device->sync_ee); INIT_LIST_HEAD(&device->done_ee); INIT_LIST_HEAD(&device->read_ee); - INIT_LIST_HEAD(&device->net_ee); INIT_LIST_HEAD(&device->resync_reads); INIT_LIST_HEAD(&device->resync_work.list); INIT_LIST_HEAD(&device->unplug_work.list); @@ -2043,7 +2038,6 @@ void drbd_device_cleanup(struct drbd_device *device) D_ASSERT(device, list_empty(&device->sync_ee)); D_ASSERT(device, list_empty(&device->done_ee)); D_ASSERT(device, list_empty(&device->read_ee)); - D_ASSERT(device, list_empty(&device->net_ee)); D_ASSERT(device, list_empty(&device->resync_reads)); D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); D_ASSERT(device, list_empty(&device->resync_work.list)); @@ -2055,19 +2049,11 @@ void drbd_device_cleanup(struct drbd_device *device) static void drbd_destroy_mempools(void) { - struct page *page; - - while (drbd_pp_pool) { - page = drbd_pp_pool; - drbd_pp_pool = (struct page *)page_private(page); - __free_page(page); - drbd_pp_vacant--; - } - /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ bioset_exit(&drbd_io_bio_set); bioset_exit(&drbd_md_io_bio_set); + mempool_exit(&drbd_buffer_page_pool); mempool_exit(&drbd_md_io_page_pool); mempool_exit(&drbd_ee_mempool); mempool_exit(&drbd_request_mempool); @@ -2086,9 +2072,8 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { - struct page *page; const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; - int i, ret; + int ret; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2125,6 +2110,10 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; + ret = mempool_init_page_pool(&drbd_buffer_page_pool, number, 0); + if (ret) + goto Enomem; + ret = mempool_init_slab_pool(&drbd_request_mempool, number, drbd_request_cache); if (ret) @@ -2134,15 +2123,6 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; - for (i = 0; i < number; i++) { - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto Enomem; - set_page_private(page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = page; - } - drbd_pp_vacant = number; - return 0; Enomem: @@ -2169,10 +2149,6 @@ static void drbd_release_all_peer_reqs(struct drbd_device *device) rr = drbd_free_peer_reqs(device, &device->done_ee); if (rr) drbd_err(device, "%d EEs in done list found!\n", rr); - - rr = drbd_free_peer_reqs(device, &device->net_ee); - if (rr) - drbd_err(device, "%d EEs in net list found!\n", rr); } /* caution. no locking. */ @@ -2863,11 +2839,6 @@ static int __init drbd_init(void) return err; } - /* - * allocate all necessary structs - */ - init_waitqueue_head(&drbd_pp_wait); - drbd_proc = NULL; /* play safe for drbd_cleanup */ idr_init(&drbd_devices); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 975024cf03c5..caaf2781136d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" @@ -63,182 +64,31 @@ static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -/* - * some helper functions to deal with single linked page lists, - * page->private being our "next" pointer. - */ - -/* If at least n pages are linked at head, get n pages off. - * Otherwise, don't modify head, and return NULL. - * Locking is the responsibility of the caller. - */ -static struct page *page_chain_del(struct page **head, int n) -{ - struct page *page; - struct page *tmp; - - BUG_ON(!n); - BUG_ON(!head); - - page = *head; - - if (!page) - return NULL; - - while (page) { - tmp = page_chain_next(page); - if (--n == 0) - break; /* found sufficient pages */ - if (tmp == NULL) - /* insufficient pages, don't use any of them. */ - return NULL; - page = tmp; - } - - /* add end of list marker for the returned list */ - set_page_private(page, 0); - /* actual return value, and adjustment of head */ - page = *head; - *head = tmp; - return page; -} - -/* may be used outside of locks to find the tail of a (usually short) - * "private" page chain, before adding it back to a global chain head - * with page_chain_add() under a spinlock. */ -static struct page *page_chain_tail(struct page *page, int *len) -{ - struct page *tmp; - int i = 1; - while ((tmp = page_chain_next(page))) { - ++i; - page = tmp; - } - if (len) - *len = i; - return page; -} - -static int page_chain_free(struct page *page) -{ - struct page *tmp; - int i = 0; - page_chain_for_each_safe(page, tmp) { - put_page(page); - ++i; - } - return i; -} - -static void page_chain_add(struct page **head, - struct page *chain_first, struct page *chain_last) -{ -#if 1 - struct page *tmp; - tmp = page_chain_tail(chain_first, NULL); - BUG_ON(tmp != chain_last); -#endif - - /* add chain to head */ - set_page_private(chain_last, (unsigned long)*head); - *head = chain_first; -} - -static struct page *__drbd_alloc_pages(struct drbd_device *device, - unsigned int number) +static struct page *__drbd_alloc_pages(unsigned int number) { struct page *page = NULL; struct page *tmp = NULL; unsigned int i = 0; - /* Yes, testing drbd_pp_vacant outside the lock is racy. - * So what. It saves a spin_lock. */ - if (drbd_pp_vacant >= number) { - spin_lock(&drbd_pp_lock); - page = page_chain_del(&drbd_pp_pool, number); - if (page) - drbd_pp_vacant -= number; - spin_unlock(&drbd_pp_lock); - if (page) - return page; - } - /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ for (i = 0; i < number; i++) { - tmp = alloc_page(GFP_TRY); + tmp = mempool_alloc(&drbd_buffer_page_pool, GFP_TRY); if (!tmp) - break; + goto fail; set_page_private(tmp, (unsigned long)page); page = tmp; } - - if (i == number) - return page; - - /* Not enough pages immediately available this time. - * No need to jump around here, drbd_alloc_pages will retry this - * function "soon". */ - if (page) { - tmp = page_chain_tail(page, NULL); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + return page; +fail: + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + mempool_free(page, &drbd_buffer_page_pool); } return NULL; } -static void reclaim_finished_net_peer_reqs(struct drbd_device *device, - struct list_head *to_be_freed) -{ - struct drbd_peer_request *peer_req, *tmp; - - /* The EEs are always appended to the end of the list. Since - they are sent in order over the wire, they have to finish - in order. As soon as we see the first not finished we can - stop to examine the list... */ - - list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { - if (drbd_peer_req_has_active_page(peer_req)) - break; - list_move(&peer_req->w.list, to_be_freed); - } -} - -static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) -{ - LIST_HEAD(reclaimed); - struct drbd_peer_request *peer_req, *t; - - spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); - spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); -} - -static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) -{ - struct drbd_peer_device *peer_device; - int vnr; - - rcu_read_lock(); - idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { - struct drbd_device *device = peer_device->device; - if (!atomic_read(&device->pp_in_use_by_net)) - continue; - - kref_get(&device->kref); - rcu_read_unlock(); - drbd_reclaim_net_peer_reqs(device); - kref_put(&device->kref, drbd_destroy_device); - rcu_read_lock(); - } - rcu_read_unlock(); -} - /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @peer_device: DRBD device. @@ -263,9 +113,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int bool retry) { struct drbd_device *device = peer_device->device; - struct page *page = NULL; + struct page *page; struct net_conf *nc; - DEFINE_WAIT(wait); unsigned int mxb; rcu_read_lock(); @@ -273,37 +122,9 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int mxb = nc ? nc->max_buffers : 1000000; rcu_read_unlock(); - if (atomic_read(&device->pp_in_use) < mxb) - page = __drbd_alloc_pages(device, number); - - /* Try to keep the fast path fast, but occasionally we need - * to reclaim the pages we lended to the network stack. */ - if (page && atomic_read(&device->pp_in_use_by_net) > 512) - drbd_reclaim_net_peer_reqs(device); - - while (page == NULL) { - prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - - drbd_reclaim_net_peer_reqs(device); - - if (atomic_read(&device->pp_in_use) < mxb) { - page = __drbd_alloc_pages(device, number); - if (page) - break; - } - - if (!retry) - break; - - if (signal_pending(current)) { - drbd_warn(device, "drbd_alloc_pages interrupted!\n"); - break; - } - - if (schedule_timeout(HZ/10) == 0) - mxb = UINT_MAX; - } - finish_wait(&drbd_pp_wait, &wait); + if (atomic_read(&device->pp_in_use) >= mxb) + schedule_timeout_interruptible(HZ / 10); + page = __drbd_alloc_pages(number); if (page) atomic_add(number, &device->pp_in_use); @@ -314,29 +135,25 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int * Is also used from inside an other spin_lock_irq(&resource->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_device *device, struct page *page) { - atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; - int i; + struct page *tmp; + int i = 0; if (page == NULL) return; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) - i = page_chain_free(page); - else { - struct page *tmp; - tmp = page_chain_tail(page, &i); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + if (page_count(page) == 1) + mempool_free(page, &drbd_buffer_page_pool); + else + put_page(page); + i++; } - i = atomic_sub_return(i, a); + i = atomic_sub_return(i, &device->pp_in_use); if (i < 0) - drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", - is_net ? "pp_in_use_by_net" : "pp_in_use", i); - wake_up(&drbd_pp_wait); + drbd_warn(device, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); } /* @@ -380,6 +197,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto gfpflags_allow_blocking(gfp_mask)); if (!page) goto fail; + if (!mempool_is_saturated(&drbd_buffer_page_pool)) + peer_req->flags |= EE_RELEASE_TO_MEMPOOL; } memset(peer_req, 0, sizeof(*peer_req)); @@ -403,13 +222,12 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return NULL; } -void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, - int is_net) +void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req) { might_sleep(); if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_free_pages(device, peer_req->pages, is_net); + drbd_free_pages(device, peer_req->pages); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { @@ -424,14 +242,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; int count = 0; - int is_net = list == &device->net_ee; spin_lock_irq(&device->resource->req_lock); list_splice_init(list, &work_list); spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - __drbd_free_peer_req(device, peer_req, is_net); + drbd_free_peer_req(device, peer_req); count++; } return count; @@ -443,18 +260,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) static int drbd_finish_peer_reqs(struct drbd_device *device) { LIST_HEAD(work_list); - LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; int err = 0; spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); list_splice_init(&device->done_ee, &work_list); spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); - /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_superseded. * all ignore the last argument. @@ -1975,7 +1787,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) data_size -= len; } kunmap(page); - drbd_free_pages(peer_device->device, page, 0); + drbd_free_pages(peer_device->device, page); return err; } @@ -5224,16 +5036,6 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) put_ldev(device); } - /* tcp_close and release of sendpage pages can be deferred. I don't - * want to use SO_LINGER, because apparently it can be deferred for - * more than 20 seconds (longest time I checked). - * - * Actually we don't care for exactly when the network stack does its - * put_page(), but release our reference on these pages right here. - */ - i = drbd_free_peer_reqs(device, &device->net_ee); - if (i) - drbd_info(device, "net_ee not empty, killed %u entries\n", i); i = atomic_read(&device->pp_in_use_by_net); if (i) drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); @@ -5980,8 +5782,6 @@ int drbd_ack_receiver(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - conn_reclaim_net_peer_reqs(connection); - if (test_and_clear_bit(SEND_PING, &connection->flags)) { if (drbd_send_ping(connection)) { drbd_err(connection, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a6ea737b3b71..dea3e79d044f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1030,22 +1030,6 @@ int drbd_resync_finished(struct drbd_peer_device *peer_device) return 1; } -/* helper */ -static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) -{ - if (drbd_peer_req_has_active_page(peer_req)) { - /* This might happen if sendpage() has not finished */ - int i = PFN_UP(peer_req->i.size); - atomic_add(i, &device->pp_in_use_by_net); - atomic_sub(i, &device->pp_in_use); - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->net_ee); - spin_unlock_irq(&device->resource->req_lock); - wake_up(&drbd_pp_wait); - } else - drbd_free_peer_req(device, peer_req); -} - /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST * @w: work object. @@ -1059,9 +1043,8 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { @@ -1074,12 +1057,12 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); } - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1120,9 +1103,8 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev_if_state(device, D_FAILED)) { @@ -1155,13 +1137,12 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) /* update resync data with failure */ drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } - - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1176,9 +1157,8 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev(device)) { @@ -1220,12 +1200,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); } - - dec_unacked(device); - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block/ack() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } From 212c928d01e9ea1d1c46a114650b551da8ca823e Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Fri, 8 Aug 2025 15:44:43 -0600 Subject: [PATCH 0141/1307] ublk: don't quiesce in ublk_ch_release ublk_ch_release currently quiesces the device's request_queue while setting force_abort/fail_io. This avoids data races by preventing concurrent reads from the I/O path, but is not strictly needed - at this point, canceling is already set and guaranteed to be observed by any concurrently executing I/Os, so they will be handled properly even if the changes to force_abort/fail_io propagate to the I/O path later. Remove the quiesce/unquiesce calls from ublk_ch_release. This makes the writes to force_abort/fail_io concurrent with the reads in the I/O path, so make the accesses atomic. Before this change, the call to blk_mq_quiesce_queue was responsible for most (90%) of the runtime of ublk_ch_release. With that call eliminated, ublk_ch_release runs much faster. Here is a comparison of the total time spent in calls to ublk_ch_release when a server handling 128 devices exits, before and after this change: before: 1.11s after: 0.09s Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250808-ublk_quiesce2-v1-1-f87ade33fa3d@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6561d2a561fa..6b95cf48ae77 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1389,7 +1389,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, { blk_status_t res; - if (unlikely(ubq->fail_io)) + if (unlikely(READ_ONCE(ubq->fail_io))) return BLK_STS_TARGET; /* With recovery feature enabled, force_abort is set in @@ -1401,7 +1401,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && + unlikely(READ_ONCE(ubq->force_abort))) return BLK_STS_IOERR; if (check_cancel && unlikely(ubq->canceling)) @@ -1644,7 +1645,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * Transition the device to the nosrv state. What exactly this * means depends on the recovery flags */ - blk_mq_quiesce_queue(disk->queue); if (ublk_nosrv_should_stop_dev(ub)) { /* * Allow any pending/future I/O to pass through quickly @@ -1652,8 +1652,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * waits for all pending I/O to complete */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(disk->queue); + WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true); ublk_stop_dev_unlocked(ub); } else { @@ -1663,9 +1662,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->fail_io = true; + WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true); } - blk_mq_unquiesce_queue(disk->queue); } unlock: mutex_unlock(&ub->mutex); From 5058a62875e1916e5133a1639f0207ea2148c0bc Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 8 Aug 2025 09:52:15 -0600 Subject: [PATCH 0142/1307] ublk: check for unprivileged daemon on each I/O fetch Commit ab03a61c6614 ("ublk: have a per-io daemon instead of a per-queue daemon") allowed each ublk I/O to have an independent daemon task. However, nr_privileged_daemon is only computed based on whether the last I/O fetched in each ublk queue has an unprivileged daemon task. Fix this by checking whether every fetched I/O's daemon is privileged. Change nr_privileged_daemon from a count of queues to a boolean indicating whether any I/Os have an unprivileged daemon. Signed-off-by: Caleb Sander Mateos Fixes: ab03a61c6614 ("ublk: have a per-io daemon instead of a per-queue daemon") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250808155216.296170-1-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6b95cf48ae77..99abd67b708b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -235,7 +235,7 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; - unsigned int nr_privileged_daemon; + bool unprivileged_daemons; struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; @@ -1551,7 +1551,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; + ub->unprivileged_daemons = false; ub->ublksrv_tgid = -1; } @@ -1978,12 +1978,10 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) __must_hold(&ub->mutex) { ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) { + if (ublk_queue_ready(ubq)) ub->nr_queues_ready++; - - if (capable(CAP_SYS_ADMIN)) - ub->nr_privileged_daemon++; - } + if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) + ub->unprivileged_daemons = true; if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { /* now we are ready for handling ublk io request */ @@ -2878,8 +2876,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, ublk_apply_params(ub); - /* don't probe partitions if any one ubq daemon is un-trusted */ - if (ub->nr_privileged_daemon != ub->nr_queues_ready) + /* don't probe partitions if any daemon task is un-trusted */ + if (ub->unprivileged_daemons) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); ublk_get_device(ub); From 8f3e4e87b0945aeea8b5a5aa43c419f4a1b4ca6a Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Mon, 11 Aug 2025 16:11:35 +0800 Subject: [PATCH 0143/1307] block, bfq: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. Reviewed-by: Yu Kuai Signed-off-by: Qianfeng Rong Link: https://lore.kernel.org/r/20250811081135.374315-1-rongqianfeng@vivo.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3bf76902f07f..50e51047e1fe 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5847,8 +5847,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, goto out; } - bfqq = kmem_cache_alloc_node(bfq_pool, - GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN, + bfqq = kmem_cache_alloc_node(bfq_pool, GFP_NOWAIT | __GFP_ZERO, bfqd->queue->node); if (bfqq) { From 196447c712dd486f4315356c572a1d13dd743f08 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Sat, 9 Aug 2025 22:13:58 +0800 Subject: [PATCH 0144/1307] blk-cgroup: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. Signed-off-by: Qianfeng Rong Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250809141358.168781-1-rongqianfeng@vivo.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5936db7f8475..fe9ebd6a2e14 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -394,7 +394,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, /* allocate */ if (!new_blkg) { - new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT | __GFP_NOWARN); + new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT); if (unlikely(!new_blkg)) { ret = -ENOMEM; goto err_put_css; @@ -1467,7 +1467,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) spin_lock_init(&blkcg->lock); refcount_set(&blkcg->online_pin, 1); - INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); + INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT); INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); @@ -1630,7 +1630,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) pd_prealloc = NULL; } else { pd = pol->pd_alloc_fn(disk, blkg->blkcg, - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); } if (!pd) { From 343dc5423bfe876c12bb80c56f5e44286e442a07 Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Fri, 8 Aug 2025 13:36:09 +0800 Subject: [PATCH 0145/1307] block: fix kobject double initialization in add_disk Device-mapper can call add_disk() multiple times for the same gendisk due to its two-phase creation process (dm create + dm load). This leads to kobject double initialization errors when the underlying iSCSI devices become temporarily unavailable and then reappear. However, if the first add_disk() call fails and is retried, the queue_kobj gets initialized twice, causing: kobject: kobject (ffff88810c27bb90): tried to init an initialized object, something is seriously wrong. Call Trace: dump_stack_lvl+0x5b/0x80 kobject_init.cold+0x43/0x51 blk_register_queue+0x46/0x280 add_disk_fwnode+0xb5/0x280 dm_setup_md_queue+0x194/0x1c0 table_load+0x297/0x2d0 ctl_ioctl+0x2a2/0x480 dm_ctl_ioctl+0xe/0x20 __x64_sys_ioctl+0xc7/0x110 do_syscall_64+0x72/0x390 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by separating kobject initialization from sysfs registration: - Initialize queue_kobj early during gendisk allocation - add_disk() only adds the already-initialized kobject to sysfs - del_gendisk() removes from sysfs but doesn't destroy the kobject - Final cleanup happens when the disk is released Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs") Reported-by: Li Lingfeng Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/ Signed-off-by: Zheng Qixing Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Reviewed-by: Nilay Shroff Link: https://lore.kernel.org/r/20250808053609.3237836-1-zhengqixing@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 12 +++++------- block/blk.h | 1 + block/genhd.c | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 396cded255ea..c5cf79a20842 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj) /* nothing to do here, all data is associated with the parent gendisk */ } -static const struct kobj_type blk_queue_ktype = { +const struct kobj_type blk_queue_ktype = { .default_groups = blk_queue_attr_groups, .sysfs_ops = &queue_sysfs_ops, .release = blk_queue_release, @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) - goto out_put_queue_kobj; + return ret; if (queue_is_mq(q)) { ret = blk_mq_sysfs_register(disk); if (ret) - goto out_put_queue_kobj; + goto out_del_queue_kobj; } mutex_lock(&q->sysfs_lock); @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk) mutex_unlock(&q->sysfs_lock); if (queue_is_mq(q)) blk_mq_sysfs_unregister(disk); -out_put_queue_kobj: - kobject_put(&disk->queue_kobj); +out_del_queue_kobj: + kobject_del(&disk->queue_kobj); return ret; } @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk) elevator_set_none(q); blk_debugfs_remove(disk); - kobject_put(&disk->queue_kobj); } diff --git a/block/blk.h b/block/blk.h index 0a2eccf28ca4..46f566f9b126 100644 --- a/block/blk.h +++ b/block/blk.h @@ -29,6 +29,7 @@ struct elevator_tags; /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) +extern const struct kobj_type blk_queue_ktype; extern struct dentry *blk_debugfs_root; struct blk_flush_queue { diff --git a/block/genhd.c b/block/genhd.c index c26733f6324b..9bbc38d12792 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1303,6 +1303,7 @@ static void disk_release(struct device *dev) disk_free_zone_resources(disk); xa_destroy(&disk->part_tbl); + kobject_put(&disk->queue_kobj); disk->queue->disk = NULL; blk_put_queue(disk->queue); @@ -1486,6 +1487,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, INIT_LIST_HEAD(&disk->slave_bdevs); #endif mutex_init(&disk->rqos_state_mutex); + kobject_init(&disk->queue_kobj, &blk_queue_ktype); return disk; out_erase_part0: From 593d9e4c3d634c370f226f55453c376bf43b3684 Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Mon, 11 Aug 2025 13:24:26 +0800 Subject: [PATCH 0146/1307] fs: fix incorrect lflags value in the move_mount syscall The lflags value used to look up from_path was overwritten by the one used to look up to_path. In other words, from_path was looked up with the wrong lflags value. Fix it. Fixes: f9fde814de37 ("fs: support getname_maybe_null() in move_mount()") Signed-off-by: Yuntao Wang Link: https://lore.kernel.org/20250811052426.129188-1-yuntao.wang@linux.dev [Christian Brauner : massage patch] Signed-off-by: Christian Brauner --- fs/namespace.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index ceb6b57e6a57..43f32ee9f95c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4551,20 +4551,10 @@ SYSCALL_DEFINE5(move_mount, if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION; if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH; - lflags = 0; - if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; uflags = 0; - if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH; - from_name = getname_maybe_null(from_pathname, uflags); - if (IS_ERR(from_name)) - return PTR_ERR(from_name); + if (flags & MOVE_MOUNT_T_EMPTY_PATH) + uflags = AT_EMPTY_PATH; - lflags = 0; - if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; - uflags = 0; - if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH; to_name = getname_maybe_null(to_pathname, uflags); if (IS_ERR(to_name)) return PTR_ERR(to_name); @@ -4577,11 +4567,24 @@ SYSCALL_DEFINE5(move_mount, to_path = fd_file(f_to)->f_path; path_get(&to_path); } else { + lflags = 0; + if (flags & MOVE_MOUNT_T_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL); if (ret) return ret; } + uflags = 0; + if (flags & MOVE_MOUNT_F_EMPTY_PATH) + uflags = AT_EMPTY_PATH; + + from_name = getname_maybe_null(from_pathname, uflags); + if (IS_ERR(from_name)) + return PTR_ERR(from_name); + if (!from_name && from_dfd >= 0) { CLASS(fd_raw, f_from)(from_dfd); if (fd_empty(f_from)) @@ -4590,6 +4593,11 @@ SYSCALL_DEFINE5(move_mount, return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags); } + lflags = 0; + if (flags & MOVE_MOUNT_F_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL); if (ret) return ret; From 6d3c3ca4c77e93660cce5819bf707f75df03e0c8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 8 Aug 2025 15:28:47 +0200 Subject: [PATCH 0147/1307] module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES Christoph suggested that the explicit _GPL_ can be dropped from the module namespace export macro, as it's intended for in-tree modules only. It would be possible to restrict it technically, but it was pointed out [2] that some cases of using an out-of-tree build of an in-tree module with the same name are legitimate. But in that case those also have to be GPL anyway so it's unnecessary to spell it out in the macro name. Link: https://lore.kernel.org/all/aFleJN_fE-RbSoFD@infradead.org/ [1] Link: https://lore.kernel.org/all/CAK7LNATRkZHwJGpojCnvdiaoDnP%2BaeUXgdey5sb_8muzdWTMkA@mail.gmail.com/ [2] Suggested-by: Christoph Hellwig Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Acked-by: Nicolas Schier Reviewed-by: Daniel Gomez Reviewed-by: Christian Brauner Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/20250808-export_modules-v4-1-426945bcc5e1@suse.cz Signed-off-by: Christian Brauner --- Documentation/core-api/symbol-namespaces.rst | 11 ++++++----- drivers/tty/serial/8250/8250_rsa.c | 8 ++++---- fs/anon_inodes.c | 2 +- include/linux/export.h | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 32fc73dc5529..034898e81ba2 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -76,20 +76,21 @@ unit as preprocessor statement. The above example would then read:: within the corresponding compilation unit before the #include for . Typically it's placed before the first #include statement. -Using the EXPORT_SYMBOL_GPL_FOR_MODULES() macro ------------------------------------------------ +Using the EXPORT_SYMBOL_FOR_MODULES() macro +------------------------------------------- Symbols exported using this macro are put into a module namespace. This -namespace cannot be imported. +namespace cannot be imported. These exports are GPL-only as they are only +intended for in-tree modules. The macro takes a comma separated list of module names, allowing only those modules to access this symbol. Simple tail-globs are supported. For example:: - EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") + EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*") -will limit usage of this symbol to modules whoes name matches the given +will limit usage of this symbol to modules whose name matches the given patterns. How to use Symbols exported in Namespaces diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c index d34093cc03ad..12a65b79583c 100644 --- a/drivers/tty/serial/8250/8250_rsa.c +++ b/drivers/tty/serial/8250/8250_rsa.c @@ -147,7 +147,7 @@ void rsa_enable(struct uart_8250_port *up) if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_enable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_enable, "8250_base"); /* * Attempts to turn off the RSA FIFO and resets the RSA board back to 115kbps compat mode. It is @@ -179,7 +179,7 @@ void rsa_disable(struct uart_8250_port *up) up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; uart_port_unlock_irq(&up->port); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_disable, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_disable, "8250_base"); void rsa_autoconfig(struct uart_8250_port *up) { @@ -192,7 +192,7 @@ void rsa_autoconfig(struct uart_8250_port *up) if (__rsa_enable(up)) up->port.type = PORT_RSA; } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_autoconfig, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_autoconfig, "8250_base"); void rsa_reset(struct uart_8250_port *up) { @@ -201,7 +201,7 @@ void rsa_reset(struct uart_8250_port *up) serial_out(up, UART_RSA_FRR, 0); } -EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_reset, "8250_base"); +EXPORT_SYMBOL_FOR_MODULES(rsa_reset, "8250_base"); #ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS #ifndef MODULE diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1d847a939f29..180a458fc4f7 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n } return inode; } -EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); +EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ From b26e2afb3834d4a61ce54c8484ff6014bef0b4b7 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 11 Aug 2025 16:27:16 +0300 Subject: [PATCH 0148/1307] ALSA: hda/realtek: Fix headset mic on HONOR BRB-X Add a PCI quirk to enable microphone input on the headphone jack on the HONOR BRB-X M1010 laptop. Signed-off-by: Vasiliy Kovalev Cc: Link: https://patch.msgid.link/20250811132716.45076-1-kovalev@altlinux.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 337e33a59de8..e90c4047ea62 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7140,6 +7140,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1ee7, 0x2078, "HONOR BRB-X M1010", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From d28b9d2925b4f773adb21b1fc20260ddc370fb13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Mon, 30 Jun 2025 11:00:53 +0200 Subject: [PATCH 0149/1307] drm/tests: Fix endian warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling with sparse enabled, this warning is thrown: warning: incorrect type in argument 2 (different base types) expected restricted __le32 const [usertype] *buf got unsigned int [usertype] *[assigned] buf Add a cast to fix it. Fixes: 453114319699 ("drm/format-helper: Add KUnit tests for drm_fb_xrgb8888_to_xrgb2101010()") Signed-off-by: José Expósito Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250630090054.353246-1-jose.exposito89@gmail.com --- drivers/gpu/drm/tests/drm_format_helper_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 7299fa8971ce..86829e1cb7f0 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -1033,7 +1033,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) NULL : &result->dst_pitch; drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); - buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ From 05663d88fd0b8ee1c54ab2d5fb36f9b6a3ed37f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Mon, 30 Jun 2025 11:00:54 +0200 Subject: [PATCH 0150/1307] drm/tests: Fix drm_test_fb_xrgb8888_to_xrgb2101010() on big-endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix failures on big-endian architectures on tests cases single_pixel_source_buffer, single_pixel_clip_rectangle, well_known_colors and destination_pitch. Fixes: 15bda1f8de5d ("drm/tests: Add calls to drm_fb_blit() on supported format conversion tests") Signed-off-by: José Expósito Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250630090054.353246-2-jose.exposito89@gmail.com --- drivers/gpu/drm/tests/drm_format_helper_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index 86829e1cb7f0..981dada8f3a8 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -1040,6 +1040,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) memset(buf, 0, dst_size); drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } From 5634c8cb298a7146b4e38873473e280b50e27a2c Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 18 Jul 2025 16:20:51 +0530 Subject: [PATCH 0151/1307] iosys-map: Fix undefined behavior in iosys_map_clear() The current iosys_map_clear() implementation reads the potentially uninitialized 'is_iomem' boolean field to decide which union member to clear. This causes undefined behavior when called on uninitialized structures, as 'is_iomem' may contain garbage values like 0xFF. UBSAN detects this as: UBSAN: invalid-load in include/linux/iosys-map.h:267 load of value 255 is not a valid value for type '_Bool' Fix by unconditionally clearing the entire structure with memset(), eliminating the need to read uninitialized data and ensuring all fields are set to known good values. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14639 Fixes: 01fd30da0474 ("dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr") Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250718105051.2709487-1-nitin.r.gote@intel.com --- include/linux/iosys-map.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** From 4fa7d880aeb8cdbdaa4fb72be3e53ac1d6bcc088 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Mon, 21 Jul 2025 11:03:10 -0500 Subject: [PATCH 0152/1307] x86/bugs: Select best SRSO mitigation The SRSO bug can theoretically be used to conduct user->user or guest->guest attacks and requires a mitigation (namely IBPB instead of SBPB on context switch) for these. So mark SRSO as being applicable to the user->user and guest->guest attack vectors. Additionally, SRSO supports multiple mitigations which mitigate different potential attack vectors. Some CPUs are also immune to SRSO from certain attack vectors (like user->kernel). Use the specific attack vectors requiring mitigation to select the best SRSO mitigation to avoid unnecessary performance hits. Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250721160310.1804203-1-david.kaplan@amd.com --- .../admin-guide/hw-vuln/attack_vector_controls.rst | 2 +- arch/x86/kernel/cpu/bugs.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst index b4de16f5ec44..6dd0800146f6 100644 --- a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst +++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst @@ -214,7 +214,7 @@ Spectre_v1 X Spectre_v2 X X Spectre_v2_user X X * (Note 1) SRBDS X X X X -SRSO X X +SRSO X X X X SSB (Note 4) TAA X X X X * (Note 2) TSA X X X X diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b74bf937cd9f..2186a771b9fc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -386,7 +386,6 @@ static bool __init should_mitigate_vuln(unsigned int bug) case X86_BUG_SPECTRE_V2: case X86_BUG_RETBLEED: - case X86_BUG_SRSO: case X86_BUG_L1TF: case X86_BUG_ITS: return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || @@ -3184,8 +3183,18 @@ static void __init srso_select_mitigation(void) } if (srso_mitigation == SRSO_MITIGATION_AUTO) { - if (should_mitigate_vuln(X86_BUG_SRSO)) { + /* + * Use safe-RET if user->kernel or guest->host protection is + * required. Otherwise the 'microcode' mitigation is sufficient + * to protect the user->user and guest->guest vectors. + */ + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) && + !boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO))) { srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; } else { srso_mitigation = SRSO_MITIGATION_NONE; return; From f87d597e8393f7038de046ed7f13bb176a4ead55 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 8 Aug 2025 11:20:49 +0200 Subject: [PATCH 0153/1307] MAINTAINERS: entry for DRM GPUVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPUVM deserves a bit more coordination, also given the upcoming Rust work for GPUVM, hence add a dedicated maintainers entry for DRM GPUVM. Cc: Boris Brezillon Cc: Matthew Brost Cc: Thomas Hellström Cc: Alice Ryhl Acked-by: Thomas Hellström Acked-by: Matthew Brost Acked-by: Alice Ryhl Link: https://lore.kernel.org/r/20250808092432.461250-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..20ffd334e0a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8426,6 +8426,17 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/scheduler/ F: include/drm/gpu_scheduler.h +DRM GPUVM +M: Danilo Krummrich +R: Matthew Brost +R: Thomas Hellström +R: Alice Ryhl +L: dri-devel@lists.freedesktop.org +S: Supported +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: drivers/gpu/drm/drm_gpuvm.c +F: include/drm/drm_gpuvm.h + DRM LOG M: Jocelyn Falempe M: Javier Martinez Canillas From dfb36e4a8db0cd56f92d4cb445f54e85a9b40897 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 11 Aug 2025 10:11:47 -0400 Subject: [PATCH 0154/1307] futex: Use user_write_access_begin/_end() in futex_put_value() Commit cec199c5e39b ("futex: Implement FUTEX2_NUMA") introduced the futex_put_value() helper to write a value to the given user address. However, it uses user_read_access_begin() before the write. For architectures that differentiate between read and write accesses, like PowerPC, futex_put_value() fails with -EFAULT. Fix that by using the user_write_access_begin/user_write_access_end() pair instead. Fixes: cec199c5e39b ("futex: Implement FUTEX2_NUMA") Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250811141147.322261-1-longman@redhat.com --- kernel/futex/futex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index c74eac572acd..2cd57096c38e 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -319,13 +319,13 @@ static __always_inline int futex_put_value(u32 val, u32 __user *to) { if (can_do_masked_user_access()) to = masked_user_access_begin(to); - else if (!user_read_access_begin(to, sizeof(*to))) + else if (!user_write_access_begin(to, sizeof(*to))) return -EFAULT; unsafe_put_user(val, to, Efault); - user_read_access_end(); + user_write_access_end(); return 0; Efault: - user_read_access_end(); + user_write_access_end(); return -EFAULT; } From 4b0ad968717eb1862b74c1f549e4225bf95c286f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Aug 2025 16:15:27 +0200 Subject: [PATCH 0155/1307] regulator: dt-bindings: infineon,ir38060: Add Guenter as maintainer from IBM The infineon,ir38060 binding never got maintainer and fake "Not Me" entry have been causing dt_binding_check warnings for 1.5 years now: regulator/infineon,ir38060.yaml: maintainers:0: 'Not Me.' does not match '@' Guenter agreed to keep an eye for this hardware and binding. Cc: Guenter Roeck Cc: Conor Dooley Cc: Andrew Jeffery Cc: Ninad Palsule Signed-off-by: Krzysztof Kozlowski Acked-by: Guenter Roeck Link: https://patch.msgid.link/20250811141526.168752-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/infineon,ir38060.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml b/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml index e6ffbc2a2298..57ff6bf1e188 100644 --- a/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml +++ b/Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Infineon Buck Regulators with PMBUS interfaces maintainers: - - Not Me. + - Guenter Roeck allOf: - $ref: regulator.yaml# From d8b96a79622e03813c221450498ca9742704ebf2 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:57 +0800 Subject: [PATCH 0156/1307] blk-wbt: Optimize wbt_done() for non-throttled writes In the current implementation, the sync_cookie and last_cookie members of struct rq_wb are used only by read requests and not by non-throttled write requests. Based on this, we can optimize wbt_done() by removing one if condition check for non-throttled write requests. Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250727173959.160835-2-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- block/blk-wbt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index a50d4cd55f41..30886d44f6cd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -248,13 +248,14 @@ static void wbt_done(struct rq_qos *rqos, struct request *rq) struct rq_wb *rwb = RQWB(rqos); if (!wbt_is_tracked(rq)) { - if (rwb->sync_cookie == rq) { - rwb->sync_issue = 0; - rwb->sync_cookie = NULL; - } + if (wbt_is_read(rq)) { + if (rwb->sync_cookie == rq) { + rwb->sync_issue = 0; + rwb->sync_cookie = NULL; + } - if (wbt_is_read(rq)) wb_timestamp(rwb, &rwb->last_comp); + } } else { WARN_ON_ONCE(rq == rwb->sync_cookie); __wbt_done(rqos, wbt_flags(rq)); From bccdfcd56d4b5b78d0d76f46d0e89a51330dfd75 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:58 +0800 Subject: [PATCH 0157/1307] blk-wbt: Eliminate ambiguity in the comments of struct rq_wb In the current implementation, the last_issue and last_comp members of struct rq_wb are used only by read requests and not by non-throttled write requests. Therefore, eliminate the ambiguity here. Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20250727173959.160835-3-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- block/blk-wbt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 30886d44f6cd..eb8037bae0bd 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -85,8 +85,8 @@ struct rq_wb { u64 sync_issue; void *sync_cookie; - unsigned long last_issue; /* last non-throttled issue */ - unsigned long last_comp; /* last non-throttled comp */ + unsigned long last_issue; /* issue time of last read rq */ + unsigned long last_comp; /* completion time of last read rq */ unsigned long min_lat_nsec; struct rq_qos rqos; struct rq_wait rq_wait[WBT_NUM_RWQ]; From 0452f08395f8e7d04fe3744443dad396b3330d0c Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Mon, 28 Jul 2025 01:39:59 +0800 Subject: [PATCH 0158/1307] blk-wbt: doc: Update the doc of the wbt_lat_usec interface The symbol wb_window_usec cannot be found. Update the doc to reflect the latest implementation, in other words, the debugfs interface 'curr_win_nsec'. Signed-off-by: Tang Yizhou Reviewed-by: Jan Kara Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250727173959.160835-4-yizhou.tang@shopee.com Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 803f578dc023..0ddffc9133d0 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -731,7 +731,7 @@ Contact: linux-block@vger.kernel.org Description: [RW] If the device is registered for writeback throttling, then this file shows the target minimum read latency. If this latency - is exceeded in a given window of time (see wb_window_usec), then + is exceeded in a given window of time (see curr_win_nsec), then the writeback throttling will start scaling back writes. Writing a value of '0' to this file disables the feature. Writing a value of '-1' to this file resets the value to the default From ddf7233fcab6c247379d0928d46cc316ee122229 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 5 Aug 2025 10:59:11 +0200 Subject: [PATCH 0159/1307] sched/ext: Fix invalid task state transitions on class switch When enabling a sched_ext scheduler, we may trigger invalid task state transitions, resulting in warnings like the following (which can be easily reproduced by running the hotplug selftest in a loop): sched_ext: Invalid task state transition 0 -> 3 for fish[770] WARNING: CPU: 18 PID: 787 at kernel/sched/ext.c:3862 scx_set_task_state+0x7c/0xc0 ... RIP: 0010:scx_set_task_state+0x7c/0xc0 ... Call Trace: scx_enable_task+0x11f/0x2e0 switching_to_scx+0x24/0x110 scx_enable.isra.0+0xd14/0x13d0 bpf_struct_ops_link_create+0x136/0x1a0 __sys_bpf+0x1edd/0x2c30 __x64_sys_bpf+0x21/0x30 do_syscall_64+0xbb/0x370 entry_SYSCALL_64_after_hwframe+0x77/0x7f This happens because we skip initialization for tasks that are already dead (with their usage counter set to zero), but we don't exclude them during the scheduling class transition phase. Fix this by also skipping dead tasks during class swiching, preventing invalid task state transitions. Fixes: a8532fac7b5d2 ("sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7dedc9a16281..4ae32ef179dd 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5749,6 +5749,9 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) __setscheduler_class(p->policy, p->prio); struct sched_enq_and_set_ctx ctx; + if (!tryget_task_struct(p)) + continue; + if (old_class != new_class && p->se.sched_delayed) dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); @@ -5761,6 +5764,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) sched_enq_and_set_task(&ctx); check_class_changed(task_rq(p), p, old_class, p->prio); + put_task_struct(p); } scx_task_iter_stop(&sti); percpu_up_write(&scx_fork_rwsem); From e69980bd16f264581c3f606bae987e54f0ba8c4a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:04:12 +0800 Subject: [PATCH 0160/1307] selftests/sched_ext: Remove duplicate sched.h header ./tools/testing/selftests/sched_ext/hotplug.c: sched.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22941 Signed-off-by: Jiapeng Chong Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/hotplug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 1c9ceb661c43..0cfbb111a2d0 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include From e91a158b694d7f4bd937763dde79ed0afa472d8a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Aug 2025 15:37:14 -0400 Subject: [PATCH 0161/1307] intel_idle: Allow loading ACPI tables for any family There is no reason to limit intel_idle's loading of ACPI tables to family 6. Upcoming Intel processors are not in family 6. Below "Fixes" really means "applies cleanly until". That syntax commit didn't change the previous logic, but shows this patch applies back 5-years. Fixes: 4a9f45a0533f ("intel_idle: Convert to new X86 CPU match macros") Signed-off-by: Len Brown Link: https://patch.msgid.link/06101aa4fe784e5b0be1cb2c0bdd9afcf16bd9d4.1754681697.git.len.brown@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 73747d20df85..91a7b7e7c0c8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1679,7 +1679,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { }; static const struct x86_cpu_id intel_mwait_ids[] __initconst = { - X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, X86_FAMILY_ANY, X86_FEATURE_MWAIT, NULL), {} }; From fa3fa55de0d6177fdcaf6fc254f13cc8f33c3eed Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 Aug 2025 17:03:11 +0200 Subject: [PATCH 0162/1307] cpuidle: governors: menu: Avoid using invalid recent intervals data Marc has reported that commit 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") caused the number of wakeup interrupts to increase on an idle system [1], which was not expected to happen after merely allowing shallower idle states to be selected by the governor in some cases. However, on the system in question, all of the idle states deeper than WFI are rejected by the driver due to a firmware issue [2]. This causes the governor to only consider the recent interval duriation data corresponding to attempts to enter WFI that are successful and the recent invervals table is filled with values lower than the scheduler tick period. Consequently, the governor predicts an idle duration below the scheduler tick period length and avoids stopping the tick more often which leads to the observed symptom. Address it by modifying the governor to update the recent intervals table also when entering the previously selected idle state fails, so it knows that the short idle intervals might have been the minority had the selected idle states been actually entered every time. Fixes: 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") Link: https://lore.kernel.org/linux-pm/86o6sv6n94.wl-maz@kernel.org/ [1] Link: https://lore.kernel.org/linux-pm/7ffcb716-9a1b-48c2-aaa4-469d0df7c792@arm.com/ [2] Signed-off-by: Rafael J. Wysocki Tested-by: Christian Loehle Tested-by: Marc Zyngier Reviewed-by: Christian Loehle Link: https://patch.msgid.link/2793874.mvXUDI8C0e@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 52d5d26fc7c6..81306612a5c6 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -97,6 +97,14 @@ static inline int which_bucket(u64 duration_ns) static DEFINE_PER_CPU(struct menu_device, menu_devices); +static void menu_update_intervals(struct menu_device *data, unsigned int interval_us) +{ + /* Update the repeating-pattern data. */ + data->intervals[data->interval_ptr++] = interval_us; + if (data->interval_ptr >= INTERVALS) + data->interval_ptr = 0; +} + static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); /* @@ -222,6 +230,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (data->needs_update) { menu_update(drv, dev); data->needs_update = 0; + } else if (!dev->last_residency_ns) { + /* + * This happens when the driver rejects the previously selected + * idle state and returns an error, so update the recent + * intervals table to prevent invalid information from being + * used going forward. + */ + menu_update_intervals(data, UINT_MAX); } /* Find the shortest expected idle interval. */ @@ -482,10 +498,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->correction_factor[data->bucket] = new_factor; - /* update the repeating-pattern data */ - data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns); - if (data->interval_ptr >= INTERVALS) - data->interval_ptr = 0; + menu_update_intervals(data, ktime_to_us(measured_ns)); } /** From 3ead77989c20cb2d774a3b6045d7a928b6fb53ed Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 8 Aug 2025 07:51:22 -0700 Subject: [PATCH 0163/1307] cpufreq: intel_pstate: Support Clearwater Forest OOB mode Prevent intel_pstate from loading when OOB (Out Of Band) P-states mode is enabled. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250808145122.4057208-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 06a1c7dd081f..f366d35c5840 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2793,6 +2793,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs), X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs), X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs), + X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs), {} }; #endif From 31cd31c9e17ece125aad27259501a2af69ccb020 Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Mon, 11 Aug 2025 11:50:44 -0700 Subject: [PATCH 0164/1307] x86/fpu: Fix NULL dereference in avx512_status() Problem ------- With CONFIG_X86_DEBUG_FPU enabled, reading /proc/[kthread]/arch_status causes a warning and a NULL pointer dereference. This is because the AVX-512 timestamp code uses x86_task_fpu() but doesn't check it for NULL. CONFIG_X86_DEBUG_FPU addles that function for kernel threads (PF_KTHREAD specifically), making it return NULL. The point of the warning was to ensure that kernel threads only access task->fpu after going through kernel_fpu_begin()/_end(). Note: all kernel tasks exposed in /proc have a valid task->fpu. Solution -------- One option is to silence the warning and check for NULL from x86_task_fpu(). However, that warning is fairly fresh and seems like a defense against misuse of the FPU state in kernel threads. Instead, stop outputting AVX-512_elapsed_ms for kernel threads altogether. The data was garbage anyway because avx512_timestamp is only updated for user threads, not kernel threads. If anyone ever wants to track kernel thread AVX-512 use, they can come back later and do it properly, separate from this bug fix. [ dhansen: mostly rewrite changelog ] Fixes: 22aafe3bcb67 ("x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks") Co-developed-by: Sohil Mehta Signed-off-by: Sohil Mehta Signed-off-by: Fushuai Wang Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250811185044.2227268-1-sohil.mehta%40intel.com --- arch/x86/kernel/fpu/xstate.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 12ed75c1b567..28e4fd65c9da 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1881,19 +1881,20 @@ long fpu_xstate_prctl(int option, unsigned long arg2) #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 - * use in the task. + * use in the task. Report -1 if no AVX-512 usage. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { - unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); - long delta; + unsigned long timestamp; + long delta = -1; - if (!timestamp) { - /* - * Report -1 if no AVX512 usage - */ - delta = -1; - } else { + /* AVX-512 usage is not tracked for kernel threads. Don't report anything. */ + if (task->flags & (PF_KTHREAD | PF_USER_WORKER)) + return; + + timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); + + if (timestamp) { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX From 4e5b705cc6147f0b9173c6219079f41416bdd3c0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 23 Jul 2025 10:29:12 -0500 Subject: [PATCH 0165/1307] iio: accel: sca3300: fix uninitialized iio scan data Fix potential leak of uninitialized stack data to userspace by ensuring that the `channels` array is zeroed before use. Fixes: edeb67fbbf4b ("iio: accel: sca3300: use IIO_DECLARE_BUFFER_WITH_TS") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250723-iio-accel-sca3300-fix-uninitialized-iio-scan-data-v1-1-12dbfb3307b7@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/sca3300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c index bda370c0f660..8380b237831c 100644 --- a/drivers/iio/accel/sca3300.c +++ b/drivers/iio/accel/sca3300.c @@ -477,7 +477,7 @@ static irqreturn_t sca3300_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct sca3300_data *data = iio_priv(indio_dev); int bit, ret, val, i = 0; - IIO_DECLARE_BUFFER_WITH_TS(s16, channels, SCA3300_SCAN_MAX); + IIO_DECLARE_BUFFER_WITH_TS(s16, channels, SCA3300_SCAN_MAX) = { }; iio_for_each_active_channel(indio_dev, bit) { ret = sca3300_read_reg(data, indio_dev->channels[bit].address, &val); From de18e978d0cda23e4c102e18092b63a5b0b3a800 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 15:54:21 -0500 Subject: [PATCH 0166/1307] iio: proximity: isl29501: fix buffered read on big-endian systems Fix passing a u32 value as a u16 buffer scan item. This works on little- endian systems, but not on big-endian systems. A new local variable is introduced for getting the register value and the array is changed to a struct to make the data layout more explicit rather than just changing the type and having to recalculate the proper length needed for the timestamp. Fixes: 1c28799257bc ("iio: light: isl29501: Add support for the ISL29501 ToF sensor.") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-use-more-iio_declare_buffer_with_ts-7-v2-1-d3ebeb001ed3@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/isl29501.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/iio/proximity/isl29501.c b/drivers/iio/proximity/isl29501.c index d1510fe24050..f69db6f2f380 100644 --- a/drivers/iio/proximity/isl29501.c +++ b/drivers/iio/proximity/isl29501.c @@ -938,12 +938,18 @@ static irqreturn_t isl29501_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct isl29501_private *isl29501 = iio_priv(indio_dev); const unsigned long *active_mask = indio_dev->active_scan_mask; - u32 buffer[4] __aligned(8) = {}; /* 1x16-bit + naturally aligned ts */ + u32 value; + struct { + u16 data; + aligned_s64 ts; + } scan = { }; - if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask)) - isl29501_register_read(isl29501, REG_DISTANCE, buffer); + if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask)) { + isl29501_register_read(isl29501, REG_DISTANCE, &value); + scan.data = value; + } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; From 1cfb22c277c7274f54babaa5b416dfbc00181e16 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 14:20:07 -0500 Subject: [PATCH 0167/1307] iio: adc: ad7173: prevent scan if too many setups requested Add a check to ad7173_update_scan_mode() to ensure that we didn't exceed the maximum number of unique channel configurations. In the AD7173 family of chips, there are some chips that have 16 CHANNELx registers but only 8 setups (combination of CONFIGx, FILTERx, GAINx and OFFSETx registers). Since commit 92c247216918 ("iio: adc: ad7173: fix num_slots"), it is possible to have more than 8 channels enabled in a scan at the same time, so it is possible to get a bad configuration when more than 8 channels are using unique configurations. This happens because the algorithm to allocate the setup slots only takes into account which slot has been least recently used and doesn't know about the maximum number of slots available. Since the algorithm to allocate the setup slots is quite complex, it is simpler to check after the fact if the current state is valid or not. So this patch adds a check in ad7173_update_scan_mode() after setting up all of the configurations to make sure that the actual setup still matches the requested setup for each enabled channel. If not, we prevent the scan from being enabled and return an error. The setup comparison in ad7173_setup_equal() is refactored to a separate function since we need to call it in two places now. Fixes: 92c247216918 ("iio: adc: ad7173: fix num_slots") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-adc-ad7173-fix-setup-use-limits-v2-1-8e96bdb72a9c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7173.c | 87 ++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 12 deletions(-) diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index 4413207be28f..683146e83ab2 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -200,7 +200,7 @@ struct ad7173_channel_config { /* * Following fields are used to compare equality. If you * make adaptations in it, you most likely also have to adapt - * ad7173_find_live_config(), too. + * ad7173_is_setup_equal(), too. */ struct_group(config_props, bool bipolar; @@ -561,12 +561,19 @@ static void ad7173_reset_usage_cnts(struct ad7173_state *st) st->config_usage_counter = 0; } -static struct ad7173_channel_config * -ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg) +/** + * ad7173_is_setup_equal - Compare two channel setups + * @cfg1: First channel configuration + * @cfg2: Second channel configuration + * + * Compares all configuration options that affect the registers connected to + * SETUP_SEL, namely CONFIGx, FILTERx, GAINx and OFFSETx. + * + * Returns: true if the setups are identical, false otherwise + */ +static bool ad7173_is_setup_equal(const struct ad7173_channel_config *cfg1, + const struct ad7173_channel_config *cfg2) { - struct ad7173_channel_config *cfg_aux; - int i; - /* * This is just to make sure that the comparison is adapted after * struct ad7173_channel_config was changed. @@ -579,14 +586,22 @@ ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *c u8 ref_sel; })); + return cfg1->bipolar == cfg2->bipolar && + cfg1->input_buf == cfg2->input_buf && + cfg1->odr == cfg2->odr && + cfg1->ref_sel == cfg2->ref_sel; +} + +static struct ad7173_channel_config * +ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg) +{ + struct ad7173_channel_config *cfg_aux; + int i; + for (i = 0; i < st->num_channels; i++) { cfg_aux = &st->channels[i].cfg; - if (cfg_aux->live && - cfg->bipolar == cfg_aux->bipolar && - cfg->input_buf == cfg_aux->input_buf && - cfg->odr == cfg_aux->odr && - cfg->ref_sel == cfg_aux->ref_sel) + if (cfg_aux->live && ad7173_is_setup_equal(cfg, cfg_aux)) return cfg_aux; } return NULL; @@ -1228,7 +1243,7 @@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev, const unsigned long *scan_mask) { struct ad7173_state *st = iio_priv(indio_dev); - int i, ret; + int i, j, k, ret; for (i = 0; i < indio_dev->num_channels; i++) { if (test_bit(i, scan_mask)) @@ -1239,6 +1254,54 @@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev, return ret; } + /* + * On some chips, there are more channels that setups, so if there were + * more unique setups requested than the number of available slots, + * ad7173_set_channel() will have written over some of the slots. We + * can detect this by making sure each assigned cfg_slot matches the + * requested configuration. If it doesn't, we know that the slot was + * overwritten by a different channel. + */ + for_each_set_bit(i, scan_mask, indio_dev->num_channels) { + const struct ad7173_channel_config *cfg1, *cfg2; + + cfg1 = &st->channels[i].cfg; + + for_each_set_bit(j, scan_mask, indio_dev->num_channels) { + cfg2 = &st->channels[j].cfg; + + /* + * Only compare configs that are assigned to the same + * SETUP_SEL slot and don't compare channel to itself. + */ + if (i == j || cfg1->cfg_slot != cfg2->cfg_slot) + continue; + + /* + * If we find two different configs trying to use the + * same SETUP_SEL slot, then we know that the that we + * have too many unique configurations requested for + * the available slots and at least one was overwritten. + */ + if (!ad7173_is_setup_equal(cfg1, cfg2)) { + /* + * At this point, there isn't a way to tell + * which setups are actually programmed in the + * ADC anymore, so we could read them back to + * see, but it is simpler to just turn off all + * of the live flags so that everything gets + * reprogramed on the next attempt read a sample. + */ + for (k = 0; k < st->num_channels; k++) + st->channels[k].cfg.live = false; + + dev_err(&st->sd.spi->dev, + "Too many unique channel configurations requested for scan\n"); + return -EINVAL; + } + } + } + return 0; } From ae5bc07ec9f73a41734270ef3f800c5c8a7e0ad3 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 21 Jul 2025 18:04:04 -0500 Subject: [PATCH 0168/1307] iio: temperature: maxim_thermocouple: use DMA-safe buffer for spi_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace using stack-allocated buffers with a DMA-safe buffer for use with spi_read(). This allows the driver to be safely used with DMA-enabled SPI controllers. The buffer array is also converted to a struct with a union to make the usage of the memory in the buffer more clear and ensure proper alignment. Fixes: 1f25ca11d84a ("iio: temperature: add support for Maxim thermocouple chips") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250721-iio-use-more-iio_declare_buffer_with_ts-3-v2-1-0c68d41ccf6c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/maxim_thermocouple.c | 26 ++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index cae8e84821d7..205939680fd4 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -121,8 +122,15 @@ struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; char tc_type; - - u8 buffer[16] __aligned(IIO_DMA_MINALIGN); + /* Buffer for reading up to 2 hardware channels. */ + struct { + union { + __be16 raw16; + __be32 raw32; + __be16 raw[2]; + }; + aligned_s64 timestamp; + } buffer __aligned(IIO_DMA_MINALIGN); }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, @@ -130,18 +138,16 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, { unsigned int storage_bytes = data->chip->read_size; unsigned int shift = chan->scan_type.shift + (chan->address * 8); - __be16 buf16; - __be32 buf32; int ret; switch (storage_bytes) { case 2: - ret = spi_read(data->spi, (void *)&buf16, storage_bytes); - *val = be16_to_cpu(buf16); + ret = spi_read(data->spi, &data->buffer.raw16, storage_bytes); + *val = be16_to_cpu(data->buffer.raw16); break; case 4: - ret = spi_read(data->spi, (void *)&buf32, storage_bytes); - *val = be32_to_cpu(buf32); + ret = spi_read(data->spi, &data->buffer.raw32, storage_bytes); + *val = be32_to_cpu(data->buffer.raw32); break; default: ret = -EINVAL; @@ -166,9 +172,9 @@ static irqreturn_t maxim_thermocouple_trigger_handler(int irq, void *private) struct maxim_thermocouple_data *data = iio_priv(indio_dev); int ret; - ret = spi_read(data->spi, data->buffer, data->chip->read_size); + ret = spi_read(data->spi, data->buffer.raw, data->chip->read_size); if (!ret) { - iio_push_to_buffers_with_ts(indio_dev, data->buffer, + iio_push_to_buffers_with_ts(indio_dev, &data->buffer, sizeof(data->buffer), iio_get_time_ns(indio_dev)); } From 197e299aae42ffa19028eaea92b2f30dd9fb8445 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sat, 26 Jul 2025 11:28:48 -0500 Subject: [PATCH 0169/1307] iio: adc: ad7124: fix channel lookup in syscalib functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix possible incorrect channel lookup in the syscalib functions by using the correct channel address instead of the channel number. In the ad7124 driver, the channel field of struct iio_chan_spec is the input pin number of the positive input of the channel. This can be, but is not always the same as the index in the channels array. The correct index in the channels array is stored in the address field (and also scan_index). We use the address field to perform the correct lookup. Fixes: 47036a03a303 ("iio: adc: ad7124: Implement internal calibration at probe time") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250726-iio-adc-ad7124-fix-channel-lookup-in-syscalib-v1-1-b9d14bb684af@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 9808df2e9242..4d8c6bafd1c3 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -849,7 +849,7 @@ enum { static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan_spec *chan) { struct device *dev = &st->sd.spi->dev; - struct ad7124_channel *ch = &st->channels[chan->channel]; + struct ad7124_channel *ch = &st->channels[chan->address]; int ret; if (ch->syscalib_mode == AD7124_SYSCALIB_ZERO_SCALE) { @@ -865,8 +865,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan if (ret < 0) return ret; - dev_dbg(dev, "offset for channel %d after zero-scale calibration: 0x%x\n", - chan->channel, ch->cfg.calibration_offset); + dev_dbg(dev, "offset for channel %lu after zero-scale calibration: 0x%x\n", + chan->address, ch->cfg.calibration_offset); } else { ch->cfg.calibration_gain = st->gain_default; @@ -880,8 +880,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan if (ret < 0) return ret; - dev_dbg(dev, "gain for channel %d after full-scale calibration: 0x%x\n", - chan->channel, ch->cfg.calibration_gain); + dev_dbg(dev, "gain for channel %lu after full-scale calibration: 0x%x\n", + chan->address, ch->cfg.calibration_gain); } return 0; @@ -924,7 +924,7 @@ static int ad7124_set_syscalib_mode(struct iio_dev *indio_dev, { struct ad7124_state *st = iio_priv(indio_dev); - st->channels[chan->channel].syscalib_mode = mode; + st->channels[chan->address].syscalib_mode = mode; return 0; } @@ -934,7 +934,7 @@ static int ad7124_get_syscalib_mode(struct iio_dev *indio_dev, { struct ad7124_state *st = iio_priv(indio_dev); - return st->channels[chan->channel].syscalib_mode; + return st->channels[chan->address].syscalib_mode; } static const struct iio_enum ad7124_syscalib_mode_enum = { From dfdc31e7ccf3ac1d5ec01d5120c71e14745e3dd8 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Fri, 8 Aug 2025 09:40:10 +0200 Subject: [PATCH 0170/1307] iio: imu: inv_icm42600: change invalid data error to -EBUSY Temperature sensor returns the temperature of the mechanical parts of the chip. If both accel and gyro are off, the temperature sensor is also automatically turned off and returns invalid data. In this case, returning -EBUSY error code is better then -EINVAL and indicates userspace that it needs to retry reading temperature in another context. Fixes: bc3eb0207fb5 ("iio: imu: inv_icm42600: add temperature sensor support") Signed-off-by: Jean-Baptiste Maneyrol Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Sean Nyekjaer Link: https://patch.msgid.link/20250808-inv-icm42600-change-temperature-error-code-v1-1-986fbf63b77d@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c index 8b15afca498c..271a4788604a 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_temp.c @@ -32,8 +32,12 @@ static int inv_icm42600_temp_read(struct inv_icm42600_state *st, s16 *temp) goto exit; *temp = (s16)be16_to_cpup(raw); + /* + * Temperature data is invalid if both accel and gyro are off. + * Return -EBUSY in this case. + */ if (*temp == INV_ICM42600_DATA_INVALID) - ret = -EINVAL; + ret = -EBUSY; exit: mutex_unlock(&st->lock); From fde578c86281f27b182680c7642836a0dbbd0be7 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:06 +0200 Subject: [PATCH 0171/1307] rust: alloc: replace aligned_size() with Kmalloc::aligned_layout() aligned_size() dates back to when Rust did support kmalloc() only, but is now used in ReallocFunc::call() and hence for all allocators. However, the additional padding applied by aligned_size() is only required by the kmalloc() allocator backend. Hence, replace aligned_size() with Kmalloc::aligned_layout() and use it for the affected allocators, i.e. kmalloc() and kvmalloc(), only. While at it, make Kmalloc::aligned_layout() public, such that Rust abstractions, which have to call subsystem specific kmalloc() based allocation primitives directly, can make use of it. Fixes: 8a799831fc63 ("rust: alloc: implement `ReallocFunc`") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-2-dakr@kernel.org [ Remove `const` from Kmalloc::aligned_layout(). - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index aa2dfa9dca4c..2692cf90c948 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -43,17 +43,6 @@ /// For more details see [self]. pub struct KVmalloc; -/// Returns a proper size to alloc a new object aligned to `new_layout`'s alignment. -fn aligned_size(new_layout: Layout) -> usize { - // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. - let layout = new_layout.pad_to_align(); - - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` - // which together with the slab guarantees means the `krealloc` will return a properly aligned - // object (see comments in `kmalloc()` for more information). - layout.size() -} - /// # Invariants /// /// One of the following: `krealloc`, `vrealloc`, `kvrealloc`. @@ -88,7 +77,7 @@ unsafe fn call( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { - let size = aligned_size(layout); + let size = layout.size(); let ptr = match ptr { Some(ptr) => { if old_layout.size() == 0 { @@ -123,6 +112,17 @@ unsafe fn call( } } +impl Kmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information). + layout.pad_to_align() + } +} + // SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that // - memory remains valid until it is explicitly freed, // - passing a pointer to a valid memory allocation is OK, @@ -135,6 +135,8 @@ unsafe fn realloc( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { + let layout = Kmalloc::aligned_layout(layout); + // SAFETY: `ReallocFunc::call` has the same safety requirements as `Allocator::realloc`. unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags) } } @@ -176,6 +178,10 @@ unsafe fn realloc( old_layout: Layout, flags: Flags, ) -> Result, AllocError> { + // `KVmalloc` may use the `Kmalloc` backend, hence we have to enforce a `Kmalloc` + // compatible layout. + let layout = Kmalloc::aligned_layout(layout); + // TODO: Support alignments larger than PAGE_SIZE. if layout.align() > bindings::PAGE_SIZE { pr_warn!("KVmalloc does not support alignments larger than PAGE_SIZE yet.\n"); From 22ab0641b939967f630d108e33a3582841ad6846 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:07 +0200 Subject: [PATCH 0172/1307] rust: drm: ensure kmalloc() compatible Layout drm::Device is allocated through __drm_dev_alloc() (which uses kmalloc()) and the driver private data, ::Data, is initialized in-place. Due to the order of fields in drm::Device pub struct Device { dev: Opaque, data: T::Data, } even with an arbitrary large alignment requirement of T::Data it can't happen that the size of Device is smaller than its alignment requirement. However, let's not rely on this subtle circumstance and create a proper kmalloc() compatible Layout. Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 3bb7c83966cf..d19410deaf6c 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -5,6 +5,7 @@ //! C header: [`include/linux/drm/drm_device.h`](srctree/include/linux/drm/drm_device.h) use crate::{ + alloc::allocator::Kmalloc, bindings, device, drm, drm::driver::AllocImpl, error::from_err_ptr, @@ -12,7 +13,7 @@ prelude::*, types::{ARef, AlwaysRefCounted, Opaque}, }; -use core::{mem, ops::Deref, ptr, ptr::NonNull}; +use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull}; #[cfg(CONFIG_DRM_LEGACY)] macro_rules! drm_legacy_fields { @@ -96,6 +97,10 @@ impl Device { /// Create a new `drm::Device` for a `drm::Driver`. pub fn new(dev: &device::Device, data: impl PinInit) -> Result> { + // `__drm_dev_alloc` uses `kmalloc()` to allocate memory, hence ensure a `kmalloc()` + // compatible `Layout`. + let layout = Kmalloc::aligned_layout(Layout::new::()); + // SAFETY: // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, // - `dev` is valid by its type invarants, @@ -103,7 +108,7 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result(), + layout.size(), mem::offset_of!(Self, dev), ) } From 0c04a81c1d0214d5b2025f805ccec1ac37c96b08 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:08 +0200 Subject: [PATCH 0173/1307] rust: drm: remove pin annotations from drm::Device The #[pin_data] and #[pin] annotations are not necessary for drm::Device, since we don't use any pin-init macros, but only __pinned_init() on the impl PinInit argument of drm::Device::new(). Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250731154919.4132-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index d19410deaf6c..d0a9528121f1 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -54,10 +54,8 @@ macro_rules! drm_legacy_fields { /// /// `self.dev` is a valid instance of a `struct device`. #[repr(C)] -#[pin_data] pub struct Device { dev: Opaque, - #[pin] data: T::Data, } From 360077278ba62e81310080f075a1a3028e778ef9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 31 Jul 2025 17:48:09 +0200 Subject: [PATCH 0174/1307] rust: drm: don't pass the address of drm::Device to drm_dev_put() In drm_dev_put() call in AlwaysRefCounted::dec_ref() we rely on struct drm_device to be the first field in drm::Device, whereas everywhere else we correctly obtain the address of the actual struct drm_device. Analogous to the from_drm_device() helper, provide the into_drm_device() helper in order to address this. Fixes: 1e4b8896c0f3 ("rust: drm: add device abstraction") Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250731154919.4132-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index d0a9528121f1..d29c477e89a8 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -120,9 +120,13 @@ pub fn new(dev: &device::Device, data: impl PinInit) -> Result *mut Self { unsafe { crate::container_of!(Opaque::cast_from(ptr), Self, dev) }.cast_mut() } + /// # Safety + /// + /// `ptr` must be a valid pointer to `Self`. + unsafe fn into_drm_device(ptr: NonNull) -> *mut bindings::drm_device { + // SAFETY: By the safety requirements of this function, `ptr` is a valid pointer to `Self`. + unsafe { &raw mut (*ptr.as_ptr()).dev }.cast() + } + /// Not intended to be called externally, except via declare_drm_ioctls!() /// /// # Safety @@ -192,8 +204,11 @@ fn inc_ref(&self) { } unsafe fn dec_ref(obj: NonNull) { + // SAFETY: `obj` is a valid pointer to `Self`. + let drm_dev = unsafe { Self::into_drm_device(obj) }; + // SAFETY: The safety requirements guarantee that the refcount is non-zero. - unsafe { bindings::drm_dev_put(obj.cast().as_ptr()) }; + unsafe { bindings::drm_dev_put(drm_dev) }; } } From 7573980c7049450a0af22acd4f8e96f37ea30c48 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:45:05 -0700 Subject: [PATCH 0175/1307] MAINTAINERS: Mark Intel WWAN IOSM driver as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. I've been unable to locate a new maintainer for this at Intel. Mark the driver as Orphaned. Signed-off-by: Dave Hansen Cc: Loic Poulain Cc: Johannes Berg Cc: Andrew Lunn Acked-by: Sergey Ryazanov Link: https://patch.msgid.link/20250808174505.C9FF434F@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eed16e53b9b2..ea82fc952c17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12724,9 +12724,8 @@ S: Maintained F: drivers/platform/x86/intel/wmi/thunderbolt.c INTEL WWAN IOSM DRIVER -M: M Chetan Kumar L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wwan/iosm/ INTEL(R) FLEXIBLE RETURN AND EVENT DELIVERY From b56e9fb1c9669ce460dc899cecb09a54a6d71cf4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:53:24 -0700 Subject: [PATCH 0176/1307] MAINTAINERS: Mark Intel PTP DFL ToD as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. Also mark the code as an Orphan. Signed-off-by: Dave Hansen Cc: Richard Cochran Cc: Tianfei Zhang Cc: Andrew Lunn Link: https://patch.msgid.link/20250808175324.8C4B7354@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea82fc952c17..3a46ef3f8a77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12583,10 +12583,9 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL PTP DFL ToD DRIVER -M: Tianfei Zhang L: linux-fpga@vger.kernel.org L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/ptp/ptp_dfl_tod.c INTEL QUADRATURE ENCODER PERIPHERAL DRIVER From b132a3b0c228125dfda03ba7c7903ef133e1ee21 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:39:25 -0700 Subject: [PATCH 0177/1307] MAINTAINERS: Remove bouncing T7XX reviewer This reviewer's email no longer works. Remove it from MAINTAINERS. Signed-off-by: Dave Hansen Cc: Chandrashekar Devegowda Cc: Liu Haijun Cc: Ricardo Martinez Link: https://patch.msgid.link/20250808173925.FECE3782@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3a46ef3f8a77..6c7bc7b27eca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15672,7 +15672,6 @@ MEDIATEK T7XX 5G WWAN MODEM DRIVER M: Chandrashekar Devegowda R: Chiranjeevi Rapolu R: Liu Haijun -R: M Chetan Kumar R: Ricardo Martinez L: netdev@vger.kernel.org S: Supported From 61aaca8b89fb98be58b8df19f01181bb983cccff Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 8 Aug 2025 15:31:08 +0200 Subject: [PATCH 0178/1307] net: usb: qmi_wwan: add Telit Cinterion FN990A w/audio composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit Cinterion FN990A w/audio composition: 0x1077: tty (diag) + adb + rmnet + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1077 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 4 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=03(O) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 5 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f5647ee0adde..e56901bb6ebc 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1361,6 +1361,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1077, 2)}, /* Telit FN990A w/audio */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ From 52966bf71de98fef4ca7b3be1349adc7459d6d53 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Aug 2025 07:45:23 -0400 Subject: [PATCH 0179/1307] ref_tracker: use %p instead of %px in debugfs dentry name As Kees points out, this is a kernel address leak, and debugging is not a sufficiently good reason to expose the real kernel address. Fixes: 65b584f53611 ("ref_tracker: automatically register a file in debugfs for a ref_tracker_dir") Reported-by: Kees Cook Closes: https://lore.kernel.org/netdev/202507301603.62E553F93@keescook/ Signed-off-by: Jeff Layton Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- lib/ref_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a9e6ffcff04b..cce12287708e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -434,7 +434,7 @@ void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) if (dentry && !xa_is_err(dentry)) return; - ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir); + ret = snprintf(name, sizeof(name), "%s@%p", dir->class, dir); name[sizeof(name) - 1] = '\0'; if (ret < sizeof(name)) { From de1e963ad064caf73ee2c7485b925f381a3aefbf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:34 +0100 Subject: [PATCH 0180/1307] net: stmmac: rk: put the PHY clock on remove The PHY clock (bsp_priv->clk_phy) is obtained using of_clk_get(), which doesn't take part in the devm release. Therefore, when a device is unbound, this clock needs to be explicitly put. Fix this. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: fecd4d7eef8b ("net: stmmac: dwmac-rk: Add integrated PHY support") Link: https://patch.msgid.link/E1ukM1S-0086qo-PC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 79b92130a03f..f6687c2f30f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1765,11 +1765,15 @@ static int rk_gmac_probe(struct platform_device *pdev) static void rk_gmac_remove(struct platform_device *pdev) { - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); + struct stmmac_priv *priv = netdev_priv(platform_get_drvdata(pdev)); + struct rk_priv_data *bsp_priv = priv->plat->bsp_priv; stmmac_dvr_remove(&pdev->dev); rk_gmac_powerdown(bsp_priv); + + if (priv->plat->phy_node && bsp_priv->integrated_phy) + clk_put(bsp_priv->clk_phy); } #ifdef CONFIG_PM_SLEEP From 89886abd073489e26614e4d80fb8eb70d3938a0b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:39 +0100 Subject: [PATCH 0181/1307] net: stmmac: dwc-qos: fix clk prepare/enable leak on probe failure dwc_eth_dwmac_probe() gets bulk clocks, and then prepares and enables them. Unfortunately, if dwc_eth_dwmac_config_dt() or stmmac_dvr_probe() fail, we leave the clocks prepared and enabled. Fix this by using devm_clk_bulk_get_all_enabled() to combine the steps and provide devm based release of the prepare and enable state. This also fixes a similar leakin dwc_eth_dwmac_remove() which wasn't correctly retrieving the struct plat_stmmacenet_data. This becomes unnecessary. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: a045e40645df ("net: stmmac: refactor clock management in EQoS driver") Link: https://patch.msgid.link/E1ukM1X-0086qu-Td@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 09ae16e026eb..6c363f9b0ce2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -330,15 +330,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - ret = devm_clk_bulk_get_all(&pdev->dev, &plat_dat->clks); + ret = devm_clk_bulk_get_all_enabled(&pdev->dev, &plat_dat->clks); if (ret < 0) - return dev_err_probe(&pdev->dev, ret, "Failed to retrieve all required clocks\n"); + return dev_err_probe(&pdev->dev, ret, "Failed to retrieve and enable all required clocks\n"); plat_dat->num_clks = ret; - ret = clk_bulk_prepare_enable(plat_dat->num_clks, plat_dat->clks); - if (ret) - return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n"); - plat_dat->stmmac_clk = stmmac_pltfr_find_clk(plat_dat, data->stmmac_clk_name); @@ -346,7 +342,6 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); return ret; } @@ -370,15 +365,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) static void dwc_eth_dwmac_remove(struct platform_device *pdev) { const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev); - struct plat_stmmacenet_data *plat_dat = dev_get_platdata(&pdev->dev); stmmac_dvr_remove(&pdev->dev); if (data->remove) data->remove(pdev); - - if (plat_dat) - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); } static const struct of_device_id dwc_eth_dwmac_match[] = { From b63335fb3d32579c5ff0b7038b9cc23688fff528 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Aug 2025 08:34:04 +0100 Subject: [PATCH 0182/1307] cifs: Fix collect_sample() to handle any iterator type collect_sample() is used to gather samples of the data in a Write op for analysis to try and determine if the compression algorithm is likely to achieve anything more quickly than actually running the compression algorithm. However, collect_sample() assumes that the data it is going to be sampling is stored in an ITER_XARRAY-type iterator (which it now should never be) and doesn't actually check that it is before accessing the underlying xarray directly. Fix this by replacing the code with a loop that just uses the standard iterator functions to sample every other 2KiB block, skipping the intervening ones. It's not quite the same as the previous algorithm as it doesn't necessarily align to the pages within an ordinary write from the pagecache. Note that the btrfs code from which this was derived samples the inode's pagecache directly rather than the iterator - but that doesn't necessarily work for network filesystems if O_DIRECT is in operation. Fixes: 94ae8c3fee94 ("smb: client: compress: LZ77 code improvements cleanup") Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Enzo Matsumiya cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/compress.c | 61 +++++++++++----------------------------- 1 file changed, 16 insertions(+), 45 deletions(-) diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c index 766b4de13da7..db709f5cd2e1 100644 --- a/fs/smb/client/compress.c +++ b/fs/smb/client/compress.c @@ -155,58 +155,29 @@ static int cmp_bkt(const void *_a, const void *_b) } /* - * TODO: - * Support other iter types, if required. - * Only ITER_XARRAY is supported for now. + * Collect some 2K samples with 2K gaps between. */ -static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample) +static int collect_sample(const struct iov_iter *source, ssize_t max, u8 *sample) { - struct folio *folios[16], *folio; - unsigned int nr, i, j, npages; - loff_t start = iter->xarray_start + iter->iov_offset; - pgoff_t last, index = start / PAGE_SIZE; - size_t len, off, foff; - void *p; - int s = 0; + struct iov_iter iter = *source; + size_t s = 0; - last = (start + max - 1) / PAGE_SIZE; - do { - nr = xa_extract(iter->xarray, (void **)folios, index, last, ARRAY_SIZE(folios), - XA_PRESENT); - if (nr == 0) - return -EIO; + while (iov_iter_count(&iter) >= SZ_2K) { + size_t part = umin(umin(iov_iter_count(&iter), SZ_2K), max); + size_t n; - for (i = 0; i < nr; i++) { - folio = folios[i]; - npages = folio_nr_pages(folio); - foff = start - folio_pos(folio); - off = foff % PAGE_SIZE; + n = copy_from_iter(sample + s, part, &iter); + if (n != part) + return -EFAULT; - for (j = foff / PAGE_SIZE; j < npages; j++) { - size_t len2; + s += n; + max -= n; - len = min_t(size_t, max, PAGE_SIZE - off); - len2 = min_t(size_t, len, SZ_2K); + if (iov_iter_count(&iter) < PAGE_SIZE - SZ_2K) + break; - p = kmap_local_page(folio_page(folio, j)); - memcpy(&sample[s], p, len2); - kunmap_local(p); - - s += len2; - - if (len2 < SZ_2K || s >= max - SZ_2K) - return s; - - max -= len; - if (max <= 0) - return s; - - start += len; - off = 0; - index++; - } - } - } while (nr == ARRAY_SIZE(folios)); + iov_iter_advance(&iter, SZ_2K); + } return s; } From d7f1affc556e07208453224a025bd67583671ae2 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 8 Aug 2025 17:52:21 +0300 Subject: [PATCH 0183/1307] cifs: avoid extra calls to strlen() in cifs_get_spnego_key() Since 'snprintf()' returns the number of characters emitted, an output position may be advanced with this return value rather than using an explicit calls to 'strlen()'. Compile tested only. Signed-off-by: Dmitry Antipov Signed-off-by: Steve French --- fs/smb/client/cifs_spnego.c | 47 ++++++++++++++----------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index bc1c1e9b288a..43b86fa4d695 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -124,55 +124,44 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, dp = description; /* start with version and hostname portion of UNC string */ spnego_key = ERR_PTR(-EINVAL); - sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, - hostname); - dp = description + strlen(description); + dp += sprintf(dp, "ver=0x%x;host=%s;", CIFS_SPNEGO_UPCALL_VERSION, + hostname); /* add the server address */ if (server->dstaddr.ss_family == AF_INET) - sprintf(dp, "ip4=%pI4", &sa->sin_addr); + dp += sprintf(dp, "ip4=%pI4", &sa->sin_addr); else if (server->dstaddr.ss_family == AF_INET6) - sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); + dp += sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); else goto out; - dp = description + strlen(description); - /* for now, only sec=krb5 and sec=mskrb5 and iakerb are valid */ if (server->sec_kerberos) - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); else if (server->sec_mskerberos) - sprintf(dp, ";sec=mskrb5"); + dp += sprintf(dp, ";sec=mskrb5"); else if (server->sec_iakerb) - sprintf(dp, ";sec=iakerb"); + dp += sprintf(dp, ";sec=iakerb"); else { cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n"); - sprintf(dp, ";sec=krb5"); + dp += sprintf(dp, ";sec=krb5"); } - dp = description + strlen(description); - sprintf(dp, ";uid=0x%x", - from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); + dp += sprintf(dp, ";uid=0x%x", + from_kuid_munged(&init_user_ns, sesInfo->linux_uid)); - dp = description + strlen(description); - sprintf(dp, ";creduid=0x%x", + dp += sprintf(dp, ";creduid=0x%x", from_kuid_munged(&init_user_ns, sesInfo->cred_uid)); - if (sesInfo->user_name) { - dp = description + strlen(description); - sprintf(dp, ";user=%s", sesInfo->user_name); - } + if (sesInfo->user_name) + dp += sprintf(dp, ";user=%s", sesInfo->user_name); - dp = description + strlen(description); - sprintf(dp, ";pid=0x%x", current->pid); + dp += sprintf(dp, ";pid=0x%x", current->pid); - if (sesInfo->upcall_target == UPTARGET_MOUNT) { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=mount"); - } else { - dp = description + strlen(description); - sprintf(dp, ";upcall_target=app"); - } + if (sesInfo->upcall_target == UPTARGET_MOUNT) + dp += sprintf(dp, ";upcall_target=mount"); + else + dp += sprintf(dp, ";upcall_target=app"); cifs_dbg(FYI, "key description = %s\n", description); saved_cred = override_creds(spnego_cred); From ab5ac789efa985e42bdb5b5e8a7a4ad84935d44e Mon Sep 17 00:00:00 2001 From: Sukrut Heroorkar Date: Tue, 5 Aug 2025 00:56:14 +0200 Subject: [PATCH 0184/1307] selftests/proc: fix string literal warning in proc-maps-race.c This change resolves non literal string format warning invoked for proc-maps-race.c while compiling. proc-maps-race.c:205:17: warning: format not a string literal and no format arguments [-Wformat-security] 205 | printf(text); | ^~~~~~ proc-maps-race.c:209:17: warning: format not a string literal and no format arguments [-Wformat-security] 209 | printf(text); | ^~~~~~ proc-maps-race.c: In function `print_last_lines': proc-maps-race.c:224:9: warning: format not a string literal and no format arguments [-Wformat-security] 224 | printf(start); | ^~~~~~ Add string format specifier %s for the printf calls in both print_first_lines() and print_last_lines() thus resolving the warnings. The test executes fine after this change thus causing no effect to the functional behavior of the test. Link: https://lkml.kernel.org/r/20250804225633.841777-1-hsukrut3@gmail.com Fixes: aadc099c480f ("selftests/proc: add verbose mode for /proc/pid/maps tearing tests") Signed-off-by: Sukrut Heroorkar Acked-by: Suren Baghdasaryan Cc: David Hunter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 66773685a047..94bba4553130 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr) int offs = end - text; text[offs] = '\0'; - printf(text); + printf("%s", text); text[offs] = '\n'; printf("\n"); } else { - printf(text); + printf("%s", text); } } @@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr) nr--; start--; } - printf(start); + printf("%s", start); } static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) From cf1b80dc31a1137b8b4568c138b453bf7453204a Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 6 Aug 2025 20:26:11 +0530 Subject: [PATCH 0185/1307] mm: pass page directly instead of using folio_page In commit_anon_folio_batch(), we iterate over all pages pointed to by the PTE batch. Therefore we need to know the first page of the batch; currently we derive that via folio_page(folio, 0), but, that takes us to the first (head) page of the folio instead - our PTE batch may lie in the middle of the folio, leading to incorrectness. Bite the bullet and throw away the micro-optimization of reusing the folio in favour of code simplicity. Derive the page and the folio in change_pte_range, and pass the page too to commit_anon_folio_batch to fix the aforementioned issue. Link: https://lkml.kernel.org/r/20250806145611.3962-1-dev.jain@arm.com Fixes: cac1db8c3aad ("mm: optimize mprotect() by PTE batching") Reported-by: syzbot+57bcc752f0df8bb1365c@syzkaller.appspotmail.com Signed-off-by: Dev Jain Reviewed-by: Lorenzo Stoakes Debugged-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: Christophe Leroy Cc: Hugh Dickins Cc: Jann Horn Cc: Joey Gouly Cc: Kevin Brodsky Cc: Lance Yang Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: Vlastimil Babka Cc: Will Deacon Cc: Yang Shi Cc: Yicong Yang Cc: Zhenhua Huang Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/mprotect.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 78bded7acf79..113b48985834 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -120,9 +120,8 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep, static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, pte_t oldpte, pte_t *pte, int target_node, - struct folio **foliop) + struct folio *folio) { - struct folio *folio = NULL; bool ret = true; bool toptier; int nid; @@ -131,7 +130,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, if (pte_protnone(oldpte)) goto skip; - folio = vm_normal_folio(vma, addr, oldpte); if (!folio) goto skip; @@ -173,7 +171,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); skip: - *foliop = folio; return ret; } @@ -231,10 +228,9 @@ static int page_anon_exclusive_sub_batch(int start_idx, int max_len, * retrieve sub-batches. */ static void commit_anon_folio_batch(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *first_page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { - struct page *first_page = folio_page(folio, 0); bool expected_anon_exclusive; int sub_batch_idx = 0; int len; @@ -251,7 +247,7 @@ static void commit_anon_folio_batch(struct vm_area_struct *vma, } static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { bool set_write; @@ -270,7 +266,7 @@ static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, /* idx = */ 0, set_write, tlb); return; } - commit_anon_folio_batch(vma, folio, addr, ptep, oldpte, ptent, nr_ptes, tlb); + commit_anon_folio_batch(vma, folio, page, addr, ptep, oldpte, ptent, nr_ptes, tlb); } static long change_pte_range(struct mmu_gather *tlb, @@ -305,15 +301,19 @@ static long change_pte_range(struct mmu_gather *tlb, const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE; int max_nr_ptes = (end - addr) >> PAGE_SHIFT; struct folio *folio = NULL; + struct page *page; pte_t ptent; + page = vm_normal_page(vma, addr, oldpte); + if (page) + folio = page_folio(page); /* * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. */ if (prot_numa) { int ret = prot_numa_skip(vma, addr, oldpte, pte, - target_node, &folio); + target_node, folio); if (ret) { /* determine batch to skip */ @@ -323,9 +323,6 @@ static long change_pte_range(struct mmu_gather *tlb, } } - if (!folio) - folio = vm_normal_folio(vma, addr, oldpte); - nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags); oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes); @@ -351,7 +348,7 @@ static long change_pte_range(struct mmu_gather *tlb, */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent)) - set_write_prot_commit_flush_ptes(vma, folio, + set_write_prot_commit_flush_ptes(vma, folio, page, addr, pte, oldpte, ptent, nr_ptes, tlb); else prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, From aba6faec0103ed8f169be8dce2ead41fcb689446 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Aug 2025 15:00:22 -0700 Subject: [PATCH 0186/1307] userfaultfd: fix a crash in UFFDIO_MOVE when PMD is a migration entry When UFFDIO_MOVE encounters a migration PMD entry, it proceeds with obtaining a folio and accessing it even though the entry is swp_entry_t. Add the missing check and let split_huge_pmd() handle migration entries. While at it also remove unnecessary folio check. [surenb@google.com: remove extra folio check, per David] Link: https://lkml.kernel.org/r/20250807200418.1963585-1-surenb@google.com Link: https://lkml.kernel.org/r/20250806220022.926763-1-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+b446dbe27035ef6bd6c2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68794b5c.a70a0220.693ce.0050.GAE@google.com/ Reviewed-by: Peter Xu Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Lokesh Gidra Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cbed91b09640..45e6290e2e8b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1821,13 +1821,16 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, /* Check if we can move the pmd without splitting it. */ if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { - struct folio *folio = pmd_folio(*src_pmd); + /* Can be a migration entry */ + if (pmd_present(*src_pmd)) { + struct folio *folio = pmd_folio(*src_pmd); - if (!folio || (!is_huge_zero_folio(folio) && - !PageAnonExclusive(&folio->page))) { - spin_unlock(ptl); - err = -EBUSY; - break; + if (!is_huge_zero_folio(folio) && + !PageAnonExclusive(&folio->page)) { + spin_unlock(ptl); + err = -EBUSY; + break; + } } spin_unlock(ptl); From 0b5be138ce00f421bd7cc5a226061bd62c4ab850 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 7 Aug 2025 19:58:19 +0100 Subject: [PATCH 0187/1307] mm/mremap: avoid expensive folio lookup on mremap folio pte batch It was discovered in the attached report that commit f822a9a81a31 ("mm: optimize mremap() by PTE batching") introduced a significant performance regression on a number of metrics on x86-64, most notably stress-ng.bigheap.realloc_calls_per_sec - indicating a 37.3% regression in number of mremap() calls per second. I was able to reproduce this locally on an intel x86-64 raptor lake system, noting an average of 143,857 realloc calls/sec (with a stddev of 4,531 or 3.1%) prior to this patch being applied, and 81,503 afterwards (stddev of 2,131 or 2.6%) - a 43.3% regression. During testing I was able to determine that there was no meaningful difference in efforts to optimise the folio_pte_batch() operation, nor checking folio_test_large(). This is within expectation, as a regression this large is likely to indicate we are accessing memory that is not yet in a cache line (and perhaps may even cause a main memory fetch). The expectation by those discussing this from the start was that vm_normal_folio() (invoked by mremap_folio_pte_batch()) would likely be the culprit due to having to retrieve memory from the vmemmap (which mremap() page table moves does not otherwise do, meaning this is inevitably cold memory). I was able to definitively determine that this theory is indeed correct and the cause of the issue. The solution is to restore part of an approach previously discarded on review, that is to invoke pte_batch_hint() which explicitly determines, through reference to the PTE alone (thus no vmemmap lookup), what the PTE batch size may be. On platforms other than arm64 this is currently hardcoded to return 1, so this naturally resolves the issue for x86-64, and for arm64 introduces little to no overhead as the pte cache line will be hot. With this patch applied, we move from 81,503 realloc calls/sec to 138,701 (stddev of 496.1 or 0.4%), which is a -3.6% regression, however accounting for the variance in the original result, this is broadly restoring performance to its prior state. Link: https://lkml.kernel.org/r/20250807185819.199865-1-lorenzo.stoakes@oracle.com Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching") Signed-off-by: Lorenzo Stoakes Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071609.4e743d7c-lkp@intel.com Acked-by: David Hildenbrand Acked-by: Pedro Falcato Reviewed-by: Barry Song Acked-by: Vlastimil Babka Reviewed-by: Dev Jain Cc: Ryan Roberts Cc: Barry Song Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mremap.c b/mm/mremap.c index 677a4d744df9..9afa8cd524f5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -179,6 +179,10 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr if (max_nr == 1) return 1; + /* Avoid expensive folio lookup if we stand no chance of benefit. */ + if (pte_batch_hint(ptep, pte) == 1) + return 1; + folio = vm_normal_folio(vma, addr, pte); if (!folio || !folio_test_large(folio)) return 1; From c0e1b774f68bdbea1618e356e30672c7f1e32509 Mon Sep 17 00:00:00 2001 From: Jialin Wang Date: Fri, 8 Aug 2025 00:54:55 +0800 Subject: [PATCH 0188/1307] proc: proc_maps_open allow proc_mem_open to return NULL The commit 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") caused proc_maps_open() to return -ESRCH when proc_mem_open() returns NULL. This breaks legitimate /proc//maps access for kernel threads since kernel threads have NULL mm_struct. The regression causes perf to fail and exit when profiling a kernel thread: # perf record -v -g -p $(pgrep kswapd0) ... couldn't open /proc/65/task/65/maps This patch partially reverts the commit to fix it. Link: https://lkml.kernel.org/r/20250807165455.73656-1-wjl.linux@gmail.com Fixes: 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") Signed-off-by: Jialin Wang Cc: Penglei Jiang Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee1e4ccd33bd..29cca0e6d0ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -340,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR_OR_NULL(priv->mm)) { - int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; From 54d4f445517fe8350d735624d7f4225e7511d9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Fri, 8 Aug 2025 02:02:34 +0100 Subject: [PATCH 0189/1307] drm/panfrost: Print RSS for tiler heap BO's in debugfs GEMS file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise it would display the virtual allocation size, which is often much bigger than the RSS. Signed-off-by: Adrián Larumbe Fixes: e48ade5e23ba ("drm/panfrost: show device-wide list of DRM GEM objects over DebugFS") Tested-by: Christopher Healy Reviewed-by: Daniel Stone Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250808010235.2831853-1-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index bb73f2a68a12..85d6289a6eda 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -432,7 +432,7 @@ static void panfrost_gem_debugfs_bo_print(struct panfrost_gem_object *bo, if (!refcount) return; - resident_size = bo->base.pages ? bo->base.base.size : 0; + resident_size = panfrost_gem_rss(&bo->base.base); snprintf(creator_info, sizeof(creator_info), "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid); From 9af8f2b469c0438620832f3729a3c5c03853b56b Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Fri, 27 Jun 2025 14:38:19 +0200 Subject: [PATCH 0190/1307] drm/panic: Add a u64 divide by 10 for arm32 On 32bits ARM, u64 divided by a constant is not optimized to a multiply by inverse by the compiler [1]. So do the multiply by inverse explicitly for this architecture. Link: https://github.com/llvm/llvm-project/issues/37280 [1] Reported-by: Andrei Lalaev Closes: https://lore.kernel.org/dri-devel/c0a2771c-f3f5-4d4c-aa82-d673b3c5cb46@gmail.com/ Fixes: 675008f196ca ("drm/panic: Use a decimal fifo to avoid u64 by u64 divide") Reviewed-by: Alice Ryhl Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic_qr.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09a9b452e8b7..50c286c5cee8 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -381,6 +381,26 @@ struct DecFifo { len: usize, } +// On arm32 architecture, dividing an `u64` by a constant will generate a call +// to `__aeabi_uldivmod` which is not present in the kernel. +// So use the multiply by inverse method for this architecture. +fn div10(val: u64) -> u64 { + if cfg!(target_arch = "arm") { + let val_h = val >> 32; + let val_l = val & 0xFFFFFFFF; + let b_h: u64 = 0x66666666; + let b_l: u64 = 0x66666667; + + let tmp1 = val_h * b_l + ((val_l * b_l) >> 32); + let tmp2 = val_l * b_h + (tmp1 & 0xffffffff); + let tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32); + + tmp3 >> 2 + } else { + val / 10 + } +} + impl DecFifo { fn push(&mut self, data: u64, len: usize) { let mut chunk = data; @@ -389,7 +409,7 @@ fn push(&mut self, data: u64, len: usize) { } for i in 0..len { self.decimals[i] = (chunk % 10) as u8; - chunk /= 10; + chunk = div10(chunk); } self.len += len; } From fd56b9c9507f32b16159f9a922e1af5628254567 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Tue, 29 Jul 2025 15:46:48 +0300 Subject: [PATCH 0191/1307] drm/i915/fbc: fix the implementation of wa_18038517565 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the wa_18038517565, we need to disable FBC compressor clock gating before enabling FBC and enable after disabling FBC. Placing the enabling of clock gating in the fbc deactivate function can make the above wa logic go wrong in case of frontbuffer rendering FBC mechanism. FBC deactivate can get called during fb invalidate and then the corresponding FBC activate can get called without properly disabling the clock gating and can result in compression stalled. So move the enable clock gating at the end of one FBC session after FBC is completely disabled for a pipe. Bspec: 74212, 72197, 69741, 65555 Fixes: 010363c46189 ("drm/i915/display: implement wa_18038517565") Signed-off-by: Vinod Govindapillai Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20250729124648.288497-1-vinod.govindapillai@intel.com (cherry picked from commit 82dde0407ab126f8413fd6c51429e5057ced5ba2) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_fbc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 6e26cb4c5724..685ac98bd001 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -552,10 +552,6 @@ static void ilk_fbc_deactivate(struct intel_fbc *fbc) if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); - - /* wa_18038517565 Enable DPFC clock gating after FBC disable */ - if (display->platform.dg2 || DISPLAY_VER(display) >= 14) - fbc_compressor_clkgate_disable_wa(fbc, false); } } @@ -1710,6 +1706,10 @@ static void __intel_fbc_disable(struct intel_fbc *fbc) __intel_fbc_cleanup_cfb(fbc); + /* wa_18038517565 Enable DPFC clock gating after FBC disable */ + if (display->platform.dg2 || DISPLAY_VER(display) >= 14) + fbc_compressor_clkgate_disable_wa(fbc, false); + fbc->state.plane = NULL; fbc->flip_pending = false; fbc->busy_bits = 0; From 184889dfe0568528fd6d14bba864dd57ed45bbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 1 Aug 2025 09:29:05 +0300 Subject: [PATCH 0192/1307] drm/i915/psr: Do not trigger Frame Change events from frontbuffer flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to get rid of triggering "Frame Change" events from frontbuffer flush calls. We are about to move using TRANS_PUSH register for this on LunarLake and onwards. Touching TRANS_PUSH register from fronbuffer flush would be problematic as it's written by DSB as well. Fix this by using intel_psr_exit when flush or invalidate is done on LunarLake and onwards. This is not possible on AlderLake and MeteorLake due to HW bug in PSR2 disable. This patch is also fixing problems with cursor plane where cursor is disappearing or duplicate cursor is seen on the screen. v2: Commit message updated Bspec: 68927, 68934, 66624 Reported-by: Janna Martl Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5522 Fixes: 411ad63877bb ("drm/i915/psr: Use SFF_CTL on invalidate/flush for LunarLake onwards") Tested-by: Janna Martl Signed-off-by: Jouni Högander Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250801062905.564453-1-jouni.hogander@intel.com (cherry picked from commit 46fb38cb20c0d185a6391ab524b23e0e0219c41f) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_psr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index ae9053919211..41988e193a41 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3275,7 +3275,9 @@ static void intel_psr_configure_full_frame_update(struct intel_dp *intel_dp) static void _psr_invalidate_handle(struct intel_dp *intel_dp) { - if (intel_dp->psr.psr2_sel_fetch_enabled) { + struct intel_display *display = to_intel_display(intel_dp); + + if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { if (!intel_dp->psr.psr2_sel_fetch_cff_enabled) { intel_dp->psr.psr2_sel_fetch_cff_enabled = true; intel_psr_configure_full_frame_update(intel_dp); @@ -3361,7 +3363,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - if (intel_dp->psr.psr2_sel_fetch_enabled) { + if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? */ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3378,11 +3380,13 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) * existing SU configuration */ intel_psr_configure_full_frame_update(intel_dp); + + intel_psr_force_update(intel_dp); + } else { + intel_psr_exit(intel_dp); } - intel_psr_force_update(intel_dp); - - if (!intel_dp->psr.psr2_sel_fetch_enabled && !intel_dp->psr.active && + if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } From 8ee90742cf29427683294a6a80f1e2b7f4af1cff Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Thu, 7 Aug 2025 12:08:32 +0800 Subject: [PATCH 0193/1307] net: phy: nxp-c45-tja11xx: fix the PHY ID mismatch issue when using C45 TJA1103/04/20/21 support both C22 and C45 accessing methods. The TJA11xx driver has implemented the match_phy_device() API. However, it does not handle the C45 ID. If C45 was used to access TJA11xx, match_phy_device() would always return false due to phydev->phy_id only used by C22 being empty, resulting in the generic phy driver being used for TJA11xx PHYs. Therefore, check phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] when using C45. Fixes: 1b76b2497aba ("net: phy: nxp-c45-tja11xx: simplify .match_phy_device OP") Signed-off-by: Clark Wang Link: https://patch.msgid.link/20250807040832.2455306-1-xiaoning.wang@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/phy/nxp-c45-tja11xx.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 4c6d905f0a9f..87adb6508017 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1965,24 +1965,27 @@ static int nxp_c45_macsec_ability(struct phy_device *phydev) return macsec_ability; } +static bool tja11xx_phy_id_compare(struct phy_device *phydev, + const struct phy_driver *phydrv) +{ + u32 id = phydev->is_c45 ? phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] : + phydev->phy_id; + + return phy_id_compare(id, phydrv->phy_id, phydrv->phy_id_mask); +} + static int tja11xx_no_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return !nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + !nxp_c45_macsec_ability(phydev); } static int tja11xx_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + nxp_c45_macsec_ability(phydev); } static const struct nxp_c45_regmap tja1120_regmap = { From 8ea25274ebaf2f6be8be374633b2ed8348ec0e70 Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Thu, 7 Aug 2025 15:54:49 +0200 Subject: [PATCH 0194/1307] net: mdiobus: release reset_gpio in mdiobus_unregister_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reset_gpio is claimed in mdiobus_register_device(), but it is not released in mdiobus_unregister_device(). It is instead only released when the whole MDIO bus is unregistered. When a device uses the reset_gpio property, it becomes impossible to unregister it and register it again, because the GPIO remains claimed. This patch resolves that issue. Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") # see notes Reviewed-by: Andrew Lunn Cc: Csókás Bence [ csokas.bence: Resolve rebase conflict and clarify msg ] Signed-off-by: Buday Csaba Link: https://patch.msgid.link/20250807135449.254254-2-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 1 + drivers/net/phy/mdio_bus_provider.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index fda2e27c1810..cad6ed3aa10b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -91,6 +91,7 @@ int mdiobus_unregister_device(struct mdio_device *mdiodev) if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) return -EINVAL; + gpiod_put(mdiodev->reset_gpio); reset_control_put(mdiodev->reset_ctrl); mdiodev->bus->mdio_map[mdiodev->addr] = NULL; diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index 48dc4bf85125..f43973e73ea3 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -443,9 +443,6 @@ void mdiobus_unregister(struct mii_bus *bus) if (!mdiodev) continue; - if (mdiodev->reset_gpio) - gpiod_put(mdiodev->reset_gpio); - mdiodev->device_remove(mdiodev); mdiodev->device_free(mdiodev); } From 5eb1bcdb6a8c088514019c3a9bda5d565beed1af Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 22 Jul 2025 13:18:53 +0530 Subject: [PATCH 0195/1307] x86/sev: Improve handling of writes to intercepted TSC MSRs Currently, when a Secure TSC enabled SNP guest attempts to write to the intercepted GUEST_TSC_FREQ MSR (a read-only MSR), the guest kernel response incorrectly implies a VMM configuration error, when in fact it is the usual VMM configuration to intercept writes to read-only MSRs, unless explicitly documented. Modify the intercepted TSC MSR #VC handling: * Write to GUEST_TSC_FREQ will generate a #GP instead of terminating the guest * Write to MSR_IA32_TSC will generate a #GP instead of silently ignoring it However, continue to terminate the guest when reading from intercepted GUEST_TSC_FREQ MSR with Secure TSC enabled, as intercepted reads indicate an improper VMM configuration for Secure TSC enabled SNP guests. [ bp: simplify comment. ] Fixes: 38cc6495cdec ("x86/sev: Prevent GUEST_TSC_FREQ MSR interception for Secure TSC enabled guests") Suggested-by: Sean Christopherson Signed-off-by: Nikunj A Dadhania Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/20250722074853.22253-1-nikunj@amd.com --- arch/x86/coco/sev/vc-handle.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index faf1fce89ed4..c3b4acbde0d8 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -371,29 +371,30 @@ static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) * executing with Secure TSC enabled, so special handling is required for * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. */ -static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) +static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool write) { + struct pt_regs *regs = ctxt->regs; u64 tsc; /* - * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. - * Terminate the SNP guest when the interception is enabled. + * Writing to MSR_IA32_TSC can cause subsequent reads of the TSC to + * return undefined values, and GUEST_TSC_FREQ is read-only. Generate + * a #GP on all writes. + */ + if (write) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } + + /* + * GUEST_TSC_FREQ read should not be intercepted when Secure TSC is + * enabled. Terminate the guest if a read is attempted. */ if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) return ES_VMM_ERROR; - /* - * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC - * to return undefined values, so ignore all writes. - * - * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use - * the value returned by rdtsc_ordered(). - */ - if (write) { - WARN_ONCE(1, "TSC MSR writes are verboten!\n"); - return ES_OK; - } - + /* Reads of MSR_IA32_TSC should return the current TSC value. */ tsc = rdtsc_ordered(); regs->ax = lower_32_bits(tsc); regs->dx = upper_32_bits(tsc); @@ -416,7 +417,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) case MSR_IA32_TSC: case MSR_AMD64_GUEST_TSC_FREQ: if (sev_status & MSR_AMD64_SNP_SECURE_TSC) - return __vc_handle_secure_tsc_msrs(regs, write); + return __vc_handle_secure_tsc_msrs(ctxt, write); break; default: break; From 942e47ab228c7dd27c2ae043c17e7aab2028082c Mon Sep 17 00:00:00 2001 From: Pengyu Luo Date: Tue, 12 Aug 2025 17:39:56 +0800 Subject: [PATCH 0196/1307] phy: qualcomm: phy-qcom-eusb2-repeater: fix override properties property "qcom,tune-usb2-preem" is for EUSB2_TUNE_USB2_PREEM property "qcom,tune-usb2-amplitude" is for EUSB2_TUNE_IUSB2 The downstream correspondence is as follows: EUSB2_TUNE_USB2_PREEM: Tx pre-emphasis tuning EUSB2_TUNE_IUSB2: HS trasmit amplitude EUSB2_TUNE_SQUELCH_U: Squelch detection threshold EUSB2_TUNE_HSDISC: HS disconnect threshold EUSB2_TUNE_EUSB_SLEW: slew rate Fixes: 31bc94de7602 ("phy: qualcomm: phy-qcom-eusb2-repeater: Don't zero-out registers") Signed-off-by: Pengyu Luo Reviewed-by: Konrad Dybcio Reviewed-by: Luca Weiss Link: https://lore.kernel.org/r/20250812093957.32235-1-mitltlatltl@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index e0f2acc8109c..8fcbc312fd61 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -127,13 +127,13 @@ static int eusb2_repeater_init(struct phy *phy) rptr->cfg->init_tbl[i].value); /* Override registers from devicetree values */ - if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &val)) + if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &val)) regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, val); if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &val)) regmap_write(regmap, base + EUSB2_TUNE_HSDISC, val); - if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &val)) + if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &val)) regmap_write(regmap, base + EUSB2_TUNE_IUSB2, val); /* Wait for status OK */ From c8a9a619c072e9a45e9a9b4b035269427dc00aa8 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:54 +0000 Subject: [PATCH 0197/1307] dt-bindings: net: thead,th1520-gmac: Describe APB interface clock Besides ones for GMAC core and peripheral registers, the TH1520 GMAC requires one more clock for configuring APB glue registers. Describe it in the binding. Fixes: f920ce04c399 ("dt-bindings: net: Add T-HEAD dwmac support") Signed-off-by: Yao Zi Acked-by: Krzysztof Kozlowski Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-3-ziyao@disroot.org Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/thead,th1520-gmac.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml index 6d9de3303762..b3492a9aa4ef 100644 --- a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml +++ b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml @@ -62,11 +62,13 @@ properties: items: - description: GMAC main clock - description: Peripheral registers interface clock + - description: APB glue registers interface clock clock-names: items: - const: stmmaceth - const: pclk + - const: apb interrupts: items: @@ -88,8 +90,8 @@ examples: compatible = "thead,th1520-gmac", "snps,dwmac-3.70a"; reg = <0xe7070000 0x2000>, <0xec003000 0x1000>; reg-names = "dwmac", "apb"; - clocks = <&clk 1>, <&clk 2>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk 1>, <&clk 2>, <&clk 3>; + clock-names = "stmmaceth", "pclk", "apb"; interrupts = <66>; interrupt-names = "macirq"; phy-mode = "rgmii-id"; From 4cc339ce482ba78589a2d5cbe1c84b735d263383 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:55 +0000 Subject: [PATCH 0198/1307] net: stmmac: thead: Get and enable APB clock on initialization It's necessary to adjust the MAC TX clock when the linkspeed changes, but it's noted such adjustment always fails on TH1520 SoC, and reading back from APB glue registers that control clock generation results in garbage, causing broken link. With some testing, it's found a clock must be ungated for access to APB glue registers. Without any consumer, the clock is automatically disabled during late kernel startup. Let's get and enable it if it's described in devicetree. For backward compatibility with older devicetrees, probing won't fail if the APB clock isn't found. In this case, we emit a warning since the link will break if the speed changes. Fixes: 33a1a01e3afa ("net: stmmac: Add glue layer for T-HEAD TH1520 SoC") Signed-off-by: Yao Zi Tested-by: Drew Fustini Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-4-ziyao@disroot.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index c72ee759aae5..f2946bea0bc2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -211,6 +211,7 @@ static int thead_dwmac_probe(struct platform_device *pdev) struct stmmac_resources stmmac_res; struct plat_stmmacenet_data *plat; struct thead_dwmac *dwmac; + struct clk *apb_clk; void __iomem *apb; int ret; @@ -224,6 +225,19 @@ static int thead_dwmac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(plat), "dt configuration failed\n"); + /* + * The APB clock is essential for accessing glue registers. However, + * old devicetrees don't describe it correctly. We continue to probe + * and emit a warning if it isn't present. + */ + apb_clk = devm_clk_get_enabled(&pdev->dev, "apb"); + if (PTR_ERR(apb_clk) == -ENOENT) + dev_warn(&pdev->dev, + "cannot get apb clock, link may break after speed changes\n"); + else if (IS_ERR(apb_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(apb_clk), + "failed to get apb clock\n"); + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); if (!dwmac) return -ENOMEM; From a7f75e2883c4bd57b12c3be61bb926929adad9c0 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:56 +0000 Subject: [PATCH 0199/1307] riscv: dts: thead: Add APB clocks for TH1520 GMACs Describe perisys-apb4-hclk as the APB clock for TH1520 SoC, which is essential for accessing GMAC glue registers. Fixes: 7e756671a664 ("riscv: dts: thead: Add TH1520 ethernet nodes") Signed-off-by: Yao Zi Reviewed-by: Drew Fustini Tested-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-5-ziyao@disroot.org Signed-off-by: Paolo Abeni --- arch/riscv/boot/dts/thead/th1520.dtsi | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index 42724bf7e90e..03f1d7319049 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -297,8 +297,9 @@ gmac1: ethernet@ffe7060000 { reg-names = "dwmac", "apb"; interrupts = <67 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; @@ -319,8 +320,9 @@ gmac0: ethernet@ffe7070000 { reg-names = "dwmac", "apb"; interrupts = <66 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; From e93f7af148222303c4632318536c0f649b4ee5b1 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Fri, 8 Aug 2025 11:57:56 -0700 Subject: [PATCH 0200/1307] docs: Fix name for net.ipv4.udp_child_hash_entries udp_child_ehash_entries -> udp_child_hash_entries Fixes: 9804985bf27f ("udp: Introduce optional per-netns hash table.") Signed-off-by: Jordan Rife Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250808185800.1189042-1-jordan@jrife.io Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bb620f554598..9756d16e3df1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1420,7 +1420,7 @@ udp_hash_entries - INTEGER A negative value means the networking namespace does not own its hash buckets and shares the initial networking namespace's one. -udp_child_ehash_entries - INTEGER +udp_child_hash_entries - INTEGER Control the number of hash buckets for UDP sockets in the child networking namespace, which must be set before clone() or unshare(). From dcb82900b12f5809e66835918d4043284ce1d39c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 10 Aug 2025 23:41:05 +0200 Subject: [PATCH 0201/1307] ASoC: codecs: Call strscpy() with correct size argument In aw8xxxx_profile_info(), strscpy() is called with the length of the source string "null" rather than the size of the destination buffer. This is fine as long as the destination buffer is larger than the source string, but we should still use the destination buffer size instead to call strscpy() as intended. And since 'name' points to the fixed-size buffer 'uinfo->value.enumerated.name', we can safely omit the size argument and let strscpy() infer it using sizeof() and remove 'name'. Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250810214144.1985-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown --- sound/soc/codecs/aw87390.c | 8 +++----- sound/soc/codecs/aw88081.c | 5 ++--- sound/soc/codecs/aw88166.c | 8 +++----- sound/soc/codecs/aw88261.c | 8 +++----- sound/soc/codecs/aw88395/aw88395.c | 8 +++----- sound/soc/codecs/aw88399.c | 8 +++----- 6 files changed, 17 insertions(+), 28 deletions(-) diff --git a/sound/soc/codecs/aw87390.c b/sound/soc/codecs/aw87390.c index 110009616966..ef6f64856988 100644 --- a/sound/soc/codecs/aw87390.c +++ b/sound/soc/codecs/aw87390.c @@ -177,7 +177,7 @@ static int aw87390_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw87390 *aw87390 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -194,17 +194,15 @@ static int aw87390_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw87390_dev_get_prof_name(aw87390->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88081.c b/sound/soc/codecs/aw88081.c index 3dd8428f08cc..d61a7b8c5470 100644 --- a/sound/soc/codecs/aw88081.c +++ b/sound/soc/codecs/aw88081.c @@ -914,12 +914,11 @@ static int aw88081_profile_info(struct snd_kcontrol *kcontrol, ret = aw88081_dev_get_prof_name(aw88081->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(uinfo->value.enumerated.name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88166.c b/sound/soc/codecs/aw88166.c index 4f76ebe11cc7..28f62b991ef2 100644 --- a/sound/soc/codecs/aw88166.c +++ b/sound/soc/codecs/aw88166.c @@ -1478,7 +1478,7 @@ static int aw88166_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88166 *aw88166 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -1495,17 +1495,15 @@ static int aw88166_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88166_dev_get_prof_name(aw88166->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88261.c b/sound/soc/codecs/aw88261.c index fb99871578c5..de11ae8dd9d9 100644 --- a/sound/soc/codecs/aw88261.c +++ b/sound/soc/codecs/aw88261.c @@ -819,7 +819,7 @@ static int aw88261_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88261 *aw88261 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -836,17 +836,15 @@ static int aw88261_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88261_dev_get_prof_name(aw88261->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c index aea44a199b98..fb563b4c6971 100644 --- a/sound/soc/codecs/aw88395/aw88395.c +++ b/sound/soc/codecs/aw88395/aw88395.c @@ -175,7 +175,7 @@ static int aw88395_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88395 *aw88395 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -192,17 +192,15 @@ static int aw88395_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88395_dev_get_prof_name(aw88395->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index c23e70d64d0c..58846feb013d 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -1831,7 +1831,7 @@ static int aw88399_profile_info(struct snd_kcontrol *kcontrol, { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct aw88399 *aw88399 = snd_soc_component_get_drvdata(codec); - char *prof_name, *name; + char *prof_name; int count, ret; uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -1848,17 +1848,15 @@ static int aw88399_profile_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item >= count) uinfo->value.enumerated.item = count - 1; - name = uinfo->value.enumerated.name; count = uinfo->value.enumerated.item; ret = aw88399_dev_get_prof_name(aw88399->aw_pa, count, &prof_name); if (ret) { - strscpy(uinfo->value.enumerated.name, "null", - strlen("null") + 1); + strscpy(uinfo->value.enumerated.name, "null"); return 0; } - strscpy(name, prof_name, sizeof(uinfo->value.enumerated.name)); + strscpy(uinfo->value.enumerated.name, prof_name); return 0; } From d26a9f4f0a7745f0d5127344379a62007df68dcd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 22 Jul 2025 20:38:41 +0200 Subject: [PATCH 0202/1307] platform/x86: dell-smbios-wmi: Stop touching WMI device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dell SMBIOS driver uses the "id" field inside struct device for prioritizing the WMI backend over the SMM backend. Because of this the WMI backend modifies the "id" field of the underlying WMI device. However the WMI core itself uses wdev->dev.id internally to track device IDs, so modifying this value will result in a resource leak. Fix this by not using the "id" field inside struct device for SMBIOS prioritization. Instead extend struct smbios_device with a separate "priority" field. Tested on a Dell Inspiron 3505. Fixes: 73f0f2b52c5e ("platform/x86: wmi: Fix WMI device naming issue") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250722183841.9552-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-smbios-base.c | 19 +++++++++---------- drivers/platform/x86/dell/dell-smbios-smm.c | 3 +-- drivers/platform/x86/dell/dell-smbios-wmi.c | 4 +--- drivers/platform/x86/dell/dell-smbios.h | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index 01c72b91a50d..444786102f02 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -39,6 +39,7 @@ struct token_sysfs_data { struct smbios_device { struct list_head list; struct device *device; + int priority; int (*call_fn)(struct calling_interface_buffer *arg); }; @@ -145,7 +146,7 @@ int dell_smbios_error(int value) } EXPORT_SYMBOL_GPL(dell_smbios_error); -int dell_smbios_register_device(struct device *d, void *call_fn) +int dell_smbios_register_device(struct device *d, int priority, void *call_fn) { struct smbios_device *priv; @@ -154,6 +155,7 @@ int dell_smbios_register_device(struct device *d, void *call_fn) return -ENOMEM; get_device(d); priv->device = d; + priv->priority = priority; priv->call_fn = call_fn; mutex_lock(&smbios_mutex); list_add_tail(&priv->list, &smbios_device_list); @@ -292,28 +294,25 @@ EXPORT_SYMBOL_GPL(dell_smbios_call_filter); int dell_smbios_call(struct calling_interface_buffer *buffer) { - int (*call_fn)(struct calling_interface_buffer *) = NULL; - struct device *selected_dev = NULL; + struct smbios_device *selected = NULL; struct smbios_device *priv; int ret; mutex_lock(&smbios_mutex); list_for_each_entry(priv, &smbios_device_list, list) { - if (!selected_dev || priv->device->id >= selected_dev->id) { - dev_dbg(priv->device, "Trying device ID: %d\n", - priv->device->id); - call_fn = priv->call_fn; - selected_dev = priv->device; + if (!selected || priv->priority >= selected->priority) { + dev_dbg(priv->device, "Trying device ID: %d\n", priv->priority); + selected = priv; } } - if (!selected_dev) { + if (!selected) { ret = -ENODEV; pr_err("No dell-smbios drivers are loaded\n"); goto out_smbios_call; } - ret = call_fn(buffer); + ret = selected->call_fn(buffer); out_smbios_call: mutex_unlock(&smbios_mutex); diff --git a/drivers/platform/x86/dell/dell-smbios-smm.c b/drivers/platform/x86/dell/dell-smbios-smm.c index 4d375985c85f..7055e2c40f34 100644 --- a/drivers/platform/x86/dell/dell-smbios-smm.c +++ b/drivers/platform/x86/dell/dell-smbios-smm.c @@ -125,8 +125,7 @@ int init_dell_smbios_smm(void) if (ret) goto fail_platform_device_add; - ret = dell_smbios_register_device(&platform_device->dev, - &dell_smbios_smm_call); + ret = dell_smbios_register_device(&platform_device->dev, 0, &dell_smbios_smm_call); if (ret) goto fail_register; diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c index ae9012549560..a7dca8c59d60 100644 --- a/drivers/platform/x86/dell/dell-smbios-wmi.c +++ b/drivers/platform/x86/dell/dell-smbios-wmi.c @@ -264,9 +264,7 @@ static int dell_smbios_wmi_probe(struct wmi_device *wdev, const void *context) if (ret) return ret; - /* ID is used by dell-smbios to set priority of drivers */ - wdev->dev.id = 1; - ret = dell_smbios_register_device(&wdev->dev, &dell_smbios_wmi_call); + ret = dell_smbios_register_device(&wdev->dev, 1, &dell_smbios_wmi_call); if (ret) return ret; diff --git a/drivers/platform/x86/dell/dell-smbios.h b/drivers/platform/x86/dell/dell-smbios.h index 77baa15eb523..f421b8533a9e 100644 --- a/drivers/platform/x86/dell/dell-smbios.h +++ b/drivers/platform/x86/dell/dell-smbios.h @@ -64,7 +64,7 @@ struct calling_interface_structure { struct calling_interface_token tokens[]; } __packed; -int dell_smbios_register_device(struct device *d, void *call_fn); +int dell_smbios_register_device(struct device *d, int priority, void *call_fn); void dell_smbios_unregister_device(struct device *d); int dell_smbios_error(int value); From 5b9e07551faa7bb2f26cb039cc6e8d00bc4d0831 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 13:51:08 -0500 Subject: [PATCH 0203/1307] platform/x86/amd: pmc: Drop SMU F/W match for Cezanne MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chris reported that even on a BIOS that has a new enough SMU F/W version there is still a spurious IRQ1. Although the solution was added to SMU F/W 64.66.0 it turns out there needs to be a matching SBIOS change to activate it. Thus Linux shouldn't be avoiding the IRQ1 workaround on newer SMU F/W because there is no indication the BIOS change is in place. Drop the match for 64.66.0+ and instead match all RN/CZN/BRC (they all share same SMU F/W). Adjust the quirk infrastructure to allow quirking the workaround on or off and also adjust existing quirks to match properly. Unfortunately this may cause some systems that did have the SBIOS change in place to regress in keyboard wakeup but we don't have a way to know. If a user reports a keyboard wakeup regression they can run with amd_pmc.disable_workarounds=1 to deactivate the workaround and share DMI data so that their system can be quirked not to use the workaround in the upstream kernel. Reported-by: Chris Bainbridge Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4449 Tested-by: Chris Bainbridge Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250724185156.1827592-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 54 ++++++++++++++--------- drivers/platform/x86/amd/pmc/pmc.c | 13 ------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index ded4c84f5ed1..7ffc659b2794 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -28,10 +28,15 @@ static struct quirk_entry quirk_spurious_8042 = { .spurious_8042 = true, }; +static struct quirk_entry quirk_s2idle_spurious_8042 = { + .s2idle_bug_mmio = FCH_PM_BASE + FCH_PM_SCRATCH, + .spurious_8042 = true, +}; + static const struct dmi_system_id fwbug_list[] = { { .ident = "L14 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20X5"), @@ -39,7 +44,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XF"), @@ -47,7 +52,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "X13 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XH"), @@ -55,7 +60,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20XK"), @@ -63,7 +68,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UD"), @@ -71,7 +76,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14 Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UE"), @@ -79,7 +84,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), @@ -87,7 +92,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "T14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"), @@ -95,7 +100,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen1 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"), @@ -103,7 +108,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21A0"), @@ -111,7 +116,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "P14s Gen2 AMD", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21A1"), @@ -152,7 +157,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 14AMN7", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82VF"), @@ -160,7 +165,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 15AMN7", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82VG"), @@ -168,7 +173,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad 1 15AMN7", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82X5"), @@ -176,7 +181,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad Slim 3 14AMN8", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82XN"), @@ -184,7 +189,7 @@ static const struct dmi_system_id fwbug_list[] = { }, { .ident = "IdeaPad Slim 3 15AMN8", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82XQ"), @@ -193,7 +198,7 @@ static const struct dmi_system_id fwbug_list[] = { /* https://gitlab.freedesktop.org/drm/amd/-/issues/4434 */ { .ident = "Lenovo Yoga 6 13ALC6", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82ND"), @@ -202,7 +207,7 @@ static const struct dmi_system_id fwbug_list[] = { /* https://gitlab.freedesktop.org/drm/amd/-/issues/2684 */ { .ident = "HP Laptop 15s-eq2xxx", - .driver_data = &quirk_s2idle_bug, + .driver_data = &quirk_s2idle_spurious_8042, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15s-eq2xxx"), @@ -285,6 +290,16 @@ void amd_pmc_quirks_init(struct amd_pmc_dev *dev) { const struct dmi_system_id *dmi_id; + /* + * IRQ1 may cause an interrupt during resume even without a keyboard + * press. + * + * Affects Renoir, Cezanne and Barcelo SoCs + * + * A solution is available in PMFW 64.66.0, but it must be activated by + * SBIOS. If SBIOS is known to have the fix a quirk can be added for + * a given system to avoid workaround. + */ if (dev->cpu_id == AMD_CPU_ID_CZN) dev->disable_8042_wakeup = true; @@ -295,6 +310,5 @@ void amd_pmc_quirks_init(struct amd_pmc_dev *dev) if (dev->quirks->s2idle_bug_mmio) pr_info("Using s2idle quirk to avoid %s platform firmware bug\n", dmi_id->ident); - if (dev->quirks->spurious_8042) - dev->disable_8042_wakeup = true; + dev->disable_8042_wakeup = dev->quirks->spurious_8042; } diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 0b9b23eb7c2c..bd318fd02ccf 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -530,19 +530,6 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) static int amd_pmc_wa_irq1(struct amd_pmc_dev *pdev) { struct device *d; - int rc; - - /* cezanne platform firmware has a fix in 64.66.0 */ - if (pdev->cpu_id == AMD_CPU_ID_CZN) { - if (!pdev->major) { - rc = amd_pmc_get_smu_version(pdev); - if (rc) - return rc; - } - - if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65)) - return 0; - } d = bus_find_device_by_name(&serio_bus, NULL, "serio0"); if (!d) From dff6f36878799a5ffabd15336ce993dc737374dc Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 27 Jul 2025 14:05:13 -0700 Subject: [PATCH 0204/1307] platform/x86/intel-uncore-freq: Check write blocked for ELC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing write_blocked check for updating sysfs related to uncore efficiency latency control (ELC). If write operation is blocked return error. Fixes: bb516dc79c4a ("platform/x86/intel-uncore-freq: Add support for efficiency latency control") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250727210513.2898630-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 6df55c8e16b7..bfcf92aa4d69 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -192,9 +192,14 @@ static int uncore_read_control_freq(struct uncore_data *data, unsigned int *valu static int write_eff_lat_ctrl(struct uncore_data *data, unsigned int val, enum uncore_index index) { struct tpmi_uncore_cluster_info *cluster_info; + struct tpmi_uncore_struct *uncore_root; u64 control; cluster_info = container_of(data, struct tpmi_uncore_cluster_info, uncore_data); + uncore_root = cluster_info->uncore_root; + + if (uncore_root->write_blocked) + return -EPERM; if (cluster_info->root_domain) return -ENODATA; From 2c78fb287e1f430b929f2e49786518350d15605c Mon Sep 17 00:00:00 2001 From: Suma Hegde Date: Thu, 7 Aug 2025 10:06:37 +0000 Subject: [PATCH 0205/1307] platform/x86/amd/hsmp: Ensure sock->metric_tbl_addr is non-NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If metric table address is not allocated, accessing metrics_bin will result in a NULL pointer dereference, so add a check. Fixes: 5150542b8ec5 ("platform/x86/amd/hsmp: add support for metrics tbl") Signed-off-by: Suma Hegde Link: https://lore.kernel.org/r/20250807100637.952729-1-suma.hegde@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/hsmp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 885e2f8136fd..19f82c1d3090 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -356,6 +356,11 @@ ssize_t hsmp_metric_tbl_read(struct hsmp_socket *sock, char *buf, size_t size) if (!sock || !buf) return -EINVAL; + if (!sock->metric_tbl_addr) { + dev_err(sock->dev, "Metrics table address not available\n"); + return -ENOMEM; + } + /* Do not support lseek(), also don't allow more than the size of metric table */ if (size != sizeof(struct hsmp_metric_table)) { dev_err(sock->dev, "Wrong buffer size\n"); From de5cec220e4d45d7129e76f7d985c7b01f10f8d9 Mon Sep 17 00:00:00 2001 From: Suma Hegde Date: Mon, 4 Aug 2025 10:15:51 +0000 Subject: [PATCH 0206/1307] platform/x86/amd/hsmp: Ensure success even if hwmon registration fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if hwmon registration fails, HSMP remains accessible through the device file, so the operation should return success. Signed-off-by: Suma Hegde Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250804101551.89866-1-suma.hegde@amd.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index 54986a752f7d..a94009203e01 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -504,7 +504,7 @@ static int init_acpi(struct device *dev) dev_set_drvdata(dev, &hsmp_pdev->sock[sock_ind]); - return ret; + return 0; } static const struct bin_attribute hsmp_metric_tbl_attr = { From 748f897511446c7578ca5f6d2ff099916bad6e28 Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Mon, 28 Jul 2025 14:58:06 +0300 Subject: [PATCH 0207/1307] platform/x86: hp-wmi: mark Victus 16-r1xxx for victus_s fan and thermal profile support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds Victus 16-r1xxx laptop DMI board name into existing list. Tested on 16-r1077nt and works without any problem. Signed-off-by: Edip Hazuri Link: https://lore.kernel.org/r/20250728115805.20954-2-edip@medip.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index db5fdee2109c..60c8ac8d902c 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -92,9 +92,9 @@ static const char * const victus_thermal_profile_boards[] = { "8A25" }; -/* DMI Board names of Victus 16-s1000 laptops */ +/* DMI Board names of Victus 16-r1000 and Victus 16-s1000 laptops */ static const char * const victus_s_thermal_profile_boards[] = { - "8C9C" + "8C99", "8C9C" }; enum hp_wmi_radio { From bda053d6445717f8a4cd76f88caea2e39299fe07 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:03 -0700 Subject: [PATCH 0208/1307] selftests: drv-net: don't assume device has only 2 queues The test is implicitly assuming the device only has 2 queues. A real device will likely have more. The exact problem is that because NAPIs get added to the list from the head, the netlink dump reports them in reverse order. So the naive napis[0] will actually likely give us the _last_ NAPI, not the first one. Re-enable all the NAPIs instead of hard-coding 2 in the test. This way the NAPIs we operated on will always reappear, doesn't matter where they were in the registration order. Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/napi_threaded.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index b2698db39817..9699a100a87d 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None: threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout defer(_set_threaded_state, cfg, threaded) + return combined + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ @@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") _assert_napi_threaded_enabled(nl, napi0_id) _assert_napi_threaded_disabled(nl, napi1_id) @@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None: _assert_napi_threaded_enabled(nl, napi1_id) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") # check napi threaded is set for both napis _assert_napi_threaded_enabled(nl, napi0_id) From ccba9f6baa900e31ad1a4c36e6f3c176694f9eac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:04 -0700 Subject: [PATCH 0209/1307] net: update NAPI threaded config even for disabled NAPIs We have to make sure that all future NAPIs will have the right threaded state when the state is configured on the device level. We chose not to have an "unset" state for threaded, and not to wipe the NAPI config clean when channels are explicitly disabled. This means the persistent config structs "exist" even when their NAPIs are not instantiated. Differently put - the NAPI persistent state lives in the net_device (ncfg == struct napi_config): ,--- [napi 0] - [napi 1] [dev] | | `--- [ncfg 0] - [ncfg 1] so say we a device with 2 queues but only 1 enabled: ,--- [napi 0] [dev] | `--- [ncfg 0] - [ncfg 1] now we set the device to threaded=1: ,---------- [napi 0 (thr:1)] [dev(thr:1)] | `---------- [ncfg 0 (thr:1)] - [ncfg 1 (thr:?)] Since [ncfg 1] was not attached to a NAPI during configuration we skipped it. If we create a NAPI for it later it will have the old setting (presumably disabled). One could argue if this is right or not "in principle", but it's definitely not how things worked before per-NAPI config.. Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f3a3b761abfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2071,6 +2071,8 @@ enum netdev_reg_state { * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. + * @num_napi_configs: number of allocated NAPI config structs, + * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. @@ -2482,8 +2484,9 @@ struct net_device { u64 max_pacing_offload_horizon; struct napi_config *napi_config; - unsigned long gro_flush_timeout; + u32 num_napi_configs; u32 napi_defer_hard_irqs; + unsigned long gro_flush_timeout; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. diff --git a/net/core/dev.c b/net/core/dev.c index 68dc47d7e700..f180746382a1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6999,7 +6999,7 @@ int netif_set_threaded(struct net_device *dev, enum netdev_napi_threaded threaded) { struct napi_struct *napi; - int err = 0; + int i, err = 0; netdev_assert_locked_or_invisible(dev); @@ -7021,6 +7021,10 @@ int netif_set_threaded(struct net_device *dev, list_for_each_entry(napi, &dev->napi_list, dev_list) WARN_ON_ONCE(napi_set_threaded(napi, threaded)); + /* Override the config for all NAPIs even if currently not listed */ + for (i = 0; i < dev->num_napi_configs; i++) + dev->napi_config[i].threaded = threaded; + return err; } @@ -11873,6 +11877,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, goto free_all; dev->cfg_pending = dev->cfg; + dev->num_napi_configs = maxqs; napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) From b3fc08ab9a565efb42fe08be046a0d203b82cdb8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:05 -0700 Subject: [PATCH 0210/1307] net: prevent deadlocks when enabling NAPIs with mixed kthread config The following order of calls currently deadlocks if: - device has threaded=1; and - NAPI has persistent config with threaded=0. netif_napi_add_weight_config() dev->threaded == 1 napi_kthread_create() napi_enable() napi_restore_config() napi_set_threaded(0) napi_stop_kthread() while (NAPIF_STATE_SCHED) msleep(20) We deadlock because disabled NAPI has STATE_SCHED set. Creating a thread in netif_napi_add() just to destroy it in napi_disable() is fairly ugly in the first place. Let's read both the device config and the NAPI config in netif_napi_add(). Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-4-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/core/dev.c | 5 +++-- net/core/dev.h | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f180746382a1..5a3c0f40a93f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7357,8 +7357,9 @@ void netif_napi_add_weight_locked(struct net_device *dev, * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). */ - if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = NETDEV_NAPI_THREADED_DISABLED; + if (napi_get_threaded_config(dev, napi)) + if (napi_kthread_create(napi)) + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index ab69edc0c3e3..d6b08d435479 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -323,6 +323,14 @@ static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) return NETDEV_NAPI_THREADED_DISABLED; } +static inline enum netdev_napi_threaded +napi_get_threaded_config(struct net_device *dev, struct napi_struct *n) +{ + if (n->config) + return n->config->threaded; + return dev->threaded; +} + int napi_set_threaded(struct napi_struct *n, enum netdev_napi_threaded threaded); From dfbd535db74df0343ca39670e06326d7aee8c8f4 Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 23 Jul 2025 09:31:32 +0800 Subject: [PATCH 0211/1307] HID: intel-ish-hid: Increase ISHTP resume ack timeout to 300ms During s2idle suspend/resume testing on some systems, occasional several tens of seconds delays were observed in HID sensor resume handling. Trace analysis revealed repeated "link not ready" timeout errors during set/get_report operations, which were traced to the hid_ishtp_cl_resume_handler() timing out while waiting for the ISHTP resume acknowledgment. The previous timeout was set to 50ms, which proved insufficient on affected machines. Empirical measurements on failing systems showed that the time from ISH resume initiation to receiving the ISHTP resume ack could be as long as 180ms. As a result, the 50ms timeout caused failures. To address this, increase the wait timeout for ISHTP resume ack from 50ms to 300ms, providing a safer margin for slower hardware. Additionally, add error logging when a timeout occurs to aid future debugging and issue triage. No functional changes are made beyond the timeout adjustment and improved error reporting. Signed-off-by: Zhang Lixu Reviewed-by: Andy Shevchenko Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/pci-ish.c | 3 --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 3 +++ drivers/hid/intel-ish-hid/ishtp/bus.c | 3 --- drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h | 3 +++ 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index c57483224db6..9d150ce234f2 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -264,9 +264,6 @@ static void ish_shutdown(struct pci_dev *pdev) static struct device __maybe_unused *ish_resume_device; -/* 50ms to get resume response */ -#define WAIT_FOR_RESUME_ACK_MS 50 - /** * ish_resume_handler() - Work function to complete resume * @work: work struct diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 6550ad5bfbb5..d8c3c54a8c0f 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -759,6 +759,9 @@ static void hid_ishtp_cl_resume_handler(struct work_struct *work) if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) { client_data->suspended = false; wake_up_interruptible(&client_data->ishtp_resume_wait); + } else { + hid_ishtp_trace(client_data, "hid client: wait for resume timed out"); + dev_err(cl_data_to_dev(client_data), "wait for resume timed out"); } } diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index 5ac7d70a7c84..93a0432e7058 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -852,9 +852,6 @@ EXPORT_SYMBOL(ishtp_device); */ bool ishtp_wait_resume(struct ishtp_device *dev) { - /* 50ms to get resume response */ - #define WAIT_FOR_RESUME_ACK_MS 50 - /* Waiting to get resume response */ if (dev->resume_flag) wait_event_interruptible_timeout(dev->resume_wait, diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h index ec9f6e87aaf2..23db97ecf21c 100644 --- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h +++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h @@ -47,6 +47,9 @@ #define MAX_DMA_DELAY 20 +/* 300ms to get resume response */ +#define WAIT_FOR_RESUME_ACK_MS 300 + /* ISHTP device states */ enum ishtp_dev_state { ISHTP_DEV_INITIALIZING = 0, From ac143d499479ca33b6f8d16395c32394089979cf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Jul 2025 12:11:18 +0100 Subject: [PATCH 0212/1307] HID: Kconfig: Fix spelling mistake "enthropy" -> "entropy" There is a spelling mistake in the HID_U2FZERO description. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index a57901203aeb..79997553d8f9 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1243,7 +1243,7 @@ config HID_U2FZERO U2F Zero supports custom commands for blinking the LED and getting data from the internal hardware RNG. - The internal hardware can be used to feed the enthropy pool. + The internal hardware can be used to feed the entropy pool. U2F Zero only supports blinking its LED, so this driver doesn't allow setting the brightness to anything but 1, which will From 0379eb8691b9c4477da0277ae0832036ca4410b4 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Sun, 10 Aug 2025 19:09:24 +0100 Subject: [PATCH 0213/1307] HID: multitouch: fix slab out-of-bounds access in mt_report_fixup() A malicious HID device can trigger a slab out-of-bounds during mt_report_fixup() by passing in report descriptor smaller than 607 bytes. mt_report_fixup() attempts to patch byte offset 607 of the descriptor with 0x25 by first checking if byte offset 607 is 0x15 however it lacks bounds checks to verify if the descriptor is big enough before conducting this check. Fix this bug by ensuring the descriptor size is at least 608 bytes before accessing it. Below is the KASAN splat after the out of bounds access happens: [ 13.671954] ================================================================== [ 13.672667] BUG: KASAN: slab-out-of-bounds in mt_report_fixup+0x103/0x110 [ 13.673297] Read of size 1 at addr ffff888103df39df by task kworker/0:1/10 [ 13.673297] [ 13.673297] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.15.0-00005-gec5d573d83f4-dirty #3 [ 13.673297] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/04 [ 13.673297] Call Trace: [ 13.673297] [ 13.673297] dump_stack_lvl+0x5f/0x80 [ 13.673297] print_report+0xd1/0x660 [ 13.673297] kasan_report+0xe5/0x120 [ 13.673297] __asan_report_load1_noabort+0x18/0x20 [ 13.673297] mt_report_fixup+0x103/0x110 [ 13.673297] hid_open_report+0x1ef/0x810 [ 13.673297] mt_probe+0x422/0x960 [ 13.673297] hid_device_probe+0x2e2/0x6f0 [ 13.673297] really_probe+0x1c6/0x6b0 [ 13.673297] __driver_probe_device+0x24f/0x310 [ 13.673297] driver_probe_device+0x4e/0x220 [ 13.673297] __device_attach_driver+0x169/0x320 [ 13.673297] bus_for_each_drv+0x11d/0x1b0 [ 13.673297] __device_attach+0x1b8/0x3e0 [ 13.673297] device_initial_probe+0x12/0x20 [ 13.673297] bus_probe_device+0x13d/0x180 [ 13.673297] device_add+0xe3a/0x1670 [ 13.673297] hid_add_device+0x31d/0xa40 [...] Fixes: c8000deb6836 ("HID: multitouch: Add support for GT7868Q") Cc: stable@vger.kernel.org Signed-off-by: Qasim Ijaz Reviewed-by: Jiri Slaby Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 294516a8f541..22c6314a8843 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1503,6 +1503,14 @@ static const __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, if (hdev->vendor == I2C_VENDOR_ID_GOODIX && (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (*size < 608) { + dev_info( + &hdev->dev, + "GT7868Q fixup: report descriptor is only %u bytes, skipping\n", + *size); + return rdesc; + } + if (rdesc[607] == 0x15) { rdesc[607] = 0x25; dev_info( From 9fc51941d9e7793da969b2c66e6f8213c5b1237f Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Sun, 10 Aug 2025 22:40:30 -0700 Subject: [PATCH 0214/1307] HID: wacom: Add a new Art Pen 2 Signed-off-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 955b39d22524..9b2c710f8da1 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -684,6 +684,7 @@ static bool wacom_is_art_pen(int tool_id) case 0x885: /* Intuos3 Marker Pen */ case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */ case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */ + case 0x204: /* Art Pen 2 */ is_art_pen = true; break; } From 82b3644d3deab496cc09f29f3449ede6824b3e8e Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 16:59:59 +0200 Subject: [PATCH 0215/1307] device: rust: expand documentation for DeviceContext Expand the documentation around DeviceContext states and types, in order to provide detailed information about their purpose and relationship with each other. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/r/20250722150110.23565-2-dakr@kernel.org [ Fix two minor typos. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 69 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index b8613289de8e..fe095a8eccb1 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -311,28 +311,75 @@ unsafe impl Send for Device {} // synchronization in `struct device`. unsafe impl Sync for Device {} -/// Marker trait for the context of a bus specific device. +/// Marker trait for the context or scope of a bus specific device. /// -/// Some functions of a bus specific device should only be called from a certain context, i.e. bus -/// callbacks, such as `probe()`. +/// [`DeviceContext`] is a marker trait for types representing the context of a bus specific +/// [`Device`]. /// -/// This is the marker trait for structures representing the context of a bus specific device. +/// The specific device context types are: [`CoreInternal`], [`Core`], [`Bound`] and [`Normal`]. +/// +/// [`DeviceContext`] types are hierarchical, which means that there is a strict hierarchy that +/// defines which [`DeviceContext`] type can be derived from another. For instance, any +/// [`Device`] can dereference to a [`Device`]. +/// +/// The following enumeration illustrates the dereference hierarchy of [`DeviceContext`] types. +/// +/// - [`CoreInternal`] => [`Core`] => [`Bound`] => [`Normal`] +/// +/// Bus devices can automatically implement the dereference hierarchy by using +/// [`impl_device_context_deref`]. +/// +/// Note that the guarantee for a [`Device`] reference to have a certain [`DeviceContext`] comes +/// from the specific scope the [`Device`] reference is valid in. +/// +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref pub trait DeviceContext: private::Sealed {} -/// The [`Normal`] context is the context of a bus specific device when it is not an argument of -/// any bus callback. +/// The [`Normal`] context is the default [`DeviceContext`] of any [`Device`]. +/// +/// The normal context does not indicate any specific context. Any `Device` is also a valid +/// [`Device`]. It is the only [`DeviceContext`] for which it is valid to implement +/// [`AlwaysRefCounted`] for. +/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted pub struct Normal; -/// The [`Core`] context is the context of a bus specific device when it is supplied as argument of -/// any of the bus callbacks, such as `probe()`. +/// The [`Core`] context is the context of a bus specific device when it appears as argument of +/// any bus specific callback, such as `probe()`. +/// +/// The core context indicates that the [`Device`] reference's scope is limited to the bus +/// callback it appears in. It is intended to be used for synchronization purposes. Bus device +/// implementations can implement methods for [`Device`], such that they can only be called +/// from bus callbacks. pub struct Core; -/// Semantically the same as [`Core`] but reserved for internal usage of the corresponding bus +/// Semantically the same as [`Core`], but reserved for internal usage of the corresponding bus /// abstraction. +/// +/// The internal core context is intended to be used in exactly the same way as the [`Core`] +/// context, with the difference that this [`DeviceContext`] is internal to the corresponding bus +/// abstraction. +/// +/// This context mainly exists to share generic [`Device`] infrastructure that should only be called +/// from bus callbacks with bus abstractions, but without making them accessible for drivers. pub struct CoreInternal; -/// The [`Bound`] context is the context of a bus specific device reference when it is guaranteed to -/// be bound for the duration of its lifetime. +/// The [`Bound`] context is the [`DeviceContext`] of a bus specific device when it is guaranteed to +/// be bound to a driver. +/// +/// The bound context indicates that for the entire duration of the lifetime of a [`Device`] +/// reference, the [`Device`] is guaranteed to be bound to a driver. +/// +/// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound, +/// which can be proven with the [`Bound`] device context. +/// +/// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should +/// provide a [`Device`] reference to its users for this scope. This allows users to benefit +/// from optimizations for accessing device resources, see also [`Devres::access`]. +/// +/// [`Devres`]: kernel::devres::Devres +/// [`Devres::access`]: kernel::devres::Devres::access +/// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation pub struct Bound; mod private { From d6e26c1ae4a602d8b7eeb39e23514f6f98d91eb5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 17:00:00 +0200 Subject: [PATCH 0216/1307] device: rust: expand documentation for Device The documentation for the generic Device type is outdated and deserves much more detail. Hence, expand the documentation and cover topics such as device types, device contexts, as well as information on how to use the generic device infrastructure to implement bus and class specific device types. Reviewed-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250722150110.23565-3-dakr@kernel.org [ Add empty line after code blocks, "in" -> "within", remove unnecessary pin annotations in class device example. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 139 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 13 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index fe095a8eccb1..5902b3714a16 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -15,23 +15,130 @@ pub mod property; -/// A reference-counted device. +/// The core representation of a device in the kernel's driver model. /// -/// This structure represents the Rust abstraction for a C `struct device`. This implementation -/// abstracts the usage of an already existing C `struct device` within Rust code that we get -/// passed from the C side. +/// This structure represents the Rust abstraction for a C `struct device`. A [`Device`] can either +/// exist as temporary reference (see also [`Device::from_raw`]), which is only valid within a +/// certain scope or as [`ARef`], owning a dedicated reference count. /// -/// An instance of this abstraction can be obtained temporarily or permanent. +/// # Device Types /// -/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation. -/// A permanent instance is always reference-counted and hence not restricted by any lifetime -/// boundaries. +/// A [`Device`] can represent either a bus device or a class device. /// -/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem -/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in -/// `T::probe()`, such that a driver can store the `ARef` (equivalent to storing a -/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent -/// memory. +/// ## Bus Devices +/// +/// A bus device is a [`Device`] that is associated with a physical or virtual bus. Examples of +/// buses include PCI, USB, I2C, and SPI. Devices attached to a bus are registered with a specific +/// bus type, which facilitates matching devices with appropriate drivers based on IDs or other +/// identifying information. Bus devices are visible in sysfs under `/sys/bus//devices/`. +/// +/// ## Class Devices +/// +/// A class device is a [`Device`] that is associated with a logical category of functionality +/// rather than a physical bus. Examples of classes include block devices, network interfaces, sound +/// cards, and input devices. Class devices are grouped under a common class and exposed to +/// userspace via entries in `/sys/class//`. +/// +/// # Device Context +/// +/// [`Device`] references are generic over a [`DeviceContext`], which represents the type state of +/// a [`Device`]. +/// +/// As the name indicates, this type state represents the context of the scope the [`Device`] +/// reference is valid in. For instance, the [`Bound`] context guarantees that the [`Device`] is +/// bound to a driver for the entire duration of the existence of a [`Device`] reference. +/// +/// Other [`DeviceContext`] types besides [`Bound`] are [`Normal`], [`Core`] and [`CoreInternal`]. +/// +/// Unless selected otherwise [`Device`] defaults to the [`Normal`] [`DeviceContext`], which by +/// itself has no additional requirements. +/// +/// It is always up to the caller of [`Device::from_raw`] to select the correct [`DeviceContext`] +/// type for the corresponding scope the [`Device`] reference is created in. +/// +/// All [`DeviceContext`] types other than [`Normal`] are intended to be used with +/// [bus devices](#bus-devices) only. +/// +/// # Implementing Bus Devices +/// +/// This section provides a guideline to implement bus specific devices, such as [`pci::Device`] or +/// [`platform::Device`]. +/// +/// A bus specific device should be defined as follows. +/// +/// ```ignore +/// #[repr(transparent)] +/// pub struct Device( +/// Opaque, +/// PhantomData, +/// ); +/// ``` +/// +/// Since devices are reference counted, [`AlwaysRefCounted`] should be implemented for `Device` +/// (i.e. `Device`). Note that [`AlwaysRefCounted`] must not be implemented for any other +/// [`DeviceContext`], since all other device context types are only valid within a certain scope. +/// +/// In order to be able to implement the [`DeviceContext`] dereference hierarchy, bus device +/// implementations should call the [`impl_device_context_deref`] macro as shown below. +/// +/// ```ignore +/// // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s +/// // generic argument. +/// kernel::impl_device_context_deref!(unsafe { Device }); +/// ``` +/// +/// In order to convert from a any [`Device`] to [`ARef`], bus devices can implement +/// the following macro call. +/// +/// ```ignore +/// kernel::impl_device_context_into_aref!(Device); +/// ``` +/// +/// Bus devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl AsRef> for Device { +/// fn as_ref(&self) -> &device::Device { +/// ... +/// } +/// } +/// ``` +/// +/// # Implementing Class Devices +/// +/// Class device implementations require less infrastructure and depend slightly more on the +/// specific subsystem. +/// +/// An example implementation for a class device could look like this. +/// +/// ```ignore +/// #[repr(C)] +/// pub struct Device { +/// dev: Opaque, +/// data: T::Data, +/// } +/// ``` +/// +/// This class device uses the sub-classing pattern to embed the driver's private data within the +/// allocation of the class device. For this to be possible the class device is generic over the +/// class specific `Driver` trait implementation. +/// +/// Just like any device, class devices are reference counted and should hence implement +/// [`AlwaysRefCounted`] for `Device`. +/// +/// Class devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl AsRef for Device { +/// fn as_ref(&self) -> &device::Device { +/// ... +/// } +/// } +/// ``` +/// +/// An example for a class device implementation is [`drm::Device`]. /// /// # Invariants /// @@ -42,6 +149,12 @@ /// /// `bindings::device::release` is valid to be called from any thread, hence `ARef` can be /// dropped from any thread. +/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted +/// [`drm::Device`]: kernel::drm::Device +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref +/// [`pci::Device`]: kernel::pci::Device +/// [`platform::Device`]: kernel::platform::Device #[repr(transparent)] pub struct Device(Opaque, PhantomData); From 970a7c68788e3fec237713eef22ace46507bcf9c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 22 Jul 2025 17:00:01 +0200 Subject: [PATCH 0217/1307] driver: rust: expand documentation for driver infrastructure Add documentation about generic driver infrastructure, representing a guideline on how the generic driver infrastructure is intended to be used to implement bus specific driver APIs. This covers aspects such as the bus specific driver trait, adapter implementation, driver registration and custom device ID types. Reviewed-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandre Courbot Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250722150110.23565-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/driver.rs | 89 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs index a8f2675ba7a7..279e3af20682 100644 --- a/rust/kernel/driver.rs +++ b/rust/kernel/driver.rs @@ -2,8 +2,93 @@ //! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.). //! -//! Each bus / subsystem is expected to implement [`RegistrationOps`], which allows drivers to -//! register using the [`Registration`] class. +//! This documentation describes how to implement a bus specific driver API and how to align it with +//! the design of (bus specific) devices. +//! +//! Note: Readers are expected to know the content of the documentation of [`Device`] and +//! [`DeviceContext`]. +//! +//! # Driver Trait +//! +//! The main driver interface is defined by a bus specific driver trait. For instance: +//! +//! ```ignore +//! pub trait Driver: Send { +//! /// The type holding information about each device ID supported by the driver. +//! type IdInfo: 'static; +//! +//! /// The table of OF device ids supported by the driver. +//! const OF_ID_TABLE: Option> = None; +//! +//! /// The table of ACPI device ids supported by the driver. +//! const ACPI_ID_TABLE: Option> = None; +//! +//! /// Driver probe. +//! fn probe(dev: &Device, id_info: &Self::IdInfo) -> Result>>; +//! +//! /// Driver unbind (optional). +//! fn unbind(dev: &Device, this: Pin<&Self>) { +//! let _ = (dev, this); +//! } +//! } +//! ``` +//! +//! For specific examples see [`auxiliary::Driver`], [`pci::Driver`] and [`platform::Driver`]. +//! +//! The `probe()` callback should return a `Result>>`, i.e. the driver's private +//! data. The bus abstraction should store the pointer in the corresponding bus device. The generic +//! [`Device`] infrastructure provides common helpers for this purpose on its +//! [`Device`] implementation. +//! +//! All driver callbacks should provide a reference to the driver's private data. Once the driver +//! is unbound from the device, the bus abstraction should take back the ownership of the driver's +//! private data from the corresponding [`Device`] and [`drop`] it. +//! +//! All driver callbacks should provide a [`Device`] reference (see also [`device::Core`]). +//! +//! # Adapter +//! +//! The adapter implementation of a bus represents the abstraction layer between the C bus +//! callbacks and the Rust bus callbacks. It therefore has to be generic over an implementation of +//! the [driver trait](#driver-trait). +//! +//! ```ignore +//! pub struct Adapter; +//! ``` +//! +//! There's a common [`Adapter`] trait that can be implemented to inherit common driver +//! infrastructure, such as finding the ID info from an [`of::IdTable`] or [`acpi::IdTable`]. +//! +//! # Driver Registration +//! +//! In order to register C driver types (such as `struct platform_driver`) the [adapter](#adapter) +//! should implement the [`RegistrationOps`] trait. +//! +//! This trait implementation can be used to create the actual registration with the common +//! [`Registration`] type. +//! +//! Typically, bus abstractions want to provide a bus specific `module_bus_driver!` macro, which +//! creates a kernel module with exactly one [`Registration`] for the bus specific adapter. +//! +//! The generic driver infrastructure provides a helper for this with the [`module_driver`] macro. +//! +//! # Device IDs +//! +//! Besides the common device ID types, such as [`of::DeviceId`] and [`acpi::DeviceId`], most buses +//! may need to implement their own device ID types. +//! +//! For this purpose the generic infrastructure in [`device_id`] should be used. +//! +//! [`auxiliary::Driver`]: kernel::auxiliary::Driver +//! [`Core`]: device::Core +//! [`Device`]: device::Device +//! [`Device`]: device::Device +//! [`Device`]: device::Device +//! [`DeviceContext`]: device::DeviceContext +//! [`device_id`]: kernel::device_id +//! [`module_driver`]: kernel::module_driver +//! [`pci::Driver`]: kernel::pci::Driver +//! [`platform::Driver`]: kernel::platform::Driver use crate::error::{Error, Result}; use crate::{acpi, device, of, str::CStr, try_pin_init, types::Opaque, ThisModule}; From d405ec23df13e6df599f5bd965a55d13420366b8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 Aug 2025 14:57:06 +0200 Subject: [PATCH 0218/1307] ACPI: processor: perflib: Move problematic pr->performance check Commit d33bd88ac0eb ("ACPI: processor: perflib: Fix initial _PPC limit application") added a pr->performance check that prevents the frequency QoS request from being added when the given processor has no performance object. Unfortunately, this causes a WARN() in freq_qos_remove_request() to trigger on an attempt to take the given CPU offline later because the frequency QoS object has not been added for it due to the missing performance object. Address this by moving the pr->performance check before calling acpi_processor_get_platform_limit() so it only prevents a limit from being set for the CPU if the performance object is not present. This way, the frequency QoS request is added as it was before the above commit and it is present all the time along with the CPU's cpufreq policy regardless of whether or not the CPU is online. Fixes: d33bd88ac0eb ("ACPI: processor: perflib: Fix initial _PPC limit application") Tested-by: Rafael J. Wysocki Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2801421.mvXUDI8C0e@rafael.j.wysocki --- drivers/acpi/processor_perflib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 755003bf3a45..8972446b7162 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -180,7 +180,7 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy) struct acpi_processor *pr = per_cpu(processors, cpu); int ret; - if (!pr || !pr->performance) + if (!pr) continue; /* @@ -197,6 +197,9 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); + if (!pr->performance) + continue; + ret = acpi_processor_get_platform_limit(pr); if (ret) pr_err("Failed to update freq constraint for CPU%d (%d)\n", From 56bdf7270ff4f870e2d4bfacdc00161e766dba2d Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 11 Aug 2025 13:50:44 -0400 Subject: [PATCH 0219/1307] Revert "gpio: mlxbf3: only get IRQ for device instance 0" This reverts commit 10af0273a35ab4513ca1546644b8c853044da134. While this change was merged, it is not the preferred solution. During review of a similar change to the gpio-mlxbf2 driver, the use of "platform_get_irq_optional" was identified as the preferred solution, so let's use it for gpio-mlxbf3 driver as well. Cc: stable@vger.kernel.org Fixes: 10af0273a35a ("gpio: mlxbf3: only get IRQ for device instance 0") Signed-off-by: David Thompson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/8d2b630c71b3742f2c74242cf7d602706a6108e6.1754928650.git.davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 52 +++++++++++++------------------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 9875e34bde72..10ea71273c89 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -190,9 +190,7 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) struct mlxbf3_gpio_context *gs; struct gpio_irq_chip *girq; struct gpio_chip *gc; - char *colon_ptr; int ret, irq; - long num; gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL); if (!gs) @@ -229,39 +227,25 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - colon_ptr = strchr(dev_name(dev), ':'); - if (!colon_ptr) { - dev_err(dev, "invalid device name format\n"); - return -EINVAL; - } + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + girq = &gs->gc.irq; + gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); + girq->default_type = IRQ_TYPE_NONE; + /* This will let us handle the parent IRQ in the driver */ + girq->num_parents = 0; + girq->parents = NULL; + girq->parent_handler = NULL; + girq->handler = handle_bad_irq; - ret = kstrtol(++colon_ptr, 16, &num); - if (ret) { - dev_err(dev, "invalid device instance\n"); - return ret; - } - - if (!num) { - irq = platform_get_irq(pdev, 0); - if (irq >= 0) { - girq = &gs->gc.irq; - gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); - girq->default_type = IRQ_TYPE_NONE; - /* This will let us handle the parent IRQ in the driver */ - girq->num_parents = 0; - girq->parents = NULL; - girq->parent_handler = NULL; - girq->handler = handle_bad_irq; - - /* - * Directly request the irq here instead of passing - * a flow-handler because the irq is shared. - */ - ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, - IRQF_SHARED, dev_name(dev), gs); - if (ret) - return dev_err_probe(dev, ret, "failed to request IRQ"); - } + /* + * Directly request the irq here instead of passing + * a flow-handler because the irq is shared. + */ + ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, + IRQF_SHARED, dev_name(dev), gs); + if (ret) + return dev_err_probe(dev, ret, "failed to request IRQ"); } platform_set_drvdata(pdev, gs); From 810bd9066fb1871b8a9528f31f2fdbf2a8b73bf2 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 11 Aug 2025 13:50:45 -0400 Subject: [PATCH 0220/1307] gpio: mlxbf3: use platform_get_irq_optional() The gpio-mlxbf3 driver interfaces with two GPIO controllers, device instance 0 and 1. There is a single IRQ resource shared between the two controllers, and it is found in the ACPI table for device instance 0. The driver should not use platform_get_irq(), otherwise this error is logged when probing instance 1: mlxbf3_gpio MLNXBF33:01: error -ENXIO: IRQ index 0 not found Cc: stable@vger.kernel.org Fixes: cd33f216d241 ("gpio: mlxbf3: Add gpio driver support") Signed-off-by: David Thompson Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/ce70b98a201ce82b9df9aa80ac7a5eeaa2268e52.1754928650.git.davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 10ea71273c89..ed29b07d16c1 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -227,7 +227,7 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq >= 0) { girq = &gs->gc.irq; gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); From 5cfdfc623835d40664f642b75a56d9934f24c5ff Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 6 Aug 2025 15:01:37 -0500 Subject: [PATCH 0221/1307] dt-bindings: phy: marvell,comphy-cp110: Fix clock and child node constraints In converting marvell,comphy-cp110 to schema, the constraints for clocks on marvell,comphy-a3700 are wrong, the maximum number of child nodes are wrong, and the phy nodes may have a 'connector' child node: phy@18300 (marvell,comphy-a3700): clock-names: False schema does not allow ['xtal'] phy@120000 (marvell,comphy-cp110): 'phy@3', 'phy@4', 'phy@5' do not match any of the regexes: '^phy@[0-2]$', '^pinctrl-[0-9]+$' phy@120000 (marvell,comphy-cp110): phy@2: 'connector' does not match any of the regexes: '^pinctrl-[0-9]+$' Fixes: 50355ac70d4f ("dt-bindings: phy: Convert marvell,comphy-cp110 to DT schema") Signed-off-by: Rob Herring (Arm) Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20250806200138.1366189-1-robh@kernel.org Signed-off-by: Vinod Koul --- .../bindings/phy/marvell,comphy-cp110.yaml | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml b/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml index d9501df42886..c35d31642805 100644 --- a/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml +++ b/Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml @@ -47,21 +47,19 @@ properties: const: 0 clocks: + minItems: 1 maxItems: 3 - description: Reference clocks for CP110; MG clock, MG Core clock, AXI clock clock-names: - items: - - const: mg_clk - - const: mg_core_clk - - const: axi_clk + minItems: 1 + maxItems: 3 marvell,system-controller: description: Phandle to the Marvell system controller (CP110 only) $ref: /schemas/types.yaml#/definitions/phandle patternProperties: - '^phy@[0-2]$': + '^phy@[0-5]$': description: A COMPHY lane child node type: object additionalProperties: false @@ -69,10 +67,14 @@ patternProperties: properties: reg: description: COMPHY lane number + maximum: 5 '#phy-cells': const: 1 + connector: + type: object + required: - reg - '#phy-cells' @@ -91,13 +93,24 @@ allOf: then: properties: - clocks: false - clock-names: false + clocks: + maxItems: 1 + clock-names: + const: xtal required: - reg-names else: + properties: + clocks: + minItems: 3 + clock-names: + items: + - const: mg_clk + - const: mg_core_clk + - const: axi_clk + required: - marvell,system-controller From bca065733afd1e3a89a02f05ffe14e966cd5f78e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Jul 2025 15:12:04 +0200 Subject: [PATCH 0222/1307] phy: tegra: xusb: fix device and OF node leak at probe Make sure to drop the references taken to the PMC OF node and device by of_parse_phandle() and of_find_device_by_node() during probe. Note the holding a reference to the PMC device does not prevent the PMC regmap from going away (e.g. if the PMC driver is unbound) so there is no need to keep the reference. Fixes: 2d1021487273 ("phy: tegra: xusb: Add wake/sleepwalk for Tegra210") Cc: stable@vger.kernel.org # 5.14 Cc: JC Kuo Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250724131206.2211-2-johan@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra210.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/phy/tegra/xusb-tegra210.c b/drivers/phy/tegra/xusb-tegra210.c index ebc8a7e21a31..3409924498e9 100644 --- a/drivers/phy/tegra/xusb-tegra210.c +++ b/drivers/phy/tegra/xusb-tegra210.c @@ -3164,18 +3164,22 @@ tegra210_xusb_padctl_probe(struct device *dev, } pdev = of_find_device_by_node(np); + of_node_put(np); if (!pdev) { dev_warn(dev, "PMC device is not available\n"); goto out; } - if (!platform_get_drvdata(pdev)) + if (!platform_get_drvdata(pdev)) { + put_device(&pdev->dev); return ERR_PTR(-EPROBE_DEFER); + } padctl->regmap = dev_get_regmap(&pdev->dev, "usb_sleepwalk"); if (!padctl->regmap) dev_info(dev, "failed to find PMC regmap\n"); + put_device(&pdev->dev); out: return &padctl->base; } From 64961557efa1b98f375c0579779e7eeda1a02c42 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Jul 2025 15:12:05 +0200 Subject: [PATCH 0223/1307] phy: ti: omap-usb2: fix device leak at unbind Make sure to drop the reference to the control device taken by of_find_device_by_node() during probe when the driver is unbound. Fixes: 478b6c7436c2 ("usb: phy: omap-usb2: Don't use omap_get_control_dev()") Cc: stable@vger.kernel.org # 3.13 Cc: Roger Quadros Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250724131206.2211-3-johan@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index c1a0ef979142..c444bb2530ca 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -363,6 +363,13 @@ static void omap_usb2_init_errata(struct omap_usb *phy) phy->flags |= OMAP_USB2_DISABLE_CHRG_DET; } +static void omap_usb2_put_device(void *_dev) +{ + struct device *dev = _dev; + + put_device(dev); +} + static int omap_usb2_probe(struct platform_device *pdev) { struct omap_usb *phy; @@ -373,6 +380,7 @@ static int omap_usb2_probe(struct platform_device *pdev) struct device_node *control_node; struct platform_device *control_pdev; const struct usb_phy_data *phy_data; + int ret; phy_data = device_get_match_data(&pdev->dev); if (!phy_data) @@ -423,6 +431,11 @@ static int omap_usb2_probe(struct platform_device *pdev) return -EINVAL; } phy->control_dev = &control_pdev->dev; + + ret = devm_add_action_or_reset(&pdev->dev, omap_usb2_put_device, + phy->control_dev); + if (ret) + return ret; } else { if (of_property_read_u32_index(node, "syscon-phy-power", 1, From e19bcea99749ce8e8f1d359f68ae03210694ad56 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 24 Jul 2025 15:12:06 +0200 Subject: [PATCH 0224/1307] phy: ti-pipe3: fix device leak at unbind Make sure to drop the reference to the control device taken by of_find_device_by_node() during probe when the driver is unbound. Fixes: 918ee0d21ba4 ("usb: phy: omap-usb3: Don't use omap_get_control_dev()") Cc: stable@vger.kernel.org # 3.13 Cc: Roger Quadros Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250724131206.2211-4-johan@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-ti-pipe3.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/ti/phy-ti-pipe3.c b/drivers/phy/ti/phy-ti-pipe3.c index da2cbacb982c..ae764d6524c9 100644 --- a/drivers/phy/ti/phy-ti-pipe3.c +++ b/drivers/phy/ti/phy-ti-pipe3.c @@ -667,12 +667,20 @@ static int ti_pipe3_get_clk(struct ti_pipe3 *phy) return 0; } +static void ti_pipe3_put_device(void *_dev) +{ + struct device *dev = _dev; + + put_device(dev); +} + static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy) { struct device *dev = phy->dev; struct device_node *node = dev->of_node; struct device_node *control_node; struct platform_device *control_pdev; + int ret; phy->phy_power_syscon = syscon_regmap_lookup_by_phandle(node, "syscon-phy-power"); @@ -704,6 +712,11 @@ static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy) } phy->control_dev = &control_pdev->dev; + + ret = devm_add_action_or_reset(dev, ti_pipe3_put_device, + phy->control_dev); + if (ret) + return ret; } if (phy->mode == PIPE3_MODE_PCIE) { From aac1256a41cfbbaca12d6c0a5753d1e3b8d2d8bf Mon Sep 17 00:00:00 2001 From: Ziyue Zhang Date: Fri, 25 Jul 2025 18:22:29 +0800 Subject: [PATCH 0225/1307] dt-bindings: phy: qcom,sc8280xp-qmp-pcie-phy: Update pcie phy bindings The gcc_aux_clk is required by the PCIe controller but not by the PCIe PHY. In PCIe PHY, the source of aux_clk used in low-power mode should be gcc_phy_aux_clk. Hence, remove gcc_aux_clk and replace it with gcc_phy_aux_clk. Fixes: fd2d4e4c1986 ("dt-bindings: phy: qcom,qmp: Add sa8775p QMP PCIe PHY") Signed-off-by: Ziyue Zhang Acked-by: Rob Herring (Arm) Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20250725102231.3608298-2-ziyue.zhang@oss.qualcomm.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml index a1ae8c7988c8..b6f140bf5b3b 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml @@ -176,6 +176,8 @@ allOf: compatible: contains: enum: + - qcom,sa8775p-qmp-gen4x2-pcie-phy + - qcom,sa8775p-qmp-gen4x4-pcie-phy - qcom,sc8280xp-qmp-gen3x1-pcie-phy - qcom,sc8280xp-qmp-gen3x2-pcie-phy - qcom,sc8280xp-qmp-gen3x4-pcie-phy @@ -197,8 +199,6 @@ allOf: contains: enum: - qcom,qcs8300-qmp-gen4x2-pcie-phy - - qcom,sa8775p-qmp-gen4x2-pcie-phy - - qcom,sa8775p-qmp-gen4x4-pcie-phy then: properties: clocks: From 3f69f2e78799bf76e5dfe74f2eda4d67812d4edc Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 07:41:44 +0200 Subject: [PATCH 0226/1307] PCI: xilinx: Fix NULL pointer dereference in xilinx_pcie_intr_handler() f29861aa301c5 ("PCI: xilinx: Switch to msi_create_parent_irq_domain()") changed xilinx_pcie::msi_domain from child devices' interrupt domain to Xilinx AXI bridge's interrupt domain. However, xilinx_pcie_intr_handler() wasn't changed and still reads Xilinx AXI bridge's interrupt domain from xilinx_pcie::msi_domain->parent. This pointer is NULL now. Update xilinx_pcie_intr_handler() to read the correct interrupt domain pointer. Fixes: f29861aa301c5 ("PCI: xilinx: Switch to msi_create_parent_irq_domain()") Signed-off-by: Nam Cao Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250811054144.4049448-1-namcao@linutronix.de --- drivers/pci/controller/pcie-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c index f121836c3cf4..937ea6ae1ac4 100644 --- a/drivers/pci/controller/pcie-xilinx.c +++ b/drivers/pci/controller/pcie-xilinx.c @@ -400,7 +400,7 @@ static irqreturn_t xilinx_pcie_intr_handler(int irq, void *data) if (val & XILINX_PCIE_RPIFR1_MSI_INTR) { val = pcie_read(pcie, XILINX_PCIE_REG_RPIFR2) & XILINX_PCIE_RPIFR2_MSG_DATA; - domain = pcie->msi_domain->parent; + domain = pcie->msi_domain; } else { val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >> XILINX_PCIE_RPIFR1_INTR_SHIFT; From 9d7a1cbebbb691891671def57407ba2f8ee914e8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:09 +0100 Subject: [PATCH 0227/1307] drm/xe/migrate: prevent infinite recursion If the buf + offset is not aligned to XE_CAHELINE_BYTES we fallback to using a bounce buffer. However the bounce buffer here is allocated on the stack, and the only alignment requirement here is that it's naturally aligned to u8, and not XE_CACHELINE_BYTES. If the bounce buffer is also misaligned we then recurse back into the function again, however the new bounce buffer might also not be aligned, and might never be until we eventually blow through the stack, as we keep recursing. Instead of using the stack use kmalloc, which should respect the power-of-two alignment request here. Fixes a kernel panic when triggering this path through eudebug. v2 (Stuart): - Add build bug check for power-of-two restriction - s/EINVAL/ENOMEM/ Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-6-matthew.auld@intel.com (cherry picked from commit 38b34e928a08ba594c4bbf7118aa3aadacd62fff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index ba1cff2e4cda..6193e2ca3741 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1820,15 +1820,19 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. */ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1837,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), write); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1861,7 +1865,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); From 4126cb327a2e3273c81fcef1c594c5b7b645c44c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:10 +0100 Subject: [PATCH 0228/1307] drm/xe/migrate: don't overflow max copy size With non-page aligned copy, we need to use 4 byte aligned pitch, however the size itself might still be close to our maximum of ~8M, and so the dimensions of the copy can easily exceed the S16_MAX limit of the copy command leading to the following assert: xe 0000:03:00.0: [drm] Assertion `size / pitch <= ((s16)(((u16)~0U) >> 1))` failed! platform: BATTLEMAGE subplatform: 1 graphics: Xe2_HPG 20.01 step A0 media: Xe2_HPM 13.01 step A1 tile: 0 VRAM 10.0 GiB GT: 0 type 1 WARNING: CPU: 23 PID: 10605 at drivers/gpu/drm/xe/xe_migrate.c:673 emit_copy+0x4b5/0x4e0 [xe] To fix this account for the pitch when calculating the number of current bytes to copy. Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-7-matthew.auld@intel.com (cherry picked from commit 8c2d61e0e916e077fda7e7b8e67f25ffe0f361fc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6193e2ca3741..95bcaa427d26 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1887,6 +1887,12 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); + if (current_bytes & ~PAGE_MASK) { + int pitch = 4; + + current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + } + if (fence) dma_fence_put(fence); From 145832fbdd17b1d77ffd6cdd1642259e101d1b7e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 31 Jul 2025 10:38:11 +0100 Subject: [PATCH 0229/1307] drm/xe/migrate: prevent potential UAF If we hit the error path, the previous fence (if there is one) has already been put() prior to this, so doing a fence_wait could lead to UAF. Tweak the flow to do to the put() until after we do the wait. Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Maciej Patelczyk Cc: Matthew Brost Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250731093807.207572-8-matthew.auld@intel.com (cherry picked from commit 9b7ca35ed28fe5fad86e9d9c24ebd1271e4c9c3e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 95bcaa427d26..7d20ac4bb633 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1893,9 +1893,6 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, current_bytes = min_t(int, current_bytes, S16_MAX * pitch); } - if (fence) - dma_fence_put(fence); - __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, dma_addr + current_page, @@ -1903,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; From 2dd7a47669ae6c1da18c55f8e89c4a44418c7006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 5 Aug 2025 09:48:42 +0200 Subject: [PATCH 0230/1307] drm/xe: Defer buffer object shrinker write-backs and GPU waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the xe buffer-object shrinker allows GPU waits and write-back, (typically from kswapd), perform multiple passes, skipping subsequent passes if the shrinker number of scanned objects target is reached. 1) Without GPU waits and write-back 2) Without write-back 3) With both GPU-waits and write-back This is to avoid stalls and costly write- and readbacks unless they are really necessary. v2: - Don't test for scan completion twice. (Stuart Summers) - Update tags. Reported-by: melvyn Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557 Cc: Summers Stuart Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: # v6.15+ Signed-off-by: Thomas Hellström Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250805074842.11359-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 80944d334182ce5eb27d00e2bf20a88bfc32dea1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 1c3c04d52f55..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe, return freed; } +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. + */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + } + + return freed; +} + static unsigned long xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) From 55d49f06162e45686399df4ae6292167f0deab7c Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Sat, 9 Aug 2025 00:23:10 +0530 Subject: [PATCH 0231/1307] drm/xe/hwmon: Add SW clamp for power limits writes Clamp writes to power limits powerX_crit/currX_crit, powerX_cap, powerX_max, to the maximum supported by the pcode mailbox when sysfs-provided values exceed this limit. Although the pcode already performs clamping, values beyond the pcode mailbox's supported range get truncated, leading to incorrect critical power settings. This patch ensures proper clamping to prevent such truncation. v2: - Address below review comments. (Riana) - Split comments into multiple sentences. - Use local variables for readability. - Add a debug log. - Use u64 instead of unsigned long. v3: - Change drm_dbg logs to drm_info. (Badal) v4: - Rephrase the drm_info log. (Rodrigo, Riana) - Rename variable max_mbx_power_limit to max_supp_power_limit, as limit is same for platforms with and without mailbox power limit support. Signed-off-by: Karthik Poosa Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Fixes: fb1b70607f73 ("drm/xe/hwmon: Expose power attributes") Reviewed-by: Riana Tauro Link: https://lore.kernel.org/r/20250808185310.3466529-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit d301eb950da59f962bafe874cf5eb6d61a85b2c2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index f08fc4377d25..c17ed1ae8649 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe int ret = 0; u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; mutex_lock(&hwmon->hwmon_lock); @@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); @@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. + */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); From 963e22c084c2b6097e1e635d29c6336881f67708 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 29 Jul 2025 08:20:38 +0200 Subject: [PATCH 0232/1307] ACPI: EC: Relax sanity check of the ECDT ID string It turns out that the ECDT table inside the ThinkBook 14 G7 IML contains a valid EC description but an invalid ID string ("_SB.PC00.LPCB.EC0"). Ignoring this ECDT based on the invalid ID string prevents the kernel from detecting the built-in touchpad, so relax the sanity check of the ID string and only reject ECDTs with empty ID strings. Reported-by: Ilya K Fixes: 7a0d59f6a913 ("ACPI: EC: Ignore ECDT tables with an invalid ID string") Signed-off-by: Armin Wolf Tested-by: Ilya K Link: https://patch.msgid.link/20250729062038.303734-1-W_Armin@gmx.de Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 75c7db8b156a..7855bbf752b1 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2033,7 +2033,7 @@ void __init acpi_ec_ecdt_probe(void) goto out; } - if (!strstarts(ecdt_ptr->id, "\\")) { + if (!strlen(ecdt_ptr->id)) { /* * The ECDT table on some MSI notebooks contains invalid data, together * with an empty ID string (""). @@ -2042,9 +2042,13 @@ void __init acpi_ec_ecdt_probe(void) * a "fully qualified reference to the (...) embedded controller device", * so this string always has to start with a backslash. * - * By verifying this we can avoid such faulty ECDT tables in a safe way. + * However some ThinkBook machines have a ECDT table with a valid EC + * description but an invalid ID string ("_SB.PC00.LPCB.EC0"). + * + * Because of this we only check if the ID string is empty in order to + * avoid the obvious cases. */ - pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id); + pr_err(FW_BUG "Ignoring ECDT due to empty ID string\n"); goto out; } From 5149bbb56bdcf5c5f72904025fbb502217580b63 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 07:39:35 +0200 Subject: [PATCH 0233/1307] PCI: vmd: Remove MSI-X check on child devices d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") added a WARN_ON sanity check that child devices support MSI-X, because VMD document says [1]: Intel VMD only supports MSIx Interrupts from child devices and therefore the BIOS must enable PCIe Hot Plug and MSIx interrups [sic]. However, the VMD device can't even tell the difference between a child device using MSI and one using MSI-X. Per 185a383ada2e ("x86/PCI: Add driver for Intel Volume Management Device (VMD)"), VMD does not support INTx interrupts, but does support child devices using either MSI or MSI-X. Remove the sanity check to avoid the WARN_ON and allow child devices to use MSI, reported by Ammar. Fixes: d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") Link: https://cdrdv2-public.intel.com/776857/VMD_White_Paper.pdf [1] Reported-by: Ammar Faizi Closes: https://lore.kernel.org/linux-pci/aJXYhfc%2F6DfcqfqF@linux.gnuweeb.org/ Signed-off-by: Nam Cao [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Ammar Faizi Link: https://patch.msgid.link/20250811053935.4049211-1-namcao@linutronix.de --- drivers/pci/controller/vmd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index b679c7f28f51..1bd5bf4a6097 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -306,9 +306,6 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, struct irq_domain *real_parent, struct msi_domain_info *info) { - if (WARN_ON_ONCE(info->bus_token != DOMAIN_BUS_PCI_DEVICE_MSIX)) - return false; - if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; From abbf9a44944171ca99c150adad9361a2f517d3b6 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 27 Jul 2025 11:23:17 +0200 Subject: [PATCH 0234/1307] rust: workaround `rustdoc` target modifiers bug Starting with Rust 1.88.0 (released 2025-06-26), `rustdoc` complains about a target modifier mismatch in configurations where `-Zfixed-x18` is passed: error: mixing `-Zfixed-x18` will cause an ABI mismatch in crate `rust_out` | = help: the `-Zfixed-x18` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zfixed-x18` in this crate is incompatible with `-Zfixed-x18=` in dependency `core` = help: set `-Zfixed-x18=` in this crate or unset `-Zfixed-x18` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=fixed-x18` to silence this error The reason is that `rustdoc` was not passing the target modifiers when configuring the session options, and thus it would report a mismatch that did not exist as soon as a target modifier is used in a dependency. We did not notice it in the kernel until now because `-Zfixed-x18` has been a target modifier only since 1.88.0 (and it is the only one we use so far). The issue has been reported upstream [1] and a fix has been submitted [2], including a test similar to the kernel case. [ This is now fixed upstream (thanks Guillaume for the quick review), so it will be fixed in Rust 1.90.0 (expected 2025-09-18). - Miguel ] Meanwhile, conditionally pass `-Cunsafe-allow-abi-mismatch=fixed-x18` to workaround the issue on our side. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Reported-by: Konrad Dybcio Closes: https://lore.kernel.org/rust-for-linux/36cdc798-524f-4910-8b77-d7b9fac08d77@oss.qualcomm.com/ Link: https://github.com/rust-lang/rust/issues/144521 [1] Link: https://github.com/rust-lang/rust/pull/144523 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250727092317.2930617-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rust/Makefile b/rust/Makefile index 4263462b8470..d47f82588d78 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -65,6 +65,10 @@ core-cfgs = \ core-edition := $(if $(call rustc-min-version,108700),2024,2021) +# `rustdoc` did not save the target modifiers, thus workaround for +# the time being (https://github.com/rust-lang/rust/issues/144521). +rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both @@ -77,6 +81,7 @@ quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< -Zunstable-options --generate-link-to-definition \ --output $(rustdoc_output) \ --crate-name $(subst rustdoc-,,$@) \ + $(rustdoc_modifiers_workaround) \ $(if $(rustdoc_host),,--sysroot=/dev/null) \ @$(objtree)/include/generated/rustc_cfg $< @@ -215,6 +220,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< --extern bindings --extern uapi \ --no-run --crate-name kernel -Zunstable-options \ --sysroot=/dev/null \ + $(rustdoc_modifiers_workaround) \ --test-builder $(objtree)/scripts/rustdoc_test_builder \ $< $(rustdoc_test_kernel_quiet); \ $(objtree)/scripts/rustdoc_test_gen From 252fea131e15aba2cd487119d1a8f546471199e2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 26 Jul 2025 15:34:35 +0200 Subject: [PATCH 0235/1307] rust: kbuild: clean output before running `rustdoc` `rustdoc` can get confused when generating documentation into a folder that contains generated files from other `rustdoc` versions. For instance, running something like: rustup default 1.78.0 make LLVM=1 rustdoc rustup default 1.88.0 make LLVM=1 rustdoc may generate errors like: error: couldn't generate documentation: invalid template: last line expected to start with a comment | = note: failed to create or modify "./Documentation/output/rust/rustdoc/src-files.js" Thus just always clean the output folder before generating the documentation -- we are anyway regenerating it every time the `rustdoc` target gets called, at least for the time being. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Reported-by: Daniel Almeida Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089/topic/x/near/527201113 Reviewed-by: Tamir Duberstein Link: https://lore.kernel.org/r/20250726133435.2460085-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index d47f82588d78..bfa915b0e588 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -111,14 +111,14 @@ rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \ rustdoc-macros: private rustdoc_host = yes rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ --extern proc_macro -rustdoc-macros: $(src)/macros/lib.rs FORCE +rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should # not be needed -- see https://github.com/rust-lang/rust/pull/128307. rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) -rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE @@ -130,7 +130,8 @@ rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE rustdoc-pin_init_internal: private rustdoc_host = yes rustdoc-pin_init_internal: private rustc_target_flags = --cfg kernel \ --extern proc_macro --crate-type proc-macro -rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs FORCE +rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs \ + rustdoc-clean FORCE +$(call if_changed,rustdoc) rustdoc-pin_init: private rustdoc_host = yes @@ -148,6 +149,9 @@ rustdoc-kernel: $(src)/kernel/lib.rs rustdoc-core rustdoc-ffi rustdoc-macros \ $(obj)/bindings.o FORCE +$(call if_changed,rustdoc) +rustdoc-clean: FORCE + $(Q)rm -rf $(rustdoc_output) + quiet_cmd_rustc_test_library = $(RUSTC_OR_CLIPPY_QUIET) TL $< cmd_rustc_test_library = \ OBJTREE=$(abspath $(objtree)) \ From 41b70df5b38bc80967d2e0ed55cc3c3896bba781 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Aug 2025 08:30:11 -0600 Subject: [PATCH 0236/1307] io_uring/net: commit partial buffers on retry Ring provided buffers are potentially only valid within the single execution context in which they were acquired. io_uring deals with this and invalidates them on retry. But on the networking side, if MSG_WAITALL is set, or if the socket is of the streaming type and too little was processed, then it will hang on to the buffer rather than recycle or commit it. This is problematic for two reasons: 1) If someone unregisters the provided buffer ring before a later retry, then the req->buf_list will no longer be valid. 2) If multiple sockers are using the same buffer group, then multiple receives can consume the same memory. This can cause data corruption in the application, as either receive could land in the same userspace buffer. Fix this by disallowing partial retries from pinning a provided buffer across multiple executions, if ring provided buffers are used. Cc: stable@vger.kernel.org Reported-by: pt x Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") Signed-off-by: Jens Axboe --- io_uring/net.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index dd96e355982f..d69f2afa4f7a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -494,6 +494,15 @@ static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret) return nbufs; } +static int io_net_kbuf_recyle(struct io_kiocb *req, + struct io_async_msghdr *kmsg, int len) +{ + req->flags |= REQ_F_BL_NO_RECYCLE; + if (req->flags & REQ_F_BUFFERS_COMMIT) + io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len)); + return IOU_RETRY; +} + static inline bool io_send_finish(struct io_kiocb *req, int *ret, struct io_async_msghdr *kmsg, unsigned issue_flags) @@ -562,8 +571,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) kmsg->msg.msg_controllen = 0; kmsg->msg.msg_control = NULL; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -674,8 +682,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) sr->len -= ret; sr->buf += ret; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1071,8 +1078,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) } if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return IOU_RETRY; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1218,8 +1224,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) sr->len -= ret; sr->buf += ret; sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1500,8 +1505,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) zc->len -= ret; zc->buf += ret; zc->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; @@ -1571,8 +1575,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (ret > 0 && io_net_retry(sock, flags)) { sr->done_io += ret; - req->flags |= REQ_F_BL_NO_RECYCLE; - return -EAGAIN; + return io_net_kbuf_recyle(req, kmsg, ret); } if (ret == -ERESTARTSYS) ret = -EINTR; From e67b8afcb6d86f1bc6a69e4e52cf9dcfe636f995 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Tue, 5 Aug 2025 22:30:54 +0800 Subject: [PATCH 0237/1307] drm/amdgpu: Add PSP fw version check for fw reserve GFX command The fw reserved GFX command is only supported starting from PSP fw version 0x3a0e14 and 0x3b0e0d. Older versions do not support this command. Add a version guard to ensure the command is only used when the running PSP fw meets the minimum version requirement. This ensures backward compatibility and safe operation across fw revisions. Fixes: a3b7f9c306e1 ("drm/amdgpu: reclaim psp fw reservation memory region") Signed-off-by: Frank Min Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 065e23170a1e09bc9104b761183e59562a029619) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0bd51a04be79..23484317a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1039,15 +1039,28 @@ int psp_update_fw_reservation(struct psp_context *psp) { int ret; uint64_t reserv_addr, reserv_addr_ext; - uint32_t reserv_size, reserv_size_ext; + uint32_t reserv_size, reserv_size_ext, mp0_ip_ver; struct amdgpu_device *adev = psp->adev; + mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0); + if (amdgpu_sriov_vf(psp->adev)) return 0; - if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 2)) && - (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(14, 0, 3))) + switch (mp0_ip_ver) { + case IP_VERSION(14, 0, 2): + if (adev->psp.sos.fw_version < 0x3b0e0d) + return 0; + break; + + case IP_VERSION(14, 0, 3): + if (adev->psp.sos.fw_version < 0x3a0e14) + return 0; + break; + + default: return 0; + } ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size); if (ret) From 10ef476aad1c848449934e7bec2ab2374333c7b6 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 12 Aug 2025 09:17:58 +0800 Subject: [PATCH 0238/1307] drm/amdgpu: fix vram reservation issue The vram block allocation flag must be cleared before making vram reservation, otherwise reserving addresses within the currently freed memory range will always fail. Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu") Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit d38eaf27de1b8584f42d6fb3f717b7ec44b3a7a1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 07c936e90d8e..78f9e86ccc09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -648,9 +648,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, vres->flags); + amdgpu_vram_mgr_do_reserve(man); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); From 040bc6d0e0e9c814c9c663f6f1544ebaff6824a8 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Mon, 11 Aug 2025 15:20:55 +0800 Subject: [PATCH 0239/1307] drm/amdgpu: fix incorrect vm flags to map bo It should use vm flags instead of pte flags to specify bo vm attributes. Fixes: 7946340fa389 ("drm/amdgpu: Move csa related code to separate file") Signed-off-by: Jack Xiao Reviewed-by: Likun Gao Signed-off-by: Alex Deucher (cherry picked from commit b08425fa77ad2f305fe57a33dceb456be03b653f) --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 02138aa55793..dfb6cfd83760 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); From 0ebbab41fba1bae6ccd96c0eec17026700ac6534 Mon Sep 17 00:00:00 2001 From: Sergio Perez Gonzalez Date: Mon, 28 Jul 2025 20:00:49 -0600 Subject: [PATCH 0240/1307] ASoC: stm: stm32_i2s: Fix calc_clk_div() error handling in determine_rate() calc_clk_div() will only return a non-zero value (-EINVAL) in case of error. On the other hand, req->rate is an unsigned long. It seems quite odd that req->rate would be assigned a negative value, which is clearly not a rate, and success would be returned. Reinstate previous logic, which would just return error. Fixes: afd529d74002 ("ASoC: stm: stm32_i2s: convert from round_rate() to determine_rate()") Link: https://scan7.scan.coverity.com/#/project-view/53936/11354?selectedIssue=1647702 Signed-off-by: Sergio Perez Gonzalez Link: https://patch.msgid.link/20250729020052.404617-1-sperezglz@gmail.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_i2s.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 0e489097d9c1..6ca21780f21d 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -469,11 +469,8 @@ static int stm32_i2smclk_determine_rate(struct clk_hw *hw, int ret; ret = stm32_i2s_calc_clk_div(i2s, req->best_parent_rate, req->rate); - if (ret) { - req->rate = ret; - - return 0; - } + if (ret) + return ret; mclk->freq = req->best_parent_rate / i2s->divider; From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001 From: Liu01 Tong Date: Mon, 11 Aug 2025 14:52:37 +0800 Subject: [PATCH 0241/1307] drm/amdgpu: fix task hang from failed job submission during process kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During process kill, drm_sched_entity_flush() will kill the vm entities. The following job submissions of this process will fail, and the resources of these jobs have not been released, nor have the fences been signalled, causing tasks to hang and timeout. Fix by check entity status in amdgpu_vm_ready() and avoid submit jobs to stopped entity. v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in function amdgpu_cs_vm_handling(). Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.") Signed-off-by: Liu01 Tong Signed-off-by: Lin.Cao Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a2adaacf6adb..d3f220be2ef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1139,6 +1139,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } + if (!amdgpu_vm_ready(vm)) + return -EINVAL; + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5cacf5717016..0b87798daebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -654,11 +654,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if VM is not evicting. + * True if VM is not evicting and all VM entities are not stopped */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool empty; bool ret; amdgpu_vm_eviction_lock(vm); @@ -666,10 +665,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) amdgpu_vm_eviction_unlock(vm); spin_lock(&vm->status_lock); - empty = list_empty(&vm->evicted); + ret &= list_empty(&vm->evicted); spin_unlock(&vm->status_lock); - return ret && empty; + spin_lock(&vm->immediate.lock); + ret &= !vm->immediate.stopped; + spin_unlock(&vm->immediate.lock); + + spin_lock(&vm->delayed.lock); + ret &= !vm->delayed.stopped; + spin_unlock(&vm->delayed.lock); + + return ret; } /** From c5ec7f49b480db0dfc83f395755b1c2a7c979920 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 4 Jul 2025 13:17:47 +0200 Subject: [PATCH 0242/1307] devlink: let driver opt out of automatic phys_port_name generation Currently when adding devlink port, phys_port_name is automatically generated within devlink port initialization flow. As a result adding devlink port support to driver may result in forced changes of interface names, which breaks already existing network configs. This is an expected behavior but in some scenarios it would not be preferable to provide such limitation for legacy driver not being able to keep 'pre-devlink' interface name. Add flag no_phys_port_name to devlink_port_attrs struct which indicates if devlink should not alter name of interface. Suggested-by: Jiri Pirko Link: https://lore.kernel.org/all/nbwrfnjhvrcduqzjl4a2jafnvvud6qsbxlvxaxilnryglf4j7r@btuqrimnfuly/ Signed-off-by: Jedrzej Jagielski Signed-off-by: Tony Nguyen --- include/net/devlink.h | 6 +++++- net/devlink/port.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 93640a29427c..b32c9ceeb81d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -78,6 +78,9 @@ struct devlink_port_pci_sf_attrs { * @flavour: flavour of the port * @split: indicates if this is split port * @splittable: indicates if the port can be split. + * @no_phys_port_name: skip automatic phys_port_name generation; for + * compatibility only, newly added driver/port instance + * should never set this. * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL * @phys: physical port attributes @@ -87,7 +90,8 @@ struct devlink_port_pci_sf_attrs { */ struct devlink_port_attrs { u8 split:1, - splittable:1; + splittable:1, + no_phys_port_name:1; u32 lanes; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; diff --git a/net/devlink/port.c b/net/devlink/port.c index 939081a0e615..cb8d4df61619 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1519,7 +1519,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!devlink_port->attrs_set) + if (!devlink_port->attrs_set || devlink_port->attrs.no_phys_port_name) return -EOPNOTSUPP; switch (attrs->flavour) { From e67a0bc3ed4fd8ee1697cb6d937e2b294ec13b5e Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 3 Jul 2025 12:41:15 +0200 Subject: [PATCH 0243/1307] ixgbe: prevent from unwanted interface name changes Users of the ixgbe driver report that after adding devlink support by the commit a0285236ab93 ("ixgbe: add initial devlink support") their configs got broken due to unwanted changes of interface names. It's caused by automatic phys_port_name generation during devlink port initialization flow. To prevent from that set no_phys_port_name flag for ixgbe devlink ports. Reported-by: David Howells Closes: https://lore.kernel.org/netdev/3452224.1745518016@warthog.procyon.org.uk/ Reported-by: David Kaplan Closes: https://lore.kernel.org/netdev/LV3PR12MB92658474624CCF60220157199470A@LV3PR12MB9265.namprd12.prod.outlook.com/ Fixes: a0285236ab93 ("ixgbe: add initial devlink support") Signed-off-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/devlink/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c index 54f1b83dfe42..d227f4d2a2d1 100644 --- a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c @@ -543,6 +543,7 @@ int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = adapter->hw.bus.func; + attrs.no_phys_port_name = 1; ixgbe_devlink_set_switch_id(adapter, &attrs.switch_id); devlink_port_attrs_set(devlink_port, &attrs); From 6563623e604e3e235b2cee71190a4972be8f986b Mon Sep 17 00:00:00 2001 From: Soham Metha Date: Wed, 13 Aug 2025 02:19:46 +0530 Subject: [PATCH 0244/1307] docs: cgroup: fixed spelling mistakes in documentation found/fixed the following typo - Availablity -> Availability in `Documentation/admin-guide/cgroup-v2.rst` Signed-off-by: Soham Metha Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index d9d3cc7df348..51c0bc4c2dc5 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -435,8 +435,8 @@ both cgroups. Controlling Controllers ----------------------- -Availablity -~~~~~~~~~~~ +Availability +~~~~~~~~~~~~ A controller is available in a cgroup when it is supported by the kernel (i.e., compiled in, not disabled and not attached to a v1 hierarchy) and listed in the From 2efe41234dbd0a83fdb7cd38226c2f70039a2cd3 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 28 Jul 2025 15:26:49 +0900 Subject: [PATCH 0245/1307] ptp: prevent possible ABBA deadlock in ptp_clock_freerun() syzbot reported the following ABBA deadlock: CPU0 CPU1 ---- ---- n_vclocks_store() lock(&ptp->n_vclocks_mux) [1] (physical clock) pc_clock_adjtime() lock(&clk->rwsem) [2] (physical clock) ... ptp_clock_freerun() ptp_vclock_in_use() lock(&ptp->n_vclocks_mux) [3] (physical clock) ptp_clock_unregister() posix_clock_unregister() lock(&clk->rwsem) [4] (virtual clock) Since ptp virtual clock is registered only under ptp physical clock, both ptp_clock and posix_clock must be physical clocks for ptp_vclock_in_use() to lock &ptp->n_vclocks_mux and check ptp->n_vclocks. However, when unregistering vclocks in n_vclocks_store(), the locking ptp->n_vclocks_mux is a physical clock lock, but clk->rwsem of ptp_clock_unregister() called through device_for_each_child_reverse() is a virtual clock lock. Therefore, clk->rwsem used in CPU0 and clk->rwsem used in CPU1 are different locks, but in lockdep, a false positive occurs because the possibility of deadlock is determined through lock-class. To solve this, lock subclass annotation must be added to the posix_clock rwsem of the vclock. Reported-by: syzbot+7cfb66a237c4a5fb22ad@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7cfb66a237c4a5fb22ad Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250728062649.469882-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 5 +++++ drivers/ptp/ptp_vclock.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index a6aad743c282..b352df4cd3f9 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -24,6 +24,11 @@ #define PTP_DEFAULT_MAX_VCLOCKS 20 #define PTP_MAX_CHANNELS 2048 +enum { + PTP_LOCK_PHYSICAL = 0, + PTP_LOCK_VIRTUAL, +}; + struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; int head; diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index 2fdeedd60e21..64c950456517 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -154,6 +154,11 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp) return PTP_VCLOCK_REFRESH_INTERVAL; } +static void ptp_vclock_set_subclass(struct ptp_clock *ptp) +{ + lockdep_set_subclass(&ptp->clock.rwsem, PTP_LOCK_VIRTUAL); +} + static const struct ptp_clock_info ptp_vclock_info = { .owner = THIS_MODULE, .name = "ptp virtual clock", @@ -213,6 +218,8 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) return NULL; } + ptp_vclock_set_subclass(vclock->clock); + timecounter_init(&vclock->tc, &vclock->cc, 0); ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); From 6a912e8aa2b2fba2519e93a2eac197d16f137c9a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 3 Jul 2025 15:39:02 +0800 Subject: [PATCH 0246/1307] ext4: show the default enabled i_version option Display `i_version` in `/proc/fs/ext4/sdx/options`, even though it's default enabled. This aids users managing multi-version scenarios and simplifies debugging. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250703073903.6952-1-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c7d39da7e733..9203518786e4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2975,6 +2975,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); + if (nodefs && sb->s_flags & SB_I_VERSION) + SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & From 6db015fc4b5d5f63a64a193f65d98da3a7fc811d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:06 -0700 Subject: [PATCH 0247/1307] tls: handle data disappearing from under the TLS ULP TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry. We already reload the anchor every time we (re)acquire the socket lock, so the only condition we need to avoid is an out of bounds read (not having enough bytes in the socket for previously parsed record len). If some data was read from under TLS but there's enough in the queue we'll reload and decrypt what is most likely not a valid TLS record. Leading to some undefined behavior from TLS perspective (corrupting a stream? missing an alert? missing an attack?) but no kernel crash should take place. Reported-by: William Liu Reported-by: Savino Dicanosa Link: https://lore.kernel.org/tFjq_kf7sWIG3A7CrCg_egb8CVsT_gsmHAK0_wxDPJXfIzxFAMxqmLwp3MlU5EHiet0AwwJldaaFdgyHpeIUCS-3m3llsmRzp9xIOBR4lAI=@syst3mfailure.io Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 11 ++++++++--- net/tls/tls_sw.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index 774859b63f0d..4e077068e6d9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -196,7 +196,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 095cf31bae0b..d71643b494a1 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -475,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; } -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -484,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + } tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -495,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; } /* Called with lock held on lower socket */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } - tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } From d7e82594a45c5cb270940ac469846e8026c7db0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:07 -0700 Subject: [PATCH 0248/1307] selftests: tls: test TCP stealing data from under the TLS socket Check a race where data disappears from the TCP socket after TLS signaled that its ready to receive. ok 6 global.data_steal # RUN tls_basic.base_base ... # OK tls_basic.base_base Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..d8cfcf9bb825 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2708,6 +2708,69 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), + sizeof(buf)); + exit(!__test_passed(_metadata)); + } + + usleep(2000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + usleep(2000); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); + + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; From f2326fd14a224e4cccbab89e14c52279ff79b7ec Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 3 Jul 2025 15:39:03 +0800 Subject: [PATCH 0249/1307] ext4: preserve SB_I_VERSION on remount IMA testing revealed that after an ext4 remount, file accesses triggered full measurements even without modifications, instead of skipping as expected when i_version is unchanged. Debugging showed `SB_I_VERSION` was cleared in reconfigure_super() during remount due to commit 1ff20307393e ("ext4: unconditionally enable the i_version counter") removing the fix from commit 960e0ab63b2e ("ext4: fix i_version handling on remount"). To rectify this, `SB_I_VERSION` is always set for `fc->sb_flags` in ext4_init_fs_context(), instead of `sb->s_flags` in __ext4_fill_super(), ensuring it persists across all mounts. Cc: stable@kernel.org Fixes: 1ff20307393e ("ext4: unconditionally enable the i_version counter") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250703073903.6952-2-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9203518786e4..ed1b36bd51c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1998,6 +1998,9 @@ int ext4_init_fs_context(struct fs_context *fc) fc->fs_private = ctx; fc->ops = &ext4_context_ops; + /* i_version is always enabled now */ + fc->sb_flags |= SB_I_VERSION; + return 0; } @@ -5316,9 +5319,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); - /* i_version is always enabled now */ - sb->s_flags |= SB_I_VERSION; - /* HSM events are allowed by default. */ sb->s_iflags |= SB_I_ALLOW_HSM; From b4cc4a4077268522e3d0d34de4b2dc144e2330fa Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 16 Jul 2025 19:36:42 -0600 Subject: [PATCH 0250/1307] ext4: check fast symlink for ea_inode correctly The check for a fast symlink in the presence of only an external xattr inode is incorrect. If a fast symlink does not have an xattr block (i_file_acl == 0), but does have an external xattr inode that increases inode i_blocks, then the check for a fast symlink will incorrectly fail and __ext4_iget()->ext4_ind_check_inode() will report the inode is corrupt when it "validates" i_data[] on the next read: # ln -s foo /mnt/tmp/bar # setfattr -h -n trusted.test \ -v "$(yes | head -n 4000)" /mnt/tmp/bar # umount /mnt/tmp # mount /mnt/tmp # ls -l /mnt/tmp ls: cannot access '/mnt/tmp/bar': Structure needs cleaning total 4 ? l?????????? ? ? ? ? ? bar # dmesg | tail -1 EXT4-fs error (device dm-8): __ext4_iget:5098: inode #24578: block 7303014: comm ls: invalid block (note that "block 7303014" = 0x6f6f66 = "foo" in LE order). ext4_inode_is_fast_symlink() should check the superblock EXT4_FEATURE_INCOMPAT_EA_INODE feature flag, not the inode EXT4_EA_INODE_FL, since the latter is only set on the xattr inode itself, and not on the inode that uses this xattr. Cc: stable@vger.kernel.org Fixes: fc82228a5e38 ("ext4: support fast symlinks from ext3 file systems") Signed-off-by: Andreas Dilger Reviewed-by: Li Dongyang Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/59879 Lustre-bug-id: https://jira.whamcloud.com/browse/LU-19121 Link: https://patch.msgid.link/20250717063709.757077-1-adilger@dilger.ca Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eeccb6f588f9..731a800d9c14 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -146,7 +146,7 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, */ int ext4_inode_is_fast_symlink(struct inode *inode) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + if (!ext4_has_feature_ea_inode(inode->i_sb)) { int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; From f3fbaa74d999c16b5caeca779e6d7e6e6e7feed8 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 21 Jul 2025 22:09:02 +0200 Subject: [PATCH 0251/1307] ext4: remove useless if check This if branch is only jumping to 'out' which is defined just after the branch itself. Hence this is if-check is a no-op and can be removed. Address-Coverity-ID: 1647981 ("Incorrect expression (IDENTICAL_BRANCHES)") Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250721200902.1071-1-antonio@mandelbit.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d83f91b62317..01f379f5fd04 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2984,8 +2984,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, return PTR_ERR(dir_block); de = (struct ext4_dir_entry_2 *)dir_block->b_data; err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); - if (err) - goto out; out: brelse(dir_block); return err; From 59d8731c887bf2f5bb8406ace26cbb6f6b36d6cc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 31 Jul 2025 16:00:07 -0400 Subject: [PATCH 0252/1307] ext4: fix unused variable warning in ext4_init_new_dir Cc: stable@kernel.org Fixes: 90f097b1403f ("ext4: refactor the inline directory conversion and...") Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 01f379f5fd04..2cd36f59c9e3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2965,7 +2965,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct inode *inode) { struct buffer_head *dir_block = NULL; - struct ext4_dir_entry_2 *de; ext4_lblk_t block = 0; int err; @@ -2982,7 +2981,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); - de = (struct ext4_dir_entry_2 *)dir_block->b_data; err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); out: brelse(dir_block); From 4ba97589ed19210ff808929052696f5636139823 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Sun, 3 Aug 2025 18:22:41 +0800 Subject: [PATCH 0253/1307] ext4: remove redundant __GFP_NOWARN GFP_NOWAIT already includes __GFP_NOWARN, so let's remove the redundant __GFP_NOWARN. Signed-off-by: Qianfeng Rong Link: https://patch.msgid.link/20250803102243.623705-4-rongqianfeng@vivo.com Signed-off-by: Theodore Ts'o --- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3d8b0f6d2dea..39abfeec5f36 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -547,7 +547,7 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, * first page of the bio. Otherwise it can deadlock. */ if (io->io_bio) - gfp_flags = GFP_NOWAIT | __GFP_NOWARN; + gfp_flags = GFP_NOWAIT; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(folio, enc_bytes, 0, gfp_flags); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ed1b36bd51c8..b16ffa507b84 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -268,7 +268,7 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) { struct buffer_head *bh = bdev_getblk(sb->s_bdev, block, - sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN); + sb->s_blocksize, GFP_NOWAIT); if (likely(bh)) { if (trylock_buffer(bh)) From bae76c035bf0852844151e68098c9b7cd63ef238 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 5 Aug 2025 14:00:30 +0530 Subject: [PATCH 0254/1307] ext4: fix fsmap end of range reporting with bigalloc With bigalloc enabled, the logic to report last extent has a bug since we try to use cluster units instead of block units. This can cause an issue where extra incorrect entries might be returned back to the user. This was flagged by generic/365 with 64k bs and -O bigalloc. ** Details of issue ** The issue was noticed on 5G 64k blocksize FS with -O bigalloc which has only 1 bg. $ xfs_io -c "fsmap -d" /mnt/scratch 0: 253:48 [0..127]: static fs metadata 128 /* sb */ 1: 253:48 [128..255]: special 102:1 128 /* gdt */ 3: 253:48 [256..383]: special 102:3 128 /* block bitmap */ 4: 253:48 [384..2303]: unknown 1920 /* flex bg empty space */ 5: 253:48 [2304..2431]: special 102:4 128 /* inode bitmap */ 6: 253:48 [2432..4351]: unknown 1920 /* flex bg empty space */ 7: 253:48 [4352..6911]: inodes 2560 8: 253:48 [6912..538623]: unknown 531712 9: 253:48 [538624..10485759]: free space 9947136 The issue can be seen with: $ xfs_io -c "fsmap -d 0 3" /mnt/scratch 0: 253:48 [0..127]: static fs metadata 128 1: 253:48 [384..2047]: unknown 1664 Only the first entry was expected to be returned but we get 2. This is because: ext4_getfsmap_datadev() first_cluster, last_cluster = 0 ... info->gfi_last = true; ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1, 0, info); fsb = C2B(1) = 16 fslen = 0 ... /* Merge in any relevant extents from the meta_list */ list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { ... // since fsb = 16, considers all metadata which starts before 16 blockno iter 1: error = ext4_getfsmap_helper(sb, info, p); // p = sb (0,1), nop info->gfi_next_fsblk = 1 iter 2: error = ext4_getfsmap_helper(sb, info, p); // p = gdt (1,2), nop info->gfi_next_fsblk = 2 iter 3: error = ext4_getfsmap_helper(sb, info, p); // p = blk bitmap (2,3), nop info->gfi_next_fsblk = 3 iter 4: error = ext4_getfsmap_helper(sb, info, p); // p = ino bitmap (18,19) if (rec_blk > info->gfi_next_fsblk) { // (18 > 3) // emits an extra entry ** BUG ** } } Fix this by directly calling ext4_getfsmap_datadev() with a dummy record that has fmr_physical set to (end_fsb + 1) instead of last_cluster + 1. By using the block instead of cluster we get the correct behavior. Replacing ext4_getfsmap_datadev_helper() with ext4_getfsmap_helper() is okay since the gfi_lastfree and metadata checks in ext4_getfsmap_datadev_helper() are anyways redundant when we only want to emit the last allocated block of the range, as we have already taken care of emitting metadata and any last free blocks. Cc: stable@kernel.org Reported-by: Disha Goel Fixes: 4a622e4d477b ("ext4: fix FS_IOC_GETFSMAP handling") Signed-off-by: Ojaswin Mujoo Reviewed-by: Darrick J. Wong Link: https://patch.msgid.link/e7472c8535c9c5ec10f425f495366864ea12c9da.1754377641.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 383c6edea6dd..9d63c39f6077 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -526,6 +526,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb, ext4_group_t end_ag; ext4_grpblk_t first_cluster; ext4_grpblk_t last_cluster; + struct ext4_fsmap irec; int error = 0; bofs = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -609,10 +610,18 @@ static int ext4_getfsmap_datadev(struct super_block *sb, goto err; } - /* Report any gaps at the end of the bg */ + /* + * The dummy record below will cause ext4_getfsmap_helper() to report + * any allocated blocks at the end of the range. + */ + irec.fmr_device = 0; + irec.fmr_physical = end_fsb + 1; + irec.fmr_length = 0; + irec.fmr_owner = EXT4_FMR_OWN_FREE; + irec.fmr_flags = 0; + info->gfi_last = true; - error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1, - 0, info); + error = ext4_getfsmap_helper(sb, info, &irec); if (error) goto err; From 3ffbdd1f1165f1b2d6a94d1b1aabef57120deaf7 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 5 Aug 2025 14:00:31 +0530 Subject: [PATCH 0255/1307] ext4: fix reserved gdt blocks handling in fsmap In some cases like small FSes with no meta_bg and where the resize doesn't need extra gdt blocks as it can fit in the current one, s_reserved_gdt_blocks is set as 0, which causes fsmap to emit a 0 length entry, which is incorrect. $ mkfs.ext4 -b 65536 -O bigalloc /dev/sda 5G $ mount /dev/sda /mnt/scratch $ xfs_io -c "fsmap -d" /mnt/scartch 0: 253:48 [0..127]: static fs metadata 128 1: 253:48 [128..255]: special 102:1 128 2: 253:48 [256..255]: special 102:2 0 <---- 0 len entry 3: 253:48 [256..383]: special 102:3 128 Fix this by adding a check for this case. Cc: stable@kernel.org Fixes: 0c9ec4beecac ("ext4: support GETFSMAP ioctls") Signed-off-by: Ojaswin Mujoo Reviewed-by: Darrick J. Wong Link: https://patch.msgid.link/08781b796453a5770112aa96ad14c864fbf31935.1754377641.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 9d63c39f6077..91185c40f755 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -393,6 +393,14 @@ static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, /* Reserved GDT blocks */ if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + + /* + * mkfs.ext4 can set s_reserved_gdt_blocks as 0 in some cases, + * check for that. + */ + if (!len) + return 0; + error = ext4_getfsmap_fill(meta_list, fsb, len, EXT4_FMR_OWN_RESV_GDT); if (error) From c5e104a91e7b6fa12c1dc2d8bf84abb7ef9b89ad Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 Aug 2025 09:35:20 -0400 Subject: [PATCH 0256/1307] ext4: don't try to clear the orphan_present feature block device is r/o When the file system is frozen in preparation for taking an LVM snapshot, the journal is checkpointed and if the orphan_file feature is enabled, and the orphan file is empty, we clear the orphan_present feature flag. But if there are pending inodes that need to be removed the orphan_present feature flag can't be cleared. The problem comes if the block device is read-only. In that case, we can't process the orphan inode list, so it is skipped in ext4_orphan_cleanup(). But then in ext4_mark_recovery_complete(), this results in the ext4 error "Orphan file not empty on read-only fs" firing and the file system mount is aborted. Fix this by clearing the needs_recovery flag in the block device is read-only. We do this after the call to ext4_load_and_init-journal() since there are some error checks need to be done in case the journal needs to be replayed and the block device is read-only, or if the block device containing the externa journal is read-only, etc. Cc: stable@kernel.org Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1108271 Cc: stable@vger.kernel.org Fixes: 02f310fcf47f ("ext4: Speedup ext4 orphan inode handling") Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b16ffa507b84..699c15db28a8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5416,6 +5416,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; + if (bdev_read_only(sb->s_bdev)) + needs_recovery = 0; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " From 02c7f7219ac0e2277b3379a3a0e9841ef464b6d4 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 11 Aug 2025 14:45:32 +0800 Subject: [PATCH 0257/1307] ext4: fix hole length calculation overflow in non-extent inodes In a filesystem with a block size larger than 4KB, the hole length calculation for a non-extent inode in ext4_ind_map_blocks() can easily exceed INT_MAX. Then it could return a zero length hole and trigger the following waring and infinite in the iomap infrastructure. ------------[ cut here ]------------ WARNING: CPU: 3 PID: 434101 at fs/iomap/iter.c:34 iomap_iter_done+0x148/0x190 CPU: 3 UID: 0 PID: 434101 Comm: fsstress Not tainted 6.16.0-rc7+ #128 PREEMPT(voluntary) Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : iomap_iter_done+0x148/0x190 lr : iomap_iter+0x174/0x230 sp : ffff8000880af740 x29: ffff8000880af740 x28: ffff0000db8e6840 x27: 0000000000000000 x26: 0000000000000000 x25: ffff8000880af830 x24: 0000004000000000 x23: 0000000000000002 x22: 000001bfdbfa8000 x21: ffffa6a41c002e48 x20: 0000000000000001 x19: ffff8000880af808 x18: 0000000000000000 x17: 0000000000000000 x16: ffffa6a495ee6cd0 x15: 0000000000000000 x14: 00000000000003d4 x13: 00000000fa83b2da x12: 0000b236fc95f18c x11: ffffa6a4978b9c08 x10: 0000000000001da0 x9 : ffffa6a41c1a2a44 x8 : ffff8000880af5c8 x7 : 0000000001000000 x6 : 0000000000000000 x5 : 0000000000000004 x4 : 000001bfdbfa8000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000004004030000 x0 : 0000000000000000 Call trace: iomap_iter_done+0x148/0x190 (P) iomap_iter+0x174/0x230 iomap_fiemap+0x154/0x1d8 ext4_fiemap+0x110/0x140 [ext4] do_vfs_ioctl+0x4b8/0xbc0 __arm64_sys_ioctl+0x8c/0x120 invoke_syscall+0x6c/0x100 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x38/0x120 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x198/0x1a0 ---[ end trace 0000000000000000 ]--- Cc: stable@kernel.org Fixes: facab4d9711e ("ext4: return hole from ext4_map_blocks()") Reported-by: Qu Wenruo Closes: https://lore.kernel.org/linux-ext4/9b650a52-9672-4604-a765-bb6be55d1e4a@gmx.com/ Tested-by: Qu Wenruo Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20250811064532.1788289-1-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/indirect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 7de327fa7b1c..d45124318200 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -539,7 +539,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, int indirect_blks; int blocks_to_boundary = 0; int depth; - int count = 0; + u64 count = 0; ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); @@ -588,7 +588,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, count++; /* Fill in size of a hole we found */ map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, count); + map->m_len = umin(map->m_len, count); goto cleanup; } From 76dba1fe277f6befd6ef650e1946f626c547387a Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 11 Aug 2025 20:58:16 +0800 Subject: [PATCH 0258/1307] ext4: use kmalloc_array() for array space allocation Replace kmalloc(size * sizeof) with kmalloc_array() for safer memory allocation and overflow prevention. Cc: stable@kernel.org Signed-off-by: Liao Yuanhong Link: https://patch.msgid.link/20250811125816.570142-1-liaoyuanhong@vivo.com Signed-off-by: Theodore Ts'o --- fs/ext4/orphan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 7c7f792ad6ab..524d4658fa40 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -589,8 +589,9 @@ int ext4_init_orphan_info(struct super_block *sb) } oi->of_blocks = inode->i_size >> sb->s_blocksize_bits; oi->of_csum_seed = EXT4_I(inode)->i_csum_seed; - oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block), - GFP_KERNEL); + oi->of_binfo = kmalloc_array(oi->of_blocks, + sizeof(struct ext4_orphan_block), + GFP_KERNEL); if (!oi->of_binfo) { ret = -ENOMEM; goto out_put; From 30c1d25b9870d551be42535067d5481668b5e6f3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Aug 2025 12:26:10 +0200 Subject: [PATCH 0259/1307] netfilter: nft_set_pipapo: fix null deref for empty set Blamed commit broke the check for a null scratch map: - if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) + if (unlikely(!raw_cpu_ptr(m->scratch))) This should have been "if (!*raw_ ...)". Use the pattern of the avx2 version which is more readable. This can only be reproduced if avx2 support isn't available. Fixes: d8d871a35ca9 ("netfilter: nft_set_pipapo: merge pipapo_get/lookup") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 1a19649c2851..9a10251228fd 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -426,10 +426,9 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, local_bh_disable(); - if (unlikely(!raw_cpu_ptr(m->scratch))) - goto out; - scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) + goto out; map_index = scratch->map_index; From c0a23bbc98e93704a1f4fb5e7e7bb2d7c0fb6eb3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jul 2025 14:26:11 +0200 Subject: [PATCH 0260/1307] ipvs: Fix estimator kthreads preferred affinity The estimator kthreads' affinity are defined by sysctl overwritten preferences and applied through a plain call to the scheduler's affinity API. However since the introduction of managed kthreads preferred affinity, such a practice shortcuts the kthreads core code which eventually overwrites the target to the default unbound affinity. Fix this with using the appropriate kthread's API. Fixes: d1a89197589c ("kthread: Default affine kthread to its preferred NUMA node") Signed-off-by: Frederic Weisbecker Acked-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 13 +++++++++++++ kernel/kthread.c | 1 + net/netfilter/ipvs/ip_vs_est.c | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff406ef4fd4a..29a36709e7f3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1163,6 +1163,14 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + if (ipvs->est_cpulist_valid) + return ipvs->sysctl_est_cpulist; + else + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return ipvs->sysctl_est_nice; @@ -1270,6 +1278,11 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return IPVS_EST_NICE; diff --git a/kernel/kthread.c b/kernel/kthread.c index 0e98b228a8ef..31b072e8d427 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -893,6 +893,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) return ret; } +EXPORT_SYMBOL_GPL(kthread_affine_preferred); /* * Re-affine kthreads according to their preferences diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f821ad2e19b3..15049b826732 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -265,7 +265,8 @@ int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, } set_user_nice(kd->task, sysctl_est_nice(ipvs)); - set_cpus_allowed_ptr(kd->task, sysctl_est_cpulist(ipvs)); + if (sysctl_est_preferred_cpulist(ipvs)) + kthread_affine_preferred(kd->task, sysctl_est_preferred_cpulist(ipvs)); pr_info("starting estimator thread %d...\n", kd->id); wake_up_process(kd->task); From cf5fb87fcdaaaafec55dcc0dc5a9e15ead343973 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Aug 2025 02:38:50 +0200 Subject: [PATCH 0261/1307] netfilter: nf_tables: reject duplicate device on updates A chain/flowtable update with duplicated devices in the same batch is possible. Unfortunately, netdev event path only removes the first device that is found, leaving unregistered the hook of the duplicated device. Check if a duplicated device exists in the transaction batch, bail out with EEXIST in such case. WARNING is hit when unregistering the hook: [49042.221275] WARNING: CPU: 4 PID: 8425 at net/netfilter/core.c:340 nf_hook_entry_head+0xaa/0x150 [49042.221375] CPU: 4 UID: 0 PID: 8425 Comm: nft Tainted: G S 6.16.0+ #170 PREEMPT(full) [...] [49042.221382] RIP: 0010:nf_hook_entry_head+0xaa/0x150 Fixes: 78d9f48f7f44 ("netfilter: nf_tables: add devices to existing flowtable") Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 13d0ed9d1895..58c5425d61c2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2803,6 +2803,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_chain *chain = ctx->chain; struct nft_chain_hook hook = {}; struct nft_stats __percpu *stats = NULL; + struct nftables_pernet *nft_net; struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -2845,6 +2846,20 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nft_hook_list_find(&basechain->hook_list, h)) { list_del(&h->list); nft_netdev_hook_free(h); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWCHAIN || + trans->table != ctx->table || + !nft_trans_chain_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_chain_hooks(trans), h)) { + nft_chain_release_hook(&hook); + return -EEXIST; + } } } } else { @@ -9060,6 +9075,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; + struct nftables_pernet *nft_net; struct nft_hook *hook, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -9076,6 +9092,20 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, if (nft_hook_list_find(&flowtable->hook_list, hook)) { list_del(&hook->list); nft_netdev_hook_free(hook); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWFLOWTABLE || + trans->table != ctx->table || + !nft_trans_flowtable_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_flowtable_hooks(trans), hook)) { + err = -EEXIST; + goto err_flowtable_update_hook; + } } } From d045c3154080a04beb07726fa311b89d21608981 Mon Sep 17 00:00:00 2001 From: Pratyush Brahma Date: Wed, 13 Aug 2025 12:51:02 +0530 Subject: [PATCH 0262/1307] mm/numa_memblks: Use pr_debug instead of printk(KERN_DEBUG) Replace the direct usage of printk(KERN_DEBUG ...) with pr_debug(...) to align with the consistent `pr_*` API usage within the file. Reviewed-by: Joshua Hahn Signed-off-by: Pratyush Brahma Link: https://lore.kernel.org/r/20250813-numa-dbg-v3-1-1dcd1234fcc5@oss.qualcomm.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/numa_memblks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index 541a99c4071a..de626525a87c 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -76,7 +76,7 @@ static int __init numa_alloc_distance(void) for (j = 0; j < cnt; j++) numa_distance[i * cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); + pr_debug("NUMA: Initialized distance table, cnt=%d\n", cnt); return 0; } From 21924af67d69d7c9fdaf845be69043cfe75196a1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 5 Aug 2025 00:10:02 +0000 Subject: [PATCH 0263/1307] locking: Fix __clear_task_blocked_on() warning from __ww_mutex_wound() path The __clear_task_blocked_on() helper added a number of sanity checks ensuring we hold the mutex wait lock and that the task we are clearing blocked_on pointer (if set) matches the mutex. However, there is an edge case in the _ww_mutex_wound() logic where we need to clear the blocked_on pointer for the task that owns the mutex, not the task that is waiting on the mutex. For this case the sanity checks aren't valid, so handle this by allowing a NULL lock to skip the additional checks. K Prateek Nayak and Maarten Lankhorst also pointed out that in this case where we don't hold the owner's mutex wait_lock, we need to be a bit more careful using READ_ONCE/WRITE_ONCE in both the __clear_task_blocked_on() and __set_task_blocked_on() implementations to avoid accidentally tripping WARN_ONs if two instances race. So do that here as well. This issue was easier to miss, I realized, as the test-ww_mutex driver only exercises the wait-die class of ww_mutexes. I've sent a patch[1] to address this so the logic will be easier to test. [1]: https://lore.kernel.org/lkml/20250801023358.562525-2-jstultz@google.com/ Fixes: a4f0b6fef4b0 ("locking/mutex: Add p->blocked_on wrappers for correctness checks") Closes: https://lore.kernel.org/lkml/68894443.a00a0220.26d0e1.0015.GAE@google.com/ Reported-by: syzbot+602c4720aed62576cd79@syzkaller.appspotmail.com Reported-by: Maarten Lankhorst Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: K Prateek Nayak Acked-by: Maarten Lankhorst Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20250805001026.2247040-1-jstultz@google.com --- include/linux/sched.h | 29 +++++++++++++++++------------ kernel/locking/ww_mutex.h | 6 +++++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 40d2fa90df42..62103dd6a48e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2144,6 +2144,8 @@ static inline struct mutex *__get_task_blocked_on(struct task_struct *p) static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + WARN_ON_ONCE(!m); /* The task should only be setting itself as blocked */ WARN_ON_ONCE(p != current); @@ -2154,8 +2156,8 @@ static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m) * with a different mutex. Note, setting it to the same * lock repeatedly is ok. */ - WARN_ON_ONCE(p->blocked_on && p->blocked_on != m); - p->blocked_on = m; + WARN_ON_ONCE(blocked_on && blocked_on != m); + WRITE_ONCE(p->blocked_on, m); } static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) @@ -2166,16 +2168,19 @@ static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m) static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m) { - WARN_ON_ONCE(!m); - /* Currently we serialize blocked_on under the mutex::wait_lock */ - lockdep_assert_held_once(&m->wait_lock); - /* - * There may be cases where we re-clear already cleared - * blocked_on relationships, but make sure we are not - * clearing the relationship with a different lock. - */ - WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m); - p->blocked_on = NULL; + if (m) { + struct mutex *blocked_on = READ_ONCE(p->blocked_on); + + /* Currently we serialize blocked_on under the mutex::wait_lock */ + lockdep_assert_held_once(&m->wait_lock); + /* + * There may be cases where we re-clear already cleared + * blocked_on relationships, but make sure we are not + * clearing the relationship with a different lock. + */ + WARN_ON_ONCE(blocked_on && blocked_on != m); + } + WRITE_ONCE(p->blocked_on, NULL); } static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m) diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 086fd5487ca7..31a785afee6c 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -342,8 +342,12 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * When waking up the task to wound, be sure to clear the * blocked_on pointer. Otherwise we can see circular * blocked_on relationships that can't resolve. + * + * NOTE: We pass NULL here instead of lock, because we + * are waking the mutex owner, who may be currently + * blocked on a different mutex. */ - __clear_task_blocked_on(owner, lock); + __clear_task_blocked_on(owner, NULL); wake_q_add(wake_q, owner); } return true; From b56cc41a3ae7323aa3c6165f93c32e020538b6d2 Mon Sep 17 00:00:00 2001 From: Arnaud Lecomte Date: Sat, 26 Jul 2025 23:09:31 +0100 Subject: [PATCH 0264/1307] hid: fix I2C read buffer overflow in raw_event() for mcp2221 As reported by syzbot, mcp2221_raw_event lacked validation of incoming I2C read data sizes, risking buffer overflows in mcp->rxbuf during multi-part transfers. As highlighted in the DS20005565B spec, p44, we have: "The number of read-back data bytes to follow in this packet: from 0 to a maximum of 60 bytes of read-back bytes." This patch enforces we don't exceed this limit. Reported-by: syzbot+52c1a7d3e5b361ccd346@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=52c1a7d3e5b361ccd346 Tested-by: syzbot+52c1a7d3e5b361ccd346@syzkaller.appspotmail.com Signed-off-by: Arnaud Lecomte Link: https://patch.msgid.link/20250726220931.7126-1-contact@arnaud-lcm.com Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-mcp2221.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index fcfe9370a887..157c363ea9ba 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -906,6 +906,10 @@ static int mcp2221_raw_event(struct hid_device *hdev, } if (data[2] == MCP2221_I2C_READ_COMPL || data[2] == MCP2221_I2C_READ_PARTIAL) { + if (!mcp->rxbuf || mcp->rxbuf_idx < 0 || data[3] > 60) { + mcp->status = -EINVAL; + break; + } buf = mcp->rxbuf; memcpy(&buf[mcp->rxbuf_idx], &data[4], data[3]); mcp->rxbuf_idx = mcp->rxbuf_idx + data[3]; From 89a2d212bdb4bc29bed8e7077abe054b801137ea Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Aug 2025 13:17:59 -0500 Subject: [PATCH 0265/1307] dma/pool: Ensure DMA_DIRECT_REMAP allocations are decrypted When CONFIG_DMA_DIRECT_REMAP is enabled, atomic pool pages are remapped via dma_common_contiguous_remap() using the supplied pgprot. Currently, the mapping uses pgprot_dmacoherent(PAGE_KERNEL), which leaves the memory encrypted on systems with memory encryption enabled (e.g., ARM CCA Realms). This can cause the DMA layer to fail or crash when accessing the memory, as the underlying physical pages are not configured as expected. Fix this by requesting a decrypted mapping in the vmap() call: pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)) This ensures that atomic pool memory is consistently mapped unencrypted. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Reviewed-by: Catalin Marinas Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250811181759.998805-1-sdonthineni@nvidia.com --- kernel/dma/pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 7b04f7575796..ee45dee33d49 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -102,8 +102,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #ifdef CONFIG_DMA_DIRECT_REMAP addr = dma_common_contiguous_remap(page, pool_size, - pgprot_dmacoherent(PAGE_KERNEL), - __builtin_return_address(0)); + pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)), + __builtin_return_address(0)); if (!addr) goto free_page; #else From d3af6ca9a8c34bbd8cff32b469b84c9021c9e7e4 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Sun, 10 Aug 2025 19:10:41 +0100 Subject: [PATCH 0266/1307] HID: asus: fix UAF via HID_CLAIMED_INPUT validation After hid_hw_start() is called hidinput_connect() will eventually be called to set up the device with the input layer since the HID_CONNECT_DEFAULT connect mask is used. During hidinput_connect() all input and output reports are processed and corresponding hid_inputs are allocated and configured via hidinput_configure_usages(). This process involves slot tagging report fields and configuring usages by setting relevant bits in the capability bitmaps. However it is possible that the capability bitmaps are not set at all leading to the subsequent hidinput_has_been_populated() check to fail leading to the freeing of the hid_input and the underlying input device. This becomes problematic because a malicious HID device like a ASUS ROG N-Key keyboard can trigger the above scenario via a specially crafted descriptor which then leads to a user-after-free when the name of the freed input device is written to later on after hid_hw_start(). Below, report 93 intentionally utilises the HID_UP_UNDEFINED Usage Page which is skipped during usage configuration, leading to the frees. 0x05, 0x0D, // Usage Page (Digitizer) 0x09, 0x05, // Usage (Touch Pad) 0xA1, 0x01, // Collection (Application) 0x85, 0x0D, // Report ID (13) 0x06, 0x00, 0xFF, // Usage Page (Vendor Defined 0xFF00) 0x09, 0xC5, // Usage (0xC5) 0x15, 0x00, // Logical Minimum (0) 0x26, 0xFF, 0x00, // Logical Maximum (255) 0x75, 0x08, // Report Size (8) 0x95, 0x04, // Report Count (4) 0xB1, 0x02, // Feature (Data,Var,Abs) 0x85, 0x5D, // Report ID (93) 0x06, 0x00, 0x00, // Usage Page (Undefined) 0x09, 0x01, // Usage (0x01) 0x15, 0x00, // Logical Minimum (0) 0x26, 0xFF, 0x00, // Logical Maximum (255) 0x75, 0x08, // Report Size (8) 0x95, 0x1B, // Report Count (27) 0x81, 0x02, // Input (Data,Var,Abs) 0xC0, // End Collection Below is the KASAN splat after triggering the UAF: [ 21.672709] ================================================================== [ 21.673700] BUG: KASAN: slab-use-after-free in asus_probe+0xeeb/0xf80 [ 21.673700] Write of size 8 at addr ffff88810a0ac000 by task kworker/1:2/54 [ 21.673700] [ 21.673700] CPU: 1 UID: 0 PID: 54 Comm: kworker/1:2 Not tainted 6.16.0-rc4-g9773391cf4dd-dirty #36 PREEMPT(voluntary) [ 21.673700] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 21.673700] Call Trace: [ 21.673700] [ 21.673700] dump_stack_lvl+0x5f/0x80 [ 21.673700] print_report+0xd1/0x660 [ 21.673700] kasan_report+0xe5/0x120 [ 21.673700] __asan_report_store8_noabort+0x1b/0x30 [ 21.673700] asus_probe+0xeeb/0xf80 [ 21.673700] hid_device_probe+0x2ee/0x700 [ 21.673700] really_probe+0x1c6/0x6b0 [ 21.673700] __driver_probe_device+0x24f/0x310 [ 21.673700] driver_probe_device+0x4e/0x220 [...] [ 21.673700] [ 21.673700] Allocated by task 54: [ 21.673700] kasan_save_stack+0x3d/0x60 [ 21.673700] kasan_save_track+0x18/0x40 [ 21.673700] kasan_save_alloc_info+0x3b/0x50 [ 21.673700] __kasan_kmalloc+0x9c/0xa0 [ 21.673700] __kmalloc_cache_noprof+0x139/0x340 [ 21.673700] input_allocate_device+0x44/0x370 [ 21.673700] hidinput_connect+0xcb6/0x2630 [ 21.673700] hid_connect+0xf74/0x1d60 [ 21.673700] hid_hw_start+0x8c/0x110 [ 21.673700] asus_probe+0x5a3/0xf80 [ 21.673700] hid_device_probe+0x2ee/0x700 [ 21.673700] really_probe+0x1c6/0x6b0 [ 21.673700] __driver_probe_device+0x24f/0x310 [ 21.673700] driver_probe_device+0x4e/0x220 [...] [ 21.673700] [ 21.673700] Freed by task 54: [ 21.673700] kasan_save_stack+0x3d/0x60 [ 21.673700] kasan_save_track+0x18/0x40 [ 21.673700] kasan_save_free_info+0x3f/0x60 [ 21.673700] __kasan_slab_free+0x3c/0x50 [ 21.673700] kfree+0xcf/0x350 [ 21.673700] input_dev_release+0xab/0xd0 [ 21.673700] device_release+0x9f/0x220 [ 21.673700] kobject_put+0x12b/0x220 [ 21.673700] put_device+0x12/0x20 [ 21.673700] input_free_device+0x4c/0xb0 [ 21.673700] hidinput_connect+0x1862/0x2630 [ 21.673700] hid_connect+0xf74/0x1d60 [ 21.673700] hid_hw_start+0x8c/0x110 [ 21.673700] asus_probe+0x5a3/0xf80 [ 21.673700] hid_device_probe+0x2ee/0x700 [ 21.673700] really_probe+0x1c6/0x6b0 [ 21.673700] __driver_probe_device+0x24f/0x310 [ 21.673700] driver_probe_device+0x4e/0x220 [...] Fixes: 9ce12d8be12c ("HID: asus: Add i2c touchpad support") Cc: stable@vger.kernel.org Signed-off-by: Qasim Ijaz Link: https://patch.msgid.link/20250810181041.44874-1-qasdev00@gmail.com Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-asus.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 4b45e31f0bab..d27dcfb2b9e4 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1213,7 +1213,13 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) return ret; } - if (!drvdata->input) { + /* + * Check that input registration succeeded. Checking that + * HID_CLAIMED_INPUT is set prevents a UAF when all input devices + * were freed during registration due to no usages being mapped, + * leaving drvdata->input pointing to freed memory. + */ + if (!drvdata->input || !(hdev->claimed & HID_CLAIMED_INPUT)) { hid_err(hdev, "Asus input not registered\n"); ret = -ENOMEM; goto err_stop_hw; From 3eb61d7cb74cea2ea697363669fa256937164758 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 10:26:22 +0200 Subject: [PATCH 0267/1307] Revert "drm/amdgpu: Use dma_buf from GEM object instance" This reverts commit 515986100d176663d0a03219a3056e4252f729e6. The dma_buf field in struct drm_gem_object is not stable over the object instance's lifetime. The field becomes NULL when user space releases the final GEM handle on the buffer object. This resulted in a NULL-pointer deref. Workarounds in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") and commit f6bfc9afc751 ("drm/framebuffer: Acquire internal references on GEM handles") only solved the problem partially. They especially don't work for buffer objects without a DRM framebuffer associated. Hence, this revert to going back to using .import_attach->dmabuf. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250715082635.34974-1-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ff98c87b2e0b..5743ebb2f1b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -514,7 +514,7 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, return false; if (drm_gem_is_imported(obj)) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* No XGMI with non AMD GPUs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6626a6e64ff5..d1ccbfcf21fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -317,7 +317,8 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, */ if (!vm->is_compute_context || !vm->process_info) return 0; - if (!drm_gem_is_imported(obj) || !dma_buf_is_dynamic(obj->dma_buf)) + if (!drm_gem_is_imported(obj) || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) return 0; mutex_lock_nested(&vm->process_info->lock, 1); if (!WARN_ON(!vm->process_info->eviction_fence)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5cacf5717016..8777ed8facd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1276,7 +1276,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *obj = &bo->tbo.base; if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { - struct dma_buf *dma_buf = obj->dma_buf; + struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); From 3c3e9a9f2972b364e8c2cfbfdeb23c6d6be4f87f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 25 Jul 2025 18:31:04 -0700 Subject: [PATCH 0268/1307] RDMA/rxe: Flush delayed SKBs while releasing RXE resources When skb packets are sent out, these skb packets still depends on the rxe resources, for example, QP, sk, when these packets are destroyed. If these rxe resources are released when the skb packets are destroyed, the call traces will appear. To avoid skb packets hang too long time in some network devices, a timestamp is added when these skb packets are created. If these skb packets hang too long time in network devices, these network devices can free these skb packets to release rxe resources. Reported-by: syzbot+8425ccfb599521edb153@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8425ccfb599521edb153 Tested-by: syzbot+8425ccfb599521edb153@syzkaller.appspotmail.com Fixes: 1a633bdc8fd9 ("RDMA/rxe: Let destroy qp succeed with stuck packet") Signed-off-by: Zhu Yanjun Link: https://patch.msgid.link/20250726013104.463570-1-yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_net.c | 29 ++++++++--------------------- drivers/infiniband/sw/rxe/rxe_qp.c | 2 +- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 132a87e52d5c..ac0183a2ff7a 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -345,33 +345,15 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, static void rxe_skb_tx_dtor(struct sk_buff *skb) { - struct net_device *ndev = skb->dev; - struct rxe_dev *rxe; - unsigned int qp_index; - struct rxe_qp *qp; + struct rxe_qp *qp = skb->sk->sk_user_data; int skb_out; - rxe = rxe_get_dev_from_net(ndev); - if (!rxe && is_vlan_dev(ndev)) - rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); - if (WARN_ON(!rxe)) - return; - - qp_index = (int)(uintptr_t)skb->sk->sk_user_data; - if (!qp_index) - return; - - qp = rxe_pool_get_index(&rxe->qp_pool, qp_index); - if (!qp) - goto put_dev; - skb_out = atomic_dec_return(&qp->skb_out); - if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW) + if (unlikely(qp->need_req_skb && + skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_sched_task(&qp->send_task); rxe_put(qp); -put_dev: - ib_device_put(&rxe->ib_dev); sock_put(skb->sk); } @@ -383,6 +365,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) @@ -405,6 +388,7 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) sock_hold(sk); skb->sk = sk; skb->destructor = rxe_skb_tx_dtor; + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) @@ -497,6 +481,9 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, goto out; } + /* Add time stamp to skb. */ + skb->tstamp = ktime_get(); + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); /* FIXME: hold reference to this netdev until life of this skb. */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index f2af3e0aef35..95f1c1c2949d 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) return err; - qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index; + qp->sk->sk->sk_user_data = qp; /* pick a source UDP port number for this QP based on * the source QPN. this spreads traffic for different QPs From d5c74713f0117d07f91eb48b10bc2ad44e23c9b9 Mon Sep 17 00:00:00 2001 From: Boshi Yu Date: Fri, 25 Jul 2025 13:53:55 +0800 Subject: [PATCH 0269/1307] RDMA/erdma: Fix ignored return value of init_kernel_qp The init_kernel_qp interface may fail. Check its return value and free related resources properly when it does. Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation") Reviewed-by: Cheng Xu Signed-off-by: Boshi Yu Link: https://patch.msgid.link/20250725055410.67520-3-boshiyu@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 94c211df09d8..2e01520ca385 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1031,7 +1031,9 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (ret) goto err_out_cmd; } else { - init_kernel_qp(dev, qp, attrs); + ret = init_kernel_qp(dev, qp, attrs); + if (ret) + goto err_out_xa; } qp->attrs.max_send_sge = attrs->cap.max_send_sge; From d4ac86b47563c7895dae28658abd1879d266b2b4 Mon Sep 17 00:00:00 2001 From: Boshi Yu Date: Fri, 25 Jul 2025 13:53:56 +0800 Subject: [PATCH 0270/1307] RDMA/erdma: Fix unset QPN of GSI QP The QPN of the GSI QP was not set, which may cause issues. Set the QPN to 1 when creating the GSI QP. Fixes: 999a0a2e9b87 ("RDMA/erdma: Support UD QPs and UD WRs") Reviewed-by: Cheng Xu Signed-off-by: Boshi Yu Link: https://patch.msgid.link/20250725055410.67520-4-boshiyu@linux.alibaba.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 2e01520ca385..fdeec33c71da 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -994,6 +994,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL); if (xa_is_err(old_entry)) ret = xa_err(old_entry); + else + qp->ibqp.qp_num = 1; } else { ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, XA_LIMIT(1, dev->attrs.max_qp - 1), From f0ba0e7172a222ea6043b61ecd86723c46d7bcf2 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 23 Jul 2025 15:38:10 +0200 Subject: [PATCH 0271/1307] btrfs: zoned: skip ZONE FINISH of conventional zones Don't call ZONE FINISH for conventional zones as this will result in I/O errors. Instead check if the zone that needs finishing is a conventional zone and if yes skip it. Also factor out the actual handling of finishing a single zone into a helper function, as do_zone_finish() is growing ever bigger and the indentations levels are getting higher. Reviewed-by: Naohiro Aota Reviewed-by: Anand Jain Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 55 ++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index db11b5b5f0e6..36de6d0d595f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2245,6 +2245,40 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) rcu_read_unlock(); } +static int call_zone_finish(struct btrfs_block_group *block_group, + struct btrfs_io_stripe *stripe) +{ + struct btrfs_device *device = stripe->dev; + const u64 physical = stripe->physical; + struct btrfs_zoned_device_info *zinfo = device->zone_info; + int ret; + + if (!device->bdev) + return 0; + + if (zinfo->max_active_zones == 0) + return 0; + + if (btrfs_dev_is_sequential(device, physical)) { + unsigned int nofs_flags; + + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + zinfo->zone_size >> SECTOR_SHIFT); + memalloc_nofs_restore(nofs_flags); + + if (ret) + return ret; + } + + if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + zinfo->reserved_active_zones++; + btrfs_dev_clear_active_zone(device, physical); + + return 0; +} + static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) { struct btrfs_fs_info *fs_info = block_group->fs_info; @@ -2329,31 +2363,12 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_device *device = map->stripes[i].dev; - const u64 physical = map->stripes[i].physical; - struct btrfs_zoned_device_info *zinfo = device->zone_info; - unsigned int nofs_flags; - - if (!device->bdev) - continue; - - if (zinfo->max_active_zones == 0) - continue; - - nofs_flags = memalloc_nofs_save(); - ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, - physical >> SECTOR_SHIFT, - zinfo->zone_size >> SECTOR_SHIFT); - memalloc_nofs_restore(nofs_flags); + ret = call_zone_finish(block_group, &map->stripes[i]); if (ret) { up_read(&dev_replace->rwsem); return ret; } - - if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) - zinfo->reserved_active_zones++; - btrfs_dev_clear_active_zone(device, physical); } up_read(&dev_replace->rwsem); From daa0fde322350b467bc62bc1b141bf62df6123f8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:53 +0900 Subject: [PATCH 0272/1307] btrfs: zoned: fix data relocation block group reservation btrfs_zoned_reserve_data_reloc_bg() is called on mount and at that point, all data block groups belong to the primary data space_info. So, we don't find anything in the data relocation space_info. Also, the condition "bg->used > 0" can select a block group with full of zone_unusable bytes for the candidate. As we cannot allocate from the block group, it is useless to reserve it as the data relocation block group. Furthermore, because of the space_info separation, we need to migrate the selected block group to the data relocation space_info. If not, the extent allocator cannot use the block group to do the allocation. This commit fixes these three issues. Fixes: e606ff985ec7 ("btrfs: zoned: reserve data_reloc block group on mount") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 55 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 36de6d0d595f..7a3351b1b0c6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -17,6 +17,7 @@ #include "accessors.h" #include "bio.h" #include "transaction.h" +#include "sysfs.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 @@ -2519,12 +2520,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; - struct btrfs_space_info *space_info = data_sinfo->sub_group[0]; + struct btrfs_space_info *space_info = data_sinfo; struct btrfs_trans_handle *trans; struct btrfs_block_group *bg; struct list_head *bg_list; u64 alloc_flags; - bool initial = false; + bool first = true; bool did_chunk_alloc = false; int index; int ret; @@ -2538,21 +2539,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) if (sb_rdonly(fs_info->sb)) return; - ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); index = btrfs_bg_flags_to_raid_index(alloc_flags); - bg_list = &data_sinfo->block_groups[index]; + /* Scan the data space_info to find empty block groups. Take the second one. */ again: + bg_list = &space_info->block_groups[index]; list_for_each_entry(bg, bg_list, list) { - if (bg->used > 0) + if (bg->alloc_offset != 0) continue; - if (!initial) { - initial = true; + if (first) { + first = false; continue; } + if (space_info == data_sinfo) { + /* Migrate the block group to the data relocation space_info. */ + struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0]; + int factor; + + ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); + factor = btrfs_bg_type_to_factor(bg->flags); + + down_write(&space_info->groups_sem); + list_del_init(&bg->list); + /* We can assume this as we choose the second empty one. */ + ASSERT(!list_empty(&space_info->block_groups[index])); + up_write(&space_info->groups_sem); + + spin_lock(&space_info->lock); + space_info->total_bytes -= bg->length; + space_info->disk_total -= bg->length * factor; + /* There is no allocation ever happened. */ + ASSERT(bg->used == 0); + ASSERT(bg->zone_unusable == 0); + /* No super block in a block group on the zoned setup. */ + ASSERT(bg->bytes_super == 0); + spin_unlock(&space_info->lock); + + bg->space_info = reloc_sinfo; + if (reloc_sinfo->block_group_kobjs[index] == NULL) + btrfs_sysfs_add_block_group_type(bg); + + btrfs_add_bg_to_space_info(fs_info, bg); + } + fs_info->data_reloc_bg = bg->start; set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags); btrfs_zone_activate(bg); @@ -2567,11 +2599,18 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) return; + /* Allocate new BG in the data relocation space_info. */ + space_info = data_sinfo->sub_group[0]; + ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); btrfs_end_transaction(trans); if (ret == 1) { + /* + * We allocated a new block group in the data relocation space_info. We + * can take that one. + */ + first = false; did_chunk_alloc = true; - bg_list = &space_info->block_groups[index]; goto again; } } From 5c4b93f4c8e5c53574c1a48d66a27a2c68b414af Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:54 +0900 Subject: [PATCH 0273/1307] btrfs: zoned: fix write time activation failure for metadata block group Since commit 13bb483d32ab ("btrfs: zoned: activate metadata block group on write time"), we activate a metadata block group at the write time. If the zone capacity is small enough, we can allocate the entire region before the first write. Then, we hit the btrfs_zoned_bg_is_full() in btrfs_zone_activate() and the activation fails. For a data block group, we activate it at the allocation time and we should check the fullness condition in the caller side. Add, a WARN to check the fullness condition. For a metadata block group, we don't need the fullness check because we activate it at the write time. Instead, activating it once it is written should be invalid. Catch that with a WARN too. Fixes: 13bb483d32ab ("btrfs: zoned: activate metadata block group on write time") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 7a3351b1b0c6..ab6844dce8bc 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2169,10 +2169,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - /* No space left */ - if (btrfs_zoned_bg_is_full(block_group)) { - ret = false; - goto out_unlock; + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) { + /* The caller should check if the block group is full. */ + if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) { + ret = false; + goto out_unlock; + } + } else { + /* Since it is already written, it should have been active. */ + WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start); } for (i = 0; i < map->num_stripes; i++) { From 04147d8394e80acaaebf0365f112339e8b606c05 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:55 +0900 Subject: [PATCH 0274/1307] btrfs: zoned: limit active zones to max_open_zones When there is no active zone limit, we can technically write into any number of zones at the same time. However, exceeding the max open zones can degrade performance. To prevent this, set the max_active_zones to bdev_max_open_zones() if there is no active zone limit. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index ab6844dce8bc..e0ee3aeabd2c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -43,6 +43,9 @@ /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* Default number of max active zones when the device has no limits. */ +#define BTRFS_DEFAULT_MAX_ACTIVE_ZONES 128 + /* * Minimum of active zones we need: * @@ -417,7 +420,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; - max_active_zones = bdev_max_active_zones(bdev); + max_active_zones = min_not_zero(bdev_max_active_zones(bdev), + bdev_max_open_zones(bdev)); + if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES) + max_active_zones = BTRFS_DEFAULT_MAX_ACTIVE_ZONES; if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { btrfs_err(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", From 085a1b42e52750769a3fa29d4da6c05ab56f18f8 Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Sat, 26 Jul 2025 15:53:45 +0800 Subject: [PATCH 0275/1307] RDMA/hns: Fix querying wrong SCC context for DIP algorithm When using DIP algorithm, all QPs establishing connections with the same destination IP share the same SCC, which is indexed by dip_idx, but dip_idx isn't necessarily equal to qpn. Therefore, dip_idx should be used to query SCC context instead of qpn. Fixes: 124a9fbe43aa ("RDMA/hns: Append SCC context to the raw dump of QPC") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250726075345.846957-1-huangjunxian6@hisilicon.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_restrack.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 64bca08f3f1a..244a4780d3a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5476,7 +5476,7 @@ static int hns_roce_v2_query_srqc(struct hns_roce_dev *hr_dev, u32 srqn, return ret; } -static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn, +static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 sccn, void *buffer) { struct hns_roce_v2_scc_context *context; @@ -5488,7 +5488,7 @@ static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn, return PTR_ERR(mailbox); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC, - qpn); + sccn); if (ret) goto out; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index f637b73b946e..230187dda6a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -100,6 +100,7 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) struct hns_roce_v2_qp_context qpc; struct hns_roce_v2_scc_context sccc; } context = {}; + u32 sccn = hr_qp->qpn; int ret; if (!hr_dev->hw->query_qpc) @@ -116,7 +117,13 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) !hr_dev->hw->query_sccc) goto out; - ret = hr_dev->hw->query_sccc(hr_dev, hr_qp->qpn, &context.sccc); + if (hr_qp->cong_type == CONG_TYPE_DIP) { + if (!hr_qp->dip) + goto out; + sccn = hr_qp->dip->dip_idx; + } + + ret = hr_dev->hw->query_sccc(hr_dev, sccn, &context.sccc); if (ret) ibdev_warn_ratelimited(&hr_dev->ib_dev, "failed to query SCCC, ret = %d.\n", From 6296f9a5293ada28558f2867ac54c487e1e2b9f2 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Tue, 5 Aug 2025 15:39:57 +0530 Subject: [PATCH 0276/1307] RDMA/bnxt_re: Fix to do SRQ armena by default Whenever SRQ is created, make sure SRQ arm enable is always set. Driver is always ready to receive SRQ ASYNC event. Additional note - There is no need to do srq arm enable conditionally. See bnxt_qplib_armen_db in bnxt_qplib_create_cq(). Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Link: https://patch.msgid.link/20250805101000.233310-2-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Kalesh AP Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index dfe3177123e5..b2c1240775f4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -705,8 +705,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.db = srq->dpi->dbr; srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.priv_db; - if (srq->threshold) - bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); + bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; return 0; From 666bce0bd7e771127cb0cda125cc9d32d9f9f15d Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Tue, 5 Aug 2025 15:39:58 +0530 Subject: [PATCH 0277/1307] RDMA/bnxt_re: Fix to remove workload check in SRQ limit path There should not be any checks of current workload to set srq_limit value to SRQ hw context. Remove all such workload checks and make a direct call to set srq_limit via doorbell SRQ_ARM. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-3-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 ++----- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 27 ------------------------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 -- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 37c2bc3bdba5..260dc67b8b87 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1921,7 +1921,6 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_re_dev *rdev = srq->rdev; - int rc; switch (srq_attr_mask) { case IB_SRQ_MAX_WR: @@ -1933,11 +1932,8 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, return -EINVAL; srq->qplib_srq.threshold = srq_attr->srq_limit; - rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq); - if (rc) { - ibdev_err(&rdev->ibdev, "Modify HW SRQ failed!"); - return rc; - } + bnxt_qplib_srq_arm_db(&srq->qplib_srq.dbinfo, srq->qplib_srq.threshold); + /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b2c1240775f4..ee36b3d82cc0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -706,7 +706,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.priv_db; bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); - srq->arm_req = false; return 0; fail: @@ -716,24 +715,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, return rc; } -int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, - struct bnxt_qplib_srq *srq) -{ - struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - u32 count; - - count = __bnxt_qplib_get_avail(srq_hwq); - if (count > srq->threshold) { - srq->arm_req = false; - bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); - } else { - /* Deferred arming */ - srq->arm_req = true; - } - - return 0; -} - int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { @@ -775,7 +756,6 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; struct rq_wqe *srqe; struct sq_sge *hw_sge; - u32 count = 0; int i, next; spin_lock(&srq_hwq->lock); @@ -807,15 +787,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot); - spin_lock(&srq_hwq->lock); - count = __bnxt_qplib_get_avail(srq_hwq); - spin_unlock(&srq_hwq->lock); /* Ring DB */ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); - if (srq->arm_req == true && count > srq->threshold) { - srq->arm_req = false; - bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); - } return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index ab125f1d949e..4921a214c34c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -546,8 +546,6 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, srqn_handler_t srq_handler); int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); -int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, - struct bnxt_qplib_srq *srq); int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, From ba60a1e8cbbd396c69ff9c8bc3242f5ab133e38a Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 5 Aug 2025 15:39:59 +0530 Subject: [PATCH 0278/1307] RDMA/bnxt_re: Fix a possible memory leak in the driver The GID context reuse logic requires the context memory to be not freed if and when DEL_GID firmware command fails. But, if there's no subsequent ADD_GID to reuse it, the context memory must be freed when the driver is unloaded. Otherwise it leads to a memory leak. Below is the kmemleak trace reported: unreferenced object 0xffff88817a4f34d0 (size 8): comm "insmod", pid 1072504, jiffies 4402561550 hex dump (first 8 bytes): 01 00 00 00 00 00 00 00 ........ backtrace (crc ccaa009e): __kmalloc_cache_noprof+0x33e/0x400 0xffffffffc2db9d48 add_modify_gid+0x5e0/0xb60 [ib_core] __ib_cache_gid_add+0x213/0x350 [ib_core] update_gid+0xf2/0x180 [ib_core] enum_netdev_ipv4_ips+0x3f3/0x690 [ib_core] enum_all_gids_of_dev_cb+0x125/0x1b0 [ib_core] ib_enum_roce_netdev+0x14b/0x250 [ib_core] ib_cache_setup_one+0x2e5/0x540 [ib_core] ib_register_device+0x82c/0xf10 [ib_core] 0xffffffffc2df5ad9 0xffffffffc2da8b07 0xffffffffc2db174d auxiliary_bus_probe+0xa5/0x120 really_probe+0x1e4/0x850 __driver_probe_device+0x18f/0x3d0 Fixes: 4a62c5e9e2e1 ("RDMA/bnxt_re: Do not free the ctx_tbl entry if delete GID fails") Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-4-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Sriharsha Basavapatna Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 293b0a96c8e3..df7cf8d68e27 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -2017,6 +2017,28 @@ static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) rdev->nqr = NULL; } +/* When DEL_GID fails, driver is not freeing GID ctx memory. + * To avoid the memory leak, free the memory during unload + */ +static void bnxt_re_free_gid_ctx(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; + struct bnxt_re_gid_ctx *ctx, **ctx_tbl; + int i; + + if (!sgid_tbl->active) + return; + + ctx_tbl = sgid_tbl->ctx; + for (i = 0; i < sgid_tbl->max; i++) { + if (sgid_tbl->hw_id[i] == 0xFFFF) + continue; + + ctx = ctx_tbl[i]; + kfree(ctx); + } +} + static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; @@ -2030,6 +2052,7 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); + bnxt_re_free_gid_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags)) bnxt_re_cleanup_res(rdev); From 806b9f494f62791ee6d68f515a8056c615a0e7b2 Mon Sep 17 00:00:00 2001 From: Anantha Prabhu Date: Tue, 5 Aug 2025 15:40:00 +0530 Subject: [PATCH 0279/1307] RDMA/bnxt_re: Fix to initialize the PBL array memset the PBL page pointer and page map arrays before populating the SGL addresses of the HWQ. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Signed-off-by: Anantha Prabhu Reviewed-by: Saravanan Vajravel Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250805101000.233310-5-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 6cd05207ffed..cc5c82d96839 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -121,6 +121,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_arr = vmalloc_array(pages, sizeof(void *)); if (!pbl->pg_arr) return -ENOMEM; + memset(pbl->pg_arr, 0, pages * sizeof(void *)); pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t)); if (!pbl->pg_map_arr) { @@ -128,6 +129,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_arr = NULL; return -ENOMEM; } + memset(pbl->pg_map_arr, 0, pages * sizeof(dma_addr_t)); pbl->pg_count = 0; pbl->pg_size = sginfo->pgsize; From 2186e8c39eb156b3557a467ce4e5dc3f24826609 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:56:01 -0700 Subject: [PATCH 0280/1307] MAINTAINERS: Remove bouncing irdma maintainer This maintainer's email no longer works. Remove it from MAINTAINERS. This still leaves one maintainer for the driver. Signed-off-by: Dave Hansen Cc: Tatyana Nikolova Cc: linux-rdma@vger.kernel.org Link: https://patch.msgid.link/20250808175601.EF0AF767@davehans-spike.ostc.intel.com Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..b31fd6e7539c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12280,7 +12280,6 @@ F: include/linux/avf/virtchnl.h F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA -M: Mustafa Ismail M: Tatyana Nikolova L: linux-rdma@vger.kernel.org S: Supported From 111aea0464c20f3eb25a48d5ff6c036e6b416123 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 10 Aug 2025 23:21:58 +0530 Subject: [PATCH 0281/1307] RDMA/core: Free pfn_list with appropriate kvfree call Ensure that pfn_list allocated by kvcalloc() is freed using corresponding kvfree() function. Match memory allocation and free routines kvcalloc -> kvfree. Fixes: 259e9bd07c57 ("RDMA/core: Avoid hmm_dma_map_alloc() for virtual DMA devices") Signed-off-by: Akhilesh Patil Link: https://patch.msgid.link/aJjcPjL1BVh8QrMN@bhairav-test.ee.iitb.ac.in Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index b1c44ec1a3f3..572a91a62a7b 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -115,7 +115,7 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp, out_free_map: if (ib_uses_virt_dma(dev)) - kfree(map->pfn_list); + kvfree(map->pfn_list); else hmm_dma_map_free(dev->dma_device, map); return ret; @@ -287,7 +287,7 @@ static void ib_umem_odp_free(struct ib_umem_odp *umem_odp) mutex_unlock(&umem_odp->umem_mutex); mmu_interval_notifier_remove(&umem_odp->notifier); if (ib_uses_virt_dma(dev)) - kfree(umem_odp->map.pfn_list); + kvfree(umem_odp->map.pfn_list); else hmm_dma_map_free(dev->dma_device, &umem_odp->map); } From 185c926283da67a72df20a63a5046b3b4631b7d9 Mon Sep 17 00:00:00 2001 From: Minjong Kim Date: Wed, 13 Aug 2025 19:30:22 +0900 Subject: [PATCH 0282/1307] HID: hid-ntrig: fix unable to handle page fault in ntrig_report_version() in ntrig_report_version(), hdev parameter passed from hid_probe(). sending descriptor to /dev/uhid can make hdev->dev.parent->parent to null if hdev->dev.parent->parent is null, usb_dev has invalid address(0xffffffffffffff58) that hid_to_usb_dev(hdev) returned when usb_rcvctrlpipe() use usb_dev,it trigger page fault error for address(0xffffffffffffff58) add null check logic to ntrig_report_version() before calling hid_to_usb_dev() Signed-off-by: Minjong Kim Link: https://patch.msgid.link/20250813-hid-ntrig-page-fault-fix-v2-1-f98581f35106@samsung.com Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-ntrig.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 2738ce947434..0f76e241e0af 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -144,6 +144,9 @@ static void ntrig_report_version(struct hid_device *hdev) struct usb_device *usb_dev = hid_to_usb_dev(hdev); unsigned char *data = kmalloc(8, GFP_KERNEL); + if (!hid_is_usb(hdev)) + return; + if (!data) goto err_free; From fa2e2d31ee3b7212079323b4b09201ef68af3a97 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Tue, 12 Aug 2025 20:26:02 +0800 Subject: [PATCH 0283/1307] RDMA/hns: Fix dip entries leak on devices newer than hip09 DIP algorithm is also supported on devices newer than hip09, so free dip entries too. Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250812122602.3524602-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 244a4780d3a6..f82bdd46a917 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3043,7 +3043,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) if (!hr_dev->is_vf) hns_roce_free_link_table(hr_dev); - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) free_dip_entry(hr_dev); } From f7a2e1c08727384cde1c686dd57172f99b5f2e6e Mon Sep 17 00:00:00 2001 From: Erick Karanja Date: Wed, 13 Aug 2025 10:18:36 +0300 Subject: [PATCH 0284/1307] Docs: admin-guide: Correct spelling mistake Fix spelling mistake directoy to directory Reported-by: codespell Signed-off-by: Erick Karanja Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250813071837.668613-1-karanja99erick@gmail.com Signed-off-by: Jens Axboe --- Documentation/admin-guide/blockdev/zoned_loop.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst index 9c7aa3b482f3..64dcfde7450a 100644 --- a/Documentation/admin-guide/blockdev/zoned_loop.rst +++ b/Documentation/admin-guide/blockdev/zoned_loop.rst @@ -79,7 +79,7 @@ zone_capacity_mb Device zone capacity (must always be equal to or lower than the zone size. Default: zone size. conv_zones Total number of conventioanl zones starting from sector 0. Default: 8. -base_dir Path to the base directoy where to create the directory +base_dir Path to the base directory where to create the directory containing the zone files of the device. Default=/var/local/zloop. The device directory containing the zone files is always From 8f5845e0743bf3512b71b3cb8afe06c192d6acc4 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Tue, 12 Aug 2025 23:42:57 +0800 Subject: [PATCH 0285/1307] block: restore default wbt enablement The commit 245618f8e45f ("block: protect wbt_lat_usec using q->elevator_lock") protected wbt_enable_default() with q->elevator_lock; however, it also placed wbt_enable_default() before blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);, resulting in wbt failing to be enabled. Moreover, the protection of wbt_enable_default() by q->elevator_lock was removed in commit 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()"), so we can directly fix this issue by placing wbt_enable_default() after blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);. Additionally, this issue also causes the inability to read the wbt_lat_usec file, and the scenario is as follows: root@q:/sys/block/sda/queue# cat wbt_lat_usec cat: wbt_lat_usec: Invalid argument root@q:/data00/sjc/linux# ls /sys/kernel/debug/block/sda/rqos cannot access '/sys/kernel/debug/block/sda/rqos': No such file or directory root@q:/data00/sjc/linux# find /sys -name wbt /sys/kernel/debug/tracing/events/wbt After testing with this patch, wbt can be enabled normally. Signed-off-by: Julian Sun Cc: stable@vger.kernel.org Fixes: 245618f8e45f ("block: protect wbt_lat_usec using q->elevator_lock") Reviewed-by: Nilay Shroff Reviewed-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250812154257.57540-1-sunjunchao@bytedance.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c5cf79a20842..4a7f1a349998 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -902,9 +902,9 @@ int blk_register_queue(struct gendisk *disk) if (queue_is_mq(q)) elevator_set_default(q); - wbt_enable_default(disk); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); + wbt_enable_default(disk); /* Now everything is ready and send out KOBJ_ADD uevent */ kobject_uevent(&disk->queue_kobj, KOBJ_ADD); From 25db5f284fb8f30222146ca15b3ab8265789da38 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 13 Aug 2025 11:29:29 +0800 Subject: [PATCH 0286/1307] md: add legacy_async_del_gendisk mode commit 9e59d609763f ("md: call del_gendisk in control path") changes the async way to sync way of calling del_gendisk. But it breaks mdadm --assemble command. The assemble command runs like this: 1. create the array 2. stop the array 3. access the sysfs files after stopping The sync way calls del_gendisk in step 2, so all sysfs files are removed. Now to avoid breaking mdadm assemble command, this patch adds the parameter legacy_async_del_gendisk that can be used to choose which way. The default is async way. In future, we plan to change default to sync way in kernel 7.0. Then users need to upgrade to mdadm 4.5+ which removes step 2. Fixes: 9e59d609763f ("md: call del_gendisk in control path") Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/linux-raid/CAMw=ZnQ=ET2St-+hnhsuq34rRPnebqcXqP1QqaHW5Bh4aaaZ4g@mail.gmail.com/T/#t Suggested-and-reviewed-by: Yu Kuai Signed-off-by: Xiao Ni Reviewed-by: Paul Menzel Link: https://lore.kernel.org/linux-raid/20250813032929.54978-1-xni@redhat.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 56 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ac85ec73a409..772cffe02ff5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -339,6 +339,7 @@ static int start_readonly; * so all the races disappear. */ static bool create_on_open = true; +static bool legacy_async_del_gendisk = true; /* * We have a system wide 'event count' that is incremented @@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev) export_rdev(rdev, mddev); } - /* Call del_gendisk after release reconfig_mutex to avoid - * deadlock (e.g. call del_gendisk under the lock and an - * access to sysfs files waits the lock) - * And MD_DELETED is only used for md raid which is set in - * do_md_stop. dm raid only uses md_stop to stop. So dm raid - * doesn't need to check MD_DELETED when getting reconfig lock - */ - if (test_bit(MD_DELETED, &mddev->flags)) - del_gendisk(mddev->gendisk); + if (!legacy_async_del_gendisk) { + /* + * Call del_gendisk after release reconfig_mutex to avoid + * deadlock (e.g. call del_gendisk under the lock and an + * access to sysfs files waits the lock) + * And MD_DELETED is only used for md raid which is set in + * do_md_stop. dm raid only uses md_stop to stop. So dm raid + * doesn't need to check MD_DELETED when getting reconfig lock + */ + if (test_bit(MD_DELETED, &mddev->flags)) + del_gendisk(mddev->gendisk); + } } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -5818,6 +5822,13 @@ static void md_kobj_release(struct kobject *ko) { struct mddev *mddev = container_of(ko, struct mddev, kobj); + if (legacy_async_del_gendisk) { + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + if (mddev->sysfs_level) + sysfs_put(mddev->sysfs_level); + del_gendisk(mddev->gendisk); + } put_disk(mddev->gendisk); } @@ -6021,6 +6032,9 @@ static int md_alloc_and_put(dev_t dev, char *name) { struct mddev *mddev = md_alloc(dev, name); + if (legacy_async_del_gendisk) + pr_warn("md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+\n"); + if (IS_ERR(mddev)) return PTR_ERR(mddev); mddev_put(mddev); @@ -6431,10 +6445,22 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - /* if UNTIL_STOP is set, it's cleared here */ - mddev->hold_active = 0; - /* Don't clear MD_CLOSING, or mddev can be opened again. */ - mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + + /* + * For legacy_async_del_gendisk mode, it can stop the array in the + * middle of assembling it, then it still can access the array. So + * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk, + * it can't open the array again after stopping it. So it doesn't + * clear MD_CLOSING. + */ + if (legacy_async_del_gendisk && mddev->hold_active) { + clear_bit(MD_CLOSING, &mddev->flags); + } else { + /* if UNTIL_STOP is set, it's cleared here */ + mddev->hold_active = 0; + /* Don't clear MD_CLOSING, or mddev can be opened again. */ + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + } mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -6658,7 +6684,8 @@ static int do_md_stop(struct mddev *mddev, int mode) export_array(mddev); md_clean(mddev); - set_bit(MD_DELETED, &mddev->flags); + if (!legacy_async_del_gendisk) + set_bit(MD_DELETED, &mddev->flags); } md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -10392,6 +10419,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); module_param(create_on_open, bool, S_IRUSR|S_IWUSR); +module_param(legacy_async_del_gendisk, bool, 0600); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); From afa17a09c699410113199dc15256c6ea2b4133f7 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Wed, 6 Aug 2025 08:23:32 +0800 Subject: [PATCH 0287/1307] HID: intel-thc-hid: Intel-quicki2c: Enhance driver re-install flow After driver module is removed and during re-install stage, if there is continueous user touching on the screen, it is a risk impacting THC hardware initialization which causes driver installation failure. This patch enhances this flow by quiescing the external touch interrupt after driver is removed which keeps THC hardware ignore external interrupt during this remove and re-install stage. Signed-off-by: Even Xu Tested-by: Rui Zhang Fixes: 66b59bfce6d9 ("HID: intel-thc-hid: intel-quicki2c: Complete THC QuickI2C driver") Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index e944a6ccb776..854926b3cfd4 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -419,6 +419,7 @@ static struct quicki2c_device *quicki2c_dev_init(struct pci_dev *pdev, void __io */ static void quicki2c_dev_deinit(struct quicki2c_device *qcdev) { + thc_interrupt_quiesce(qcdev->thc_hw, true); thc_interrupt_enable(qcdev->thc_hw, false); thc_ltr_unconfig(qcdev->thc_hw); thc_wot_unconfig(qcdev->thc_hw); From 4bcd3061e8154606af7f721cb75ca04ffe191a12 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Jul 2025 19:01:45 +0930 Subject: [PATCH 0288/1307] btrfs: clear block dirty if submit_one_sector() failed [BUG] If submit_one_sector() failed, the block will be kept dirty, but with their corresponding range finished in the ordered extent. This means if a writeback happens later again, we can hit the following problems: - ASSERT(block_start != EXTENT_MAP_HOLE) in submit_one_sector() If the original extent map is a hole, then we can hit this case, as the new ordered extent failed, we will drop the new extent map and re-read one from the disk. - DEBUG_WARN() in btrfs_writepage_cow_fixup() This is because we no longer have an ordered extent for those dirty blocks. The original for them is already finished with error. [CAUSE] The function submit_one_sector() is not following the regular error handling of writeback. The common practice is to clear the folio dirty, start and finish the writeback for the block. This is normally done by extent_clear_unlock_delalloc() with PAGE_START_WRITEBACK | PAGE_END_WRITEBACK flags during run_delalloc_range(). So if we keep those failed blocks dirty, they will stay in the page cache and wait for the next writeback. And since the original ordered extent is already finished and removed, depending on the original extent map, we either hit the ASSERT() inside submit_one_sector(), or hit the DEBUG_WARN() in btrfs_writepage_cow_fixup(). [FIX] Follow the regular error handling to clear the dirty flag for the block, start and finish writeback for that block instead. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f23d75986947..741c20480099 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1512,7 +1512,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, /* * Return 0 if we have submitted or queued the sector for submission. - * Return <0 for critical errors. + * Return <0 for critical errors, and the sector will have its dirty flag cleared. * * Caller should make sure filepos < i_size and handle filepos >= i_size case. */ @@ -1535,8 +1535,17 @@ static int submit_one_sector(struct btrfs_inode *inode, ASSERT(filepos < i_size); em = btrfs_get_extent(inode, NULL, filepos, sectorsize); - if (IS_ERR(em)) + if (IS_ERR(em)) { + /* + * When submission failed, we should still clear the folio dirty. + * Or the folio will be written back again but without any + * ordered extent. + */ + btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize); + btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize); + btrfs_folio_clear_writeback(fs_info, folio, filepos, sectorsize); return PTR_ERR(em); + } extent_offset = filepos - em->start; em_end = btrfs_extent_map_end(em); @@ -1666,8 +1675,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, * Here we set writeback and clear for the range. If the full folio * is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag. * - * If we hit any error, the corresponding sector will still be dirty - * thus no need to clear PAGECACHE_TAG_DIRTY. + * If we hit any error, the corresponding sector will have its dirty + * flag cleared and writeback finished, thus no need to handle the error case. */ if (!submitted_io && !error) { btrfs_folio_set_writeback(fs_info, folio, start, len); From 05b372862600e551bbf86e7f24a1caeed5e06150 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Jul 2025 19:01:46 +0930 Subject: [PATCH 0289/1307] btrfs: clear block dirty if btrfs_writepage_cow_fixup() failed [BUG] If btrfs_writepage_cow_fixup() failed (returning value -EUCLEAN), the block will be kept dirty, but with its corresponding range finished in the ordered extent. Currently that error pattern is only possible for experimental builds, which places extra check to ensure we shouldn't hit a dirty block without a corresponding ordered extent. This means if later a writeback happens again, we can hit the following problems: - ASSERT(block_start != EXTENT_MAP_HOLE) in submit_one_sector() If the original extent map is a hole, then we can hit this case, as the new ordered extent failed, we will drop the new extent map and re-read one from the disk. - DEBUG_WARN() in btrfs_writepage_cow_fixup() This is because we no longer have an ordered extent for those dirty blocks. The original for them is already finished with error. [CAUSE] The function btrfs_writepage_cow_fixup() is not following the regular error handling of writeback. The common practice is to clear the folio dirty, start and finish the writeback for the block. This is normally done by extent_clear_unlock_delalloc() with PAGE_START_WRITEBACK | PAGE_END_WRITEBACK flags during run_delalloc_range(). So if we keep those failed blocks dirty, they will stay in the page cache and wait for the next writeback. And since the original ordered extent is already finished and removed, depending on the original extent map, we either hit the ASSERT() inside submit_one_sector(), or hit the DEBUG_WARN() in btrfs_writepage_cow_fixup() again (and very ironic). [FIX] Follow the regular error handling to clear the dirty flag for the block range, start and finish writeback for that block range instead. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 741c20480099..be9c9c804952 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1618,8 +1618,12 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, folio_unlock(folio); return 1; } - if (ret < 0) + if (ret < 0) { + btrfs_folio_clear_dirty(fs_info, folio, start, len); + btrfs_folio_set_writeback(fs_info, folio, start, len); + btrfs_folio_clear_writeback(fs_info, folio, start, len); return ret; + } for (cur = start; cur < start + len; cur += fs_info->sectorsize) set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); From f022499f24e520706b9a8238746e1cacc37eb4e0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Aug 2025 16:39:49 +0100 Subject: [PATCH 0290/1307] btrfs: do not set mtime/ctime to current time when unlinking for log replay If we are doing an unlink for log replay, we are updating the directory's mtime and ctime to the current time, and this is incorrect since it should stay with the mtime and ctime that were set when the directory was logged. This is the same as when adding a link to an inode during log replay (with btrfs_add_link()), where we want the mtime and ctime to be the values that were in place when the inode was logged. This was found with generic/547 using LOAD_FACTOR=20 and TIME_FACTOR=20, where due to large log trees we have longer log replay times and fssum could detect a mismatch of the mtime and ctime of a directory. Fix this by skipping the mtime and ctime update at __btrfs_unlink_inode() if we are in log replay context (just like btrfs_add_link()). Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d740910e071a..9e4aec7330cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4189,6 +4189,23 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, return ret; } +static void update_time_after_link_or_unlink(struct btrfs_inode *dir) +{ + struct timespec64 now; + + /* + * If we are replaying a log tree, we do not want to update the mtime + * and ctime of the parent directory with the current time, since the + * log replay procedure is responsible for setting them to their correct + * values (the ones it had when the fsync was done). + */ + if (test_bit(BTRFS_FS_LOG_RECOVERING, &dir->root->fs_info->flags)) + return; + + now = inode_set_ctime_current(&dir->vfs_inode); + inode_set_mtime_to_ts(&dir->vfs_inode, now); +} + /* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and @@ -4289,7 +4306,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); + update_time_after_link_or_unlink(dir); return btrfs_update_inode(trans, dir); } @@ -6683,15 +6700,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); - /* - * If we are replaying a log tree, we do not want to update the mtime - * and ctime of the parent directory with the current time, since the - * log replay procedure is responsible for setting them to their correct - * values (the ones it had when the fsync was done). - */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) - inode_set_mtime_to_ts(&parent_inode->vfs_inode, - inode_set_ctime_current(&parent_inode->vfs_inode)); + update_time_after_link_or_unlink(parent_inode); ret = btrfs_update_inode(trans, parent_inode); if (ret) From 1f3d56db694cce6dfbffba0f398a06a222204487 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 31 Jul 2025 08:20:01 +0930 Subject: [PATCH 0291/1307] btrfs: clear TAG_TOWRITE from buffer tree when submitting a tree block [POSSIBLE BUG] After commit 5e121ae687b8 ("btrfs: use buffer xarray for extent buffer writeback operations"), we have a dedicated xarray for extent buffers, and a lot of tags are migrated to that buffer tree, like PAGECACHE_TAG_TOWRITE/DIRTY/WRITEBACK. This frees us from the limits of page flags, but there is a new asymmetric behavior, we call buffer_tree_tag_for_writeback() to set PAGECACHE_TAG_TOWRITE for the involved ranges, but there is no one to clear that tag. Before that rework, we relied on the page cache tag which was cleared when folio_start_writeback() was called. Although this has its own problems (e.g. the first one calling folio_start_writeback() will clear the tag for the whole page), it at least cleared the tag. But now our real tags are stored in the buffer tree, no one is really clearing the PAGECACHE_TAG_TOWRITE tag now. [FIX] Thankfully this is not going to cause any real bug, but just some inefficiency iterating the extent buffers. As if we hit an extent buffer which is not dirty but still has the PAGECACHE_TAG_TOWRITE tag, lock_extent_buffer_for_io() will skip it so we won't writeback the extent buffer again. To properly fix the inefficiency, just clear the PAGECACHE_TAG_TOWRITE inside lock_extent_buffer_for_io(). There is no error path between lock_extent_buffer_for_io() and write_one_eb(), so we're safe to clear the tag there. Reviewed-by: Naohiro Aota Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be9c9c804952..c953297aa89a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1826,6 +1826,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e xas_load(&xas); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); From b1511360c8ac882b0c52caa263620538e8d73220 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 31 Jul 2025 12:46:56 +0900 Subject: [PATCH 0292/1307] btrfs: subpage: keep TOWRITE tag until folio is cleaned btrfs_subpage_set_writeback() calls folio_start_writeback() the first time a folio is written back, and it also clears the PAGECACHE_TAG_TOWRITE tag even if there are still dirty blocks in the folio. This can break ordering guarantees, such as those required by btrfs_wait_ordered_extents(). That ordering breakage leads to a real failure. For example, running generic/464 on a zoned setup will hit the following ASSERT. This happens because the broken ordering fails to flush existing dirty pages before the file size is truncated. assertion failed: !list_empty(&ordered->list) :: 0, in fs/btrfs/zoned.c:1899 ------------[ cut here ]------------ kernel BUG at fs/btrfs/zoned.c:1899! Oops: invalid opcode: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 1906169 Comm: kworker/u130:2 Kdump: loaded Not tainted 6.16.0-rc6-BTRFS-ZNS+ #554 PREEMPT(voluntary) Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021 Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] RIP: 0010:btrfs_finish_ordered_zoned.cold+0x50/0x52 [btrfs] RSP: 0018:ffffc9002efdbd60 EFLAGS: 00010246 RAX: 000000000000004c RBX: ffff88811923c4e0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff827e38b1 RDI: 00000000ffffffff RBP: ffff88810005d000 R08: 00000000ffffdfff R09: ffffffff831051c8 R10: ffffffff83055220 R11: 0000000000000000 R12: ffff8881c2458c00 R13: ffff88811923c540 R14: ffff88811923c5e8 R15: ffff8881c1bd9680 FS: 0000000000000000(0000) GS:ffff88a04acd0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f907c7a918c CR3: 0000000004024000 CR4: 0000000000350ef0 Call Trace: ? srso_return_thunk+0x5/0x5f btrfs_finish_ordered_io+0x4a/0x60 [btrfs] btrfs_work_helper+0xf9/0x490 [btrfs] process_one_work+0x204/0x590 ? srso_return_thunk+0x5/0x5f worker_thread+0x1d6/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x118/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x205/0x260 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Consider process A calling writepages() with WB_SYNC_NONE. In zoned mode or for compressed writes, it locks several folios for delalloc and starts writing them out. Let's call the last locked folio folio X. Suppose the write range only partially covers folio X, leaving some pages dirty. Process A calls btrfs_subpage_set_writeback() when building a bio. This function call clears the TOWRITE tag of folio X, whose size = 8K and the block size = 4K. It is following state. 0 4K 8K |/////|/////| (flag: DIRTY, tag: DIRTY) <-----> Process A will write this range. Now suppose process B concurrently calls writepages() with WB_SYNC_ALL. It calls tag_pages_for_writeback() to tag dirty folios with PAGECACHE_TAG_TOWRITE. Since folio X is still dirty, it gets tagged. Then, B collects tagged folios using filemap_get_folios_tag() and must wait for folio X to be written before returning from writepages(). 0 4K 8K |/////|/////| (flag: DIRTY, tag: DIRTY|TOWRITE) However, between tagging and collecting, process A may call btrfs_subpage_set_writeback() and clear folio X's TOWRITE tag. 0 4K 8K | |/////| (flag: DIRTY|WRITEBACK, tag: DIRTY) As a result, process B won't see folio X in its batch, and returns without waiting for it. This breaks the WB_SYNC_ALL ordering requirement. Fix this by using btrfs_subpage_set_writeback_keepwrite(), which retains the TOWRITE tag. We now manually clear the tag only after the folio becomes clean, via the xas operation. Fixes: 3470da3b7d87 ("btrfs: subpage: introduce helpers for writeback status") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/subpage.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index c9b3821957f7..cb4f97833dc3 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&bfs->lock, flags); bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + + /* + * Don't clear the TOWRITE tag when starting writeback on a still-dirty + * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, + * assume writeback is complete, and exit too early — violating sync + * ordering guarantees. + */ if (!folio_test_writeback(folio)) - folio_start_writeback(folio); + __folio_start_writeback(folio, true); + if (!folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); + XA_STATE(xas, &mapping->i_pages, folio->index); + unsigned long flags; + + xas_lock_irqsave(&xas, flags); + xas_load(&xas); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); + } spin_unlock_irqrestore(&bfs->lock, flags); } From dc61d97b0ba064fb21b01fbfa7436873948277bd Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 12 Aug 2025 01:32:58 +0900 Subject: [PATCH 0293/1307] btrfs: fix buffer index in wait_eb_writebacks() The commit f2cb97ee964a ("btrfs: index buffer_tree using node size") changed the index of buffer_tree from "start >> sectorsize_bits" to "start >> nodesize_bits". However, the change is not applied for wait_eb_writebacks() and caused IO failures by writing in a full zone. Use the index properly. Fixes: f2cb97ee964a ("btrfs: index buffer_tree using node size") Reviewed-by: Qu Wenruo Reviewed-by: Boris Burkov Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e0ee3aeabd2c..ea662036f441 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2242,7 +2242,7 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) struct btrfs_fs_info *fs_info = block_group->fs_info; const u64 end = block_group->start + block_group->length; struct extent_buffer *eb; - unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits); + unsigned long index, start = (block_group->start >> fs_info->nodesize_bits); rcu_read_lock(); xa_for_each_start(&fs_info->buffer_tree, index, eb, start) { From edf842abe4368ce3c423343cf4b23b210fcf1622 Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 23 Jul 2025 00:38:37 +0900 Subject: [PATCH 0294/1307] btrfs: fix incorrect log message for nobarrier mount option Fix a wrong log message that appears when the "nobarrier" mount option is unset. When "nobarrier" is unset, barrier is actually enabled. However, the log incorrectly stated "turning off barriers". Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 466d0450269c..768a2532fa4a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1462,7 +1462,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); - btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers"); + btrfs_info_if_unset(info, old, NOBARRIER, "turning on barriers"); btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log"); btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching"); btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree"); From b435ab556bea875c088485f271ef2709ca1d75f5 Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 13 Aug 2025 03:00:06 +0900 Subject: [PATCH 0295/1307] btrfs: restore mount option info messages during mount After the fsconfig migration in 6.8, mount option info messages are no longer displayed during mount operations because btrfs_emit_options() is only called during remount, not during initial mount. Fix this by calling btrfs_emit_options() in btrfs_fill_super() after open_ctree() succeeds. Additionally, prevent log duplication by ensuring btrfs_check_options() handles validation with warn-level and err-level messages, while btrfs_emit_options() provides info-level messages. Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 768a2532fa4a..8469f36ef011 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -88,6 +88,9 @@ struct btrfs_fs_context { refcount_t refs; }; +static void btrfs_emit_options(struct btrfs_fs_info *info, + struct btrfs_fs_context *old); + enum { Opt_acl, Opt_clear_cache, @@ -698,12 +701,9 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { - btrfs_info(info, "disk space caching is enabled"); btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); } - if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) - btrfs_info(info, "using free-space-tree"); } return ret; @@ -980,6 +980,8 @@ static int btrfs_fill_super(struct super_block *sb, return ret; } + btrfs_emit_options(fs_info, NULL); + inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); From 74857fdc5dd2cdcdeb6e99bdf26976fd9299d2bb Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Wed, 13 Aug 2025 03:00:07 +0900 Subject: [PATCH 0296/1307] btrfs: fix printing of mount info messages for NODATACOW/NODATASUM The NODATASUM message was printed twice by mistake and the NODATACOW was missing from the 'unset' part. Fix the duplication and make the output look the same. Fixes: eddb1a433f26 ("btrfs: add reconfigure callback for fs_context") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Qu Wenruo Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8469f36ef011..7f31f8bd63ba 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1439,7 +1439,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, { btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts"); - btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); + btrfs_info_if_set(info, old, NODATACOW, "setting nodatacow"); btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations"); btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme"); btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers"); @@ -1461,6 +1461,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags"); + btrfs_info_if_unset(info, old, NODATASUM, "setting datasum"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); From 9d83e1f05c98bab5de350bef89177e2be8b34db0 Mon Sep 17 00:00:00 2001 From: Fengnan Chang Date: Wed, 13 Aug 2025 20:02:14 +0800 Subject: [PATCH 0297/1307] io_uring/io-wq: add check free worker before create new worker After commit 0b2b066f8a85 ("io_uring/io-wq: only create a new worker if it can make progress"), in our produce environment, we still observe that part of io_worker threads keeps creating and destroying. After analysis, it was confirmed that this was due to a more complex scenario involving a large number of fsync operations, which can be abstracted as frequent write + fsync operations on multiple files in a single uring instance. Since write is a hash operation while fsync is not, and fsync is likely to be suspended during execution, the action of checking the hash value in io_wqe_dec_running cannot handle such scenarios. Similarly, if hash-based work and non-hash-based work are sent at the same time, similar issues are likely to occur. Returning to the starting point of the issue, when a new work arrives, io_wq_enqueue may wake up free worker A, while io_wq_dec_running may create worker B. Ultimately, only one of A and B can obtain and process the task, leaving the other in an idle state. In the end, the issue is caused by inconsistent logic in the checks performed by io_wq_enqueue and io_wq_dec_running. Therefore, the problem can be resolved by checking for available workers in io_wq_dec_running. Signed-off-by: Fengnan Chang Reviewed-by: Diangang Li Link: https://lore.kernel.org/r/20250813120214.18729-1-changfengnan@bytedance.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index be91edf34f01..17dfaa0395c4 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -357,6 +357,13 @@ static void create_worker_cb(struct callback_head *cb) worker = container_of(cb, struct io_worker, create_work); wq = worker->wq; acct = worker->acct; + + rcu_read_lock(); + do_create = !io_acct_activate_free_worker(acct); + rcu_read_unlock(); + if (!do_create) + goto no_need_create; + raw_spin_lock(&acct->workers_lock); if (acct->nr_workers < acct->max_workers) { @@ -367,6 +374,7 @@ static void create_worker_cb(struct callback_head *cb) if (do_create) { create_io_worker(wq, acct); } else { +no_need_create: atomic_dec(&acct->nr_running); io_worker_ref_put(wq); } From 47ed64db8c17eb16541098add865178fb7e68744 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 13 Aug 2025 18:07:08 +0800 Subject: [PATCH 0298/1307] ASoC: tas2781: Normalize the volume kcontrol name Change the name of the kcontrol from "Gain" to "Volume". Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250813100708.12197-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 8e7e45c046b8..676130f4cf3e 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -908,10 +908,10 @@ static const struct snd_kcontrol_new tasdevice_cali_controls[] = { }; static const struct snd_kcontrol_new tas2781_snd_controls[] = { - SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, + SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), - SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL, + SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Volume", TAS2781_DVC_LVL, 0, 0, 200, 1, tas2781_digital_getvol, tas2781_digital_putvol, dvc_tlv), }; From e664036cf36480414936cd91f4cfa2179a3d8367 Mon Sep 17 00:00:00 2001 From: Miao Li Date: Fri, 1 Aug 2025 16:27:28 +0800 Subject: [PATCH 0299/1307] usb: quirks: Add DELAY_INIT quick for another SanDisk 3.2Gen1 Flash Drive Another SanDisk 3.2Gen1 Flash Drive also need DELAY_INIT quick, or it will randomly work incorrectly on Huawei hisi platforms when doing reboot test. Signed-off-by: Miao Li Cc: stable Link: https://lore.kernel.org/r/20250801082728.469406-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ff0ff95d5cca..f5bc53875330 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -371,6 +371,7 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, /* SanDisk Corp. SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x5596), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, /* SanDisk Extreme 55AE */ From 202ad1aaca777dc7fd24f459f5f808f5abd2bfda Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 1 Aug 2025 18:40:41 +0100 Subject: [PATCH 0300/1307] usb: gadget: tegra-xudc: fix PM use count underflow Upon resume from system suspend, the PM runtime core issues the following warning: tegra-xudc 3550000.usb: Runtime PM usage count underflow! This is because tegra_xudc_resume() unconditionally calls schedule_work(&xudc->usb_role_sw_work) whether or not anything has changed, which causes tegra_xudc_device_mode_off() to be called even when we're already in that mode. Keep track of the current state of "device_mode", and only schedule this work if it has changed from the hardware state on resume. Signed-off-by: "Russell King (Oracle)" Link: https://lore.kernel.org/r/E1uhtkH-007KDZ-JT@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 2957316fd3d0..1d3085cc9d22 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -502,6 +502,7 @@ struct tegra_xudc { struct clk_bulk_data *clks; bool device_mode; + bool current_device_mode; struct work_struct usb_role_sw_work; struct phy **usb3_phy; @@ -715,6 +716,8 @@ static void tegra_xudc_device_mode_on(struct tegra_xudc *xudc) phy_set_mode_ext(xudc->curr_utmi_phy, PHY_MODE_USB_OTG, USB_ROLE_DEVICE); + + xudc->current_device_mode = true; } static void tegra_xudc_device_mode_off(struct tegra_xudc *xudc) @@ -725,6 +728,8 @@ static void tegra_xudc_device_mode_off(struct tegra_xudc *xudc) dev_dbg(xudc->dev, "device mode off\n"); + xudc->current_device_mode = false; + connected = !!(xudc_readl(xudc, PORTSC) & PORTSC_CCS); reinit_completion(&xudc->disconnect_complete); @@ -4044,10 +4049,10 @@ static int __maybe_unused tegra_xudc_resume(struct device *dev) spin_lock_irqsave(&xudc->lock, flags); xudc->suspended = false; + if (xudc->device_mode != xudc->current_device_mode) + schedule_work(&xudc->usb_role_sw_work); spin_unlock_irqrestore(&xudc->lock, flags); - schedule_work(&xudc->usb_role_sw_work); - pm_runtime_enable(dev); return 0; From f9420f4757752f056144896024d5ea89e5a611f1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 3 Aug 2025 00:55:20 +0200 Subject: [PATCH 0301/1307] usb: renesas-xhci: Fix External ROM access timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the External ROM access timeouts to prevent failures during programming of External SPI EEPROM chips. The current timeouts are too short for some SPI EEPROMs used with uPD720201 controllers. The current timeout for Chip Erase in renesas_rom_erase() is 100 ms , the current timeout for Sector Erase issued by the controller before Page Program in renesas_fw_download_image() is also 100 ms. Neither timeout is sufficient for e.g. the Macronix MX25L5121E or MX25V5126F. MX25L5121E reference manual [1] page 35 section "ERASE AND PROGRAMMING PERFORMANCE" and page 23 section "Table 8. AC CHARACTERISTICS (Temperature = 0°C to 70°C for Commercial grade, VCC = 2.7V ~ 3.6V)" row "tCE" indicate that the maximum time required for Chip Erase opcode to complete is 2 s, and for Sector Erase it is 300 ms . MX25V5126F reference manual [2] page 47 section "13. ERASE AND PROGRAMMING PERFORMANCE (2.3V - 3.6V)" and page 42 section "Table 8. AC CHARACTERISTICS (Temperature = -40°C to 85°C for Industrial grade, VCC = 2.3V - 3.6V)" row "tCE" indicate that the maximum time required for Chip Erase opcode to complete is 3.2 s, and for Sector Erase it is 400 ms . Update the timeouts such, that Chip Erase timeout is set to 5 seconds, and Sector Erase timeout is set to 500 ms. Such lengthy timeouts ought to be sufficient for majority of SPI EEPROM chips. [1] https://www.macronix.com/Lists/Datasheet/Attachments/8634/MX25L5121E,%203V,%20512Kb,%20v1.3.pdf [2] https://www.macronix.com/Lists/Datasheet/Attachments/8750/MX25V5126F,%202.5V,%20512Kb,%20v1.1.pdf Fixes: 2478be82de44 ("usb: renesas-xhci: Add ROM loader for uPD720201") Cc: stable Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20250802225526.25431-1-marek.vasut+renesas@mailbox.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci-renesas.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 620f8f0febb8..86df80399c9f 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -47,8 +47,9 @@ #define RENESAS_ROM_ERASE_MAGIC 0x5A65726F #define RENESAS_ROM_WRITE_MAGIC 0x53524F4D -#define RENESAS_RETRY 10000 -#define RENESAS_DELAY 10 +#define RENESAS_RETRY 50000 /* 50000 * RENESAS_DELAY ~= 500ms */ +#define RENESAS_CHIP_ERASE_RETRY 500000 /* 500000 * RENESAS_DELAY ~= 5s */ +#define RENESAS_DELAY 10 #define RENESAS_FW_NAME "renesas_usb_fw.mem" @@ -407,7 +408,7 @@ static void renesas_rom_erase(struct pci_dev *pdev) /* sleep a bit while ROM is erased */ msleep(20); - for (i = 0; i < RENESAS_RETRY; i++) { + for (i = 0; i < RENESAS_CHIP_ERASE_RETRY; i++) { retval = pci_read_config_byte(pdev, RENESAS_ROM_STATUS, &status); status &= RENESAS_ROM_STATUS_ERASE; From 8fe06185e11ae753414aa6117f0e798aa77567ff Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 6 Aug 2025 16:39:55 +0800 Subject: [PATCH 0302/1307] usb: core: hcd: fix accessing unmapped memory in SINGLE_STEP_SET_FEATURE test The USB core will unmap urb->transfer_dma after SETUP stage completes. Then the USB controller will access unmapped memory when it received device descriptor. If iommu is equipped, the entire test can't be completed due to the memory accessing is blocked. Fix it by calling map_urb_for_dma() again for IN stage. To reduce redundant map for urb->transfer_buffer, this will also set URB_NO_TRANSFER_DMA_MAP flag before first map_urb_for_dma() to skip dma map for urb->transfer_buffer and clear URB_NO_TRANSFER_DMA_MAP flag before second map_urb_for_dma(). Fixes: 216e0e563d81 ("usb: core: hcd: use map_urb_for_dma for single step set feature urb") Cc: stable Reviewed-by: Jun Li Signed-off-by: Xu Yang Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250806083955.3325299-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 03771bbc6c01..c4a1875b5d3d 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2166,7 +2166,7 @@ static struct urb *request_single_step_set_feature_urb( urb->complete = usb_ehset_completion; urb->status = -EINPROGRESS; urb->actual_length = 0; - urb->transfer_flags = URB_DIR_IN; + urb->transfer_flags = URB_DIR_IN | URB_NO_TRANSFER_DMA_MAP; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); @@ -2230,9 +2230,15 @@ int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) /* Complete remaining DATA and STATUS stages using the same URB */ urb->status = -EINPROGRESS; + urb->transfer_flags &= ~URB_NO_TRANSFER_DMA_MAP; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); + if (map_urb_for_dma(hcd, urb, GFP_KERNEL)) { + usb_put_urb(urb); + goto out1; + } + retval = hcd->driver->submit_single_step_set_feature(hcd, urb, 0); if (!retval && !wait_for_completion_timeout(&done, msecs_to_jiffies(2000))) { From 6ca8af3c8fb584f3424a827f554ff74f898c27cd Mon Sep 17 00:00:00 2001 From: Mael GUERIN Date: Wed, 6 Aug 2025 18:44:03 +0200 Subject: [PATCH 0303/1307] USB: storage: Add unusual-devs entry for Novatek NTK96550-based camera Add the US_FL_BULK_IGNORE_TAG quirk for Novatek NTK96550-based camera to fix USB resets after sending SCSI vendor commands due to CBW and CSW tags difference, leading to undesired slowness while communicating with the device. Please find below the copy of /sys/kernel/debug/usb/devices with my device plugged in (listed as TechSys USB mass storage here, the underlying chipset being the Novatek NTK96550-based camera): T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0603 ProdID=8611 Rev= 0.01 S: Manufacturer=TechSys S: Product=USB Mass Storage S: SerialNumber=966110000000100 C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Mael GUERIN Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250806164406.43450-1-mael.guerin@murena.io Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 54f0b1c83317..bee9f1e8003d 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -934,6 +934,13 @@ UNUSUAL_DEV( 0x05e3, 0x0723, 0x9451, 0x9451, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SANE_SENSE ), +/* Added by Maël GUERIN */ +UNUSUAL_DEV( 0x0603, 0x8611, 0x0000, 0xffff, + "Novatek", + "NTK96550-based camera", + USB_SC_SCSI, USB_PR_BULK, NULL, + US_FL_BULK_IGNORE_TAG ), + /* * Reported by Hanno Boeck * Taken from the Lycoris Kernel From 58577118cc7cec9eb7c1836bf88f865ff2c5e3a3 Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Thu, 7 Aug 2025 17:06:55 +0800 Subject: [PATCH 0304/1307] usb: dwc3: Ignore late xferNotReady event to prevent halt timeout During a device-initiated disconnect, the End Transfer command resets the event filter, allowing a new xferNotReady event to be generated before the controller is fully halted. Processing this late event incorrectly triggers a Start Transfer, which prevents the controller from halting and results in a DSTS.DEVCTLHLT bit polling timeout. Ignore the late xferNotReady event if the controller is already in a disconnected state. Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable Signed-off-by: Kuen-Han Tsai Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250807090700.2397190-1-khtsai@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 25db36c63951..68fa2813e5f4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3777,6 +3777,15 @@ static void dwc3_gadget_endpoint_transfer_complete(struct dwc3_ep *dep, static void dwc3_gadget_endpoint_transfer_not_ready(struct dwc3_ep *dep, const struct dwc3_event_depevt *event) { + /* + * During a device-initiated disconnect, a late xferNotReady event can + * be generated after the End Transfer command resets the event filter, + * but before the controller is halted. Ignore it to prevent a new + * transfer from starting. + */ + if (!dep->dwc->connected) + return; + dwc3_gadget_endpoint_frame_from_event(dep, event); /* From 86f390ba59cd8d5755bafe2b163c3e6b89d6bbd9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 12 Aug 2025 16:11:00 +0300 Subject: [PATCH 0305/1307] usb: dwc3: pci: add support for the Intel Wildcat Lake This patch adds the necessary PCI ID for Intel Wildcat Lake devices. Signed-off-by: Heikki Krogerus Cc: stable Link: https://lore.kernel.org/r/20250812131101.2930199-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 54a4ee2b90b7..39c72cb52ce7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -41,6 +41,7 @@ #define PCI_DEVICE_ID_INTEL_TGPLP 0xa0ee #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee #define PCI_DEVICE_ID_INTEL_JSP 0x4dee +#define PCI_DEVICE_ID_INTEL_WCL 0x4d7e #define PCI_DEVICE_ID_INTEL_ADL 0x460e #define PCI_DEVICE_ID_INTEL_ADL_PCH 0x51ee #define PCI_DEVICE_ID_INTEL_ADLN 0x465e @@ -431,6 +432,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, TGPLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGPH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, JSP, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, WCL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADL_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ADLN, &dwc3_pci_intel_swnode) }, From 9528d32873b38281ae105f2f5799e79ae9d086c2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Aug 2025 10:27:45 +0200 Subject: [PATCH 0306/1307] kcov, usb: Don't disable interrupts in kcov_remote_start_usb_softirq() kcov_remote_start_usb_softirq() the begin of urb's completion callback. HCDs marked HCD_BH will invoke this function from the softirq and in_serving_softirq() will detect this properly. Root-HUB (RH) requests will not be delayed to softirq but complete immediately in IRQ context. This will confuse kcov because in_serving_softirq() will report true if the softirq is served after the hardirq and if the softirq got interrupted by the hardirq in which currently runs. This was addressed by simply disabling interrupts in kcov_remote_start_usb_softirq() which avoided the interruption by the RH while a regular completion callback was invoked. This not only changes the behaviour while kconv is enabled but also breaks PREEMPT_RT because now sleeping locks can no longer be acquired. Revert the previous fix. Address the issue by invoking kcov_remote_start_usb() only if the context is just "serving softirqs" which is identified by checking in_serving_softirq() and in_hardirq() must be false. Fixes: f85d39dd7ed89 ("kcov, usb: disable interrupts in kcov_remote_start_usb_softirq") Cc: stable Reported-by: Yunseong Kim Closes: https://lore.kernel.org/all/20250725201400.1078395-2-ysk@kzalloc.com/ Tested-by: Yunseong Kim Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250811082745.ycJqBXMs@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 12 +++++------ include/linux/kcov.h | 47 ++++++++---------------------------------- 2 files changed, 14 insertions(+), 45 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index c4a1875b5d3d..6270fbb5c699 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1636,7 +1636,6 @@ static void __usb_hcd_giveback_urb(struct urb *urb) struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; - unsigned long flags; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && @@ -1654,14 +1653,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; /* - * Only collect coverage in the softirq context and disable interrupts - * to avoid scenarios with nested remote coverage collection sections - * that KCOV does not support. - * See the comment next to kcov_remote_start_usb_softirq() for details. + * This function can be called in task context inside another remote + * coverage collection section, but kcov doesn't support that kind of + * recursion yet. Only collect coverage in softirq context for now. */ - flags = kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop_softirq(flags); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 75a2fb8b16c3..0143358874b0 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * workaround for KCOV's lack of nested remote coverage sections support. - * - * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. - * - * kcov_remote_start_usb_softirq(): - * - * 1. Only collects coverage when called in the softirq context. This allows - * avoiding nested remote coverage collection sections in the task context. - * For example, USB/IP calls usb_hcd_giveback_urb() in the task context - * within an existing remote coverage collection section. Thus, KCOV should - * not attempt to start collecting coverage within the coverage collection - * section in __usb_hcd_giveback_urb() in this case. - * - * 2. Disables interrupts for the duration of the coverage collection section. - * This allows avoiding nested remote coverage collection sections in the - * softirq context (a softirq might occur during the execution of a work in - * the BH workqueue, which runs with in_serving_softirq() > 0). - * For example, usb_giveback_urb_bh() runs in the BH workqueue with - * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in - * the middle of its remote coverage collection section, and the interrupt - * handler might invoke __usb_hcd_giveback_urb() again. + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ From 421255afa2a58eee2109dda56c137a7b61c4b05f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 11 Aug 2025 18:08:33 +0800 Subject: [PATCH 0307/1307] usb: chipidea: imx: improve usbmisc_imx7d_pullup() When add workaround for ERR051725, the usbmisc will put PHY to Non-driving mode (OPMODE = 01) after stopping the device controller and put PHY back to Normal mode (OPMODE = 00) after starting the device controller. However, this will bring issue for host controller. Because the PHY may stay in Non-driving mode after switching the role from device to host. Then the port will not work if USB device is attached. To fix this issue, improving the workaround by putting PHY to Non-driving mode for a certain period and back to Normal mode finally. To make host detect a disconnect signal, the period should be at least 125us (a micro-frame time) for high-speed link. And only working as high-speed mode will need workaround for ERR051725. So this will also filter the pullup event for high-speed. Fixes: 11992b410083 ("usb: chipidea: imx: implement workaround for ERR051725") Reviewed-by: Jun Li Signed-off-by: Xu Yang Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250811100833.862876-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 3 ++- drivers/usb/chipidea/usbmisc_imx.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index e1ec9b38f5b9..d7c2a1a3c271 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -338,7 +338,8 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) schedule_work(&ci->usb_phy->chg_work); break; case CI_HDRC_CONTROLLER_PULLUP_EVENT: - if (ci->role == CI_ROLE_GADGET) + if (ci->role == CI_ROLE_GADGET && + ci->gadget.speed == USB_SPEED_HIGH) imx_usbmisc_pullup(data->usbmisc_data, ci->gadget.connected); break; diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 3d20c5e76c6a..b1418885707c 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -1068,15 +1068,24 @@ static void usbmisc_imx7d_pullup(struct imx_usbmisc_data *data, bool on) unsigned long flags; u32 val; + if (on) + return; + spin_lock_irqsave(&usbmisc->lock, flags); val = readl(usbmisc->base + MX7D_USBNC_USB_CTRL2); - if (!on) { - val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; - val |= MX7D_USBNC_USB_CTRL2_OPMODE(1); - val |= MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; - } else { - val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; - } + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; + val |= MX7D_USBNC_USB_CTRL2_OPMODE(1); + val |= MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; + writel(val, usbmisc->base + MX7D_USBNC_USB_CTRL2); + spin_unlock_irqrestore(&usbmisc->lock, flags); + + /* Last for at least 1 micro-frame to let host see disconnect signal */ + usleep_range(125, 150); + + spin_lock_irqsave(&usbmisc->lock, flags); + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_MASK; + val |= MX7D_USBNC_USB_CTRL2_OPMODE(0); + val &= ~MX7D_USBNC_USB_CTRL2_OPMODE_OVERRIDE_EN; writel(val, usbmisc->base + MX7D_USBNC_USB_CTRL2); spin_unlock_irqrestore(&usbmisc->lock, flags); } From 98da66a70ad2396e5a508c4245367797ebc052ce Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 13 Aug 2025 16:52:49 +0200 Subject: [PATCH 0308/1307] usb: storage: realtek_cr: Use correct byte order for bcs->Residue Since 'bcs->Residue' has the data type '__le32', convert it to the correct byte order of the CPU using this driver when assigning it to the local variable 'residue'. Cc: stable Fixes: 50a6cb932d5c ("USB: usb_storage: add ums-realtek driver") Suggested-by: Alan Stern Acked-by: Alan Stern Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250813145247.184717-3-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/realtek_cr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 7dea28c2b8ee..cb5bbb19060e 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -252,7 +252,7 @@ static int rts51x_bulk_transport(struct us_data *us, u8 lun, return USB_STOR_TRANSPORT_ERROR; } - residue = bcs->Residue; + residue = le32_to_cpu(bcs->Residue); if (bcs->Tag != us->tag) return USB_STOR_TRANSPORT_ERROR; From a5ba9ad417254c49ecf06ac5ab36ec4b12ee133f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 4 Aug 2025 19:13:11 +0200 Subject: [PATCH 0309/1307] rust: faux: fix C header link Starting with Rust 1.91.0 (expected 2025-10-30), `rustdoc` has improved some false negatives around intra-doc links [1], and it found a broken intra-doc link we currently have: error: unresolved link to `include/linux/device/faux.h` --> rust/kernel/faux.rs:7:17 | 7 | //! C header: [`include/linux/device/faux.h`] | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ no item named `include/linux/device/faux.h` in scope | = help: to escape `[` and `]` characters, add '\' before them like `\[` or `\]` = note: `-D rustdoc::broken-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::broken_intra_doc_links)]` Our `srctree/` C header links are not intra-doc links, thus they need the link destination. Thus fix it. Cc: stable Link: https://github.com/rust-lang/rust/pull/132748 [1] Fixes: 78418f300d39 ("rust/kernel: Add faux device bindings") Signed-off-by: Miguel Ojeda Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250804171311.1186538-1-ojeda@kernel.org Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 7a906099993f..7fe2dd197e37 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -4,7 +4,7 @@ //! //! This module provides bindings for working with faux devices in kernel modules. //! -//! C header: [`include/linux/device/faux.h`] +//! C header: [`include/linux/device/faux.h`](srctree/include/linux/device/faux.h) use crate::{bindings, device, error::code::*, prelude::*}; use core::ptr::{addr_of_mut, null, null_mut, NonNull}; From 23cbfd6fed78715459a4395c034c4e76b8c85320 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Aug 2025 17:36:27 +0200 Subject: [PATCH 0310/1307] ALSA: azt3328: Put __maybe_unused for inline functions for gameport Some inline functions are unused depending on kconfig, and the recent change for clang builds made those handled as errors with W=1. For avoiding pitfalls, mark those with __maybe_unused attributes. Link: https://patch.msgid.link/20250813153628.12303-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/azt3328.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 4418b9ae33e6..b33344f65b8c 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -412,25 +412,25 @@ snd_azf3328_ctrl_outl(const struct snd_azf3328 *chip, unsigned reg, u32 value) outl(value, chip->ctrl_io + reg); } -static inline void +static inline void __maybe_unused snd_azf3328_game_outb(const struct snd_azf3328 *chip, unsigned reg, u8 value) { outb(value, chip->game_io + reg); } -static inline void +static inline void __maybe_unused snd_azf3328_game_outw(const struct snd_azf3328 *chip, unsigned reg, u16 value) { outw(value, chip->game_io + reg); } -static inline u8 +static inline u8 __maybe_unused snd_azf3328_game_inb(const struct snd_azf3328 *chip, unsigned reg) { return inb(chip->game_io + reg); } -static inline u16 +static inline u16 __maybe_unused snd_azf3328_game_inw(const struct snd_azf3328 *chip, unsigned reg) { return inw(chip->game_io + reg); From ee8f1613596ad44c7cff4805d65a8a705998db11 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 13 Aug 2025 09:03:08 -0500 Subject: [PATCH 0311/1307] Revert "ALSA: hda: Add ASRock X670E Taichi to denylist" On a motherboard with an AMD Granite Ridge CPU there is a report that 3.5mm microphone and headphones aren't working. In the log it's observed: snd_hda_intel 0000:02:00.6: Skipping the device on the denylist This was because of commit df42ee7e22f03 ("ALSA: hda: Add ASRock X670E Taichi to denylist"). Reverting this commit allows the microphone and headphones to work again. As at least some combinations of this motherboard do have applicable devices, revert so that they can be probed. Cc: Richard Gong Cc: Juan Martinez Signed-off-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/20250813140427.1577172-1-superm1@kernel.org Signed-off-by: Takashi Iwai --- sound/hda/controllers/intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index fcf67e97a546..1bb3ff55b115 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -2077,7 +2077,6 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ - { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1022, 0xd601) }, /* ASRock X670E Taichi */ {} }; From 7d34ec36abb84fdfb6632a0f2cbda90379ae21fc Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 11 Aug 2025 23:14:55 -0500 Subject: [PATCH 0312/1307] smb3: fix for slab out of bounds on mount to ksmbd With KASAN enabled, it is possible to get a slab out of bounds during mount to ksmbd due to missing check in parse_server_interfaces() (see below): BUG: KASAN: slab-out-of-bounds in parse_server_interfaces+0x14ee/0x1880 [cifs] Read of size 4 at addr ffff8881433dba98 by task mount/9827 CPU: 5 UID: 0 PID: 9827 Comm: mount Tainted: G OE 6.16.0-rc2-kasan #2 PREEMPT(voluntary) Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Dell Inc. Precision Tower 3620/0MWYPT, BIOS 2.13.1 06/14/2019 Call Trace: dump_stack_lvl+0x9f/0xf0 print_report+0xd1/0x670 __virt_addr_valid+0x22c/0x430 ? parse_server_interfaces+0x14ee/0x1880 [cifs] ? kasan_complete_mode_report_info+0x2a/0x1f0 ? parse_server_interfaces+0x14ee/0x1880 [cifs] kasan_report+0xd6/0x110 parse_server_interfaces+0x14ee/0x1880 [cifs] __asan_report_load_n_noabort+0x13/0x20 parse_server_interfaces+0x14ee/0x1880 [cifs] ? __pfx_parse_server_interfaces+0x10/0x10 [cifs] ? trace_hardirqs_on+0x51/0x60 SMB3_request_interfaces+0x1ad/0x3f0 [cifs] ? __pfx_SMB3_request_interfaces+0x10/0x10 [cifs] ? SMB2_tcon+0x23c/0x15d0 [cifs] smb3_qfs_tcon+0x173/0x2b0 [cifs] ? __pfx_smb3_qfs_tcon+0x10/0x10 [cifs] ? cifs_get_tcon+0x105d/0x2120 [cifs] ? do_raw_spin_unlock+0x5d/0x200 ? cifs_get_tcon+0x105d/0x2120 [cifs] ? __pfx_smb3_qfs_tcon+0x10/0x10 [cifs] cifs_mount_get_tcon+0x369/0xb90 [cifs] ? dfs_cache_find+0xe7/0x150 [cifs] dfs_mount_share+0x985/0x2970 [cifs] ? check_path.constprop.0+0x28/0x50 ? save_trace+0x54/0x370 ? __pfx_dfs_mount_share+0x10/0x10 [cifs] ? __lock_acquire+0xb82/0x2ba0 ? __kasan_check_write+0x18/0x20 cifs_mount+0xbc/0x9e0 [cifs] ? __pfx_cifs_mount+0x10/0x10 [cifs] ? do_raw_spin_unlock+0x5d/0x200 ? cifs_setup_cifs_sb+0x29d/0x810 [cifs] cifs_smb3_do_mount+0x263/0x1990 [cifs] Reported-by: Namjae Jeon Tested-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ad8947434b71..218b6ce7ff3a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -772,6 +772,13 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, bytes_left -= sizeof(*p); break; } + /* Validate that Next doesn't point beyond the buffer */ + if (next > bytes_left) { + cifs_dbg(VFS, "%s: invalid Next pointer %zu > %zd\n", + __func__, next, bytes_left); + rc = -EINVAL; + goto out; + } p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); bytes_left -= next; } @@ -783,7 +790,9 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, } /* Azure rounds the buffer size up 8, to a 16 byte boundary */ - if ((bytes_left > 8) || p->Next) + if ((bytes_left > 8) || + (bytes_left >= offsetof(struct network_interface_info_ioctl_rsp, Next) + + sizeof(p->Next) && p->Next)) cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); ses->iface_last_update = jiffies; From e3835731e169a48a2c73018d135b5c08c39ea61d Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 11 Aug 2025 22:07:37 +0800 Subject: [PATCH 0313/1307] smb: client: fix mid_q_entry memleak leak with per-mid locking This is step 4/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. In compound_send_recv(), when wait_for_response() is interrupted by signals, the code attempts to cancel pending requests by changing their callbacks to cifs_cancelled_callback. However, there's a race condition between signal interruption and network response processing that causes both mid_q_entry and server buffer leaks: ``` User foreground process cifsd cifs_readdir open_cached_dir cifs_send_recv compound_send_recv smb2_setup_request smb2_mid_entry_alloc smb2_get_mid_entry smb2_mid_entry_alloc mempool_alloc // alloc mid kref_init(&temp->refcount); // refcount = 1 mid[0]->callback = cifs_compound_callback; mid[1]->callback = cifs_compound_last_callback; smb_send_rqst rc = wait_for_response wait_event_state TASK_KILLABLE cifs_demultiplex_thread allocate_buffers server->bigbuf = cifs_buf_get() standard_receive3 ->find_mid() smb2_find_mid __smb2_find_mid kref_get(&mid->refcount) // +1 cifs_handle_standard handle_mid /* bigbuf will also leak */ mid->resp_buf = server->bigbuf server->bigbuf = NULL; dequeue_mid /* in for loop */ mids[0]->callback cifs_compound_callback /* Signal interrupts wait: rc = -ERESTARTSYS */ /* if (... || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) *? midQ[0]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; /* The change comes too late */ mid->mid_state = MID_RESPONSE_READY release_mid // -1 /* cancelled_mid[i] == true causes mid won't be released in compound_send_recv cleanup */ /* cifs_cancelled_callback won't executed to release mid */ ``` The root cause is that there's a race between callback assignment and execution. Fix this by introducing per-mid locking: - Add spinlock_t mid_lock to struct mid_q_entry - Add mid_execute_callback() for atomic callback execution - Use mid_lock in cancellation paths to ensure atomicity This ensures that either the original callback or the cancellation callback executes atomically, preventing reference count leaks when requests are interrupted by signals. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220404 Fixes: ee258d79159a ("CIFS: Move credit processing to mid callbacks for SMB3") Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 21 +++++++++++++++++++++ fs/smb/client/cifstransport.c | 19 +++++++++---------- fs/smb/client/connect.c | 8 ++++---- fs/smb/client/smb2ops.c | 4 ++-- fs/smb/client/smb2transport.c | 1 + fs/smb/client/transport.c | 7 +++---- 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index e6830ab3a546..1e64a4fb6af0 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1732,6 +1732,7 @@ struct mid_q_entry { int mid_rc; /* rc for MID_RC */ __le16 command; /* smb command code */ unsigned int optype; /* operation type */ + spinlock_t mid_lock; bool wait_cancelled:1; /* Cancelled while waiting for response */ bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ @@ -2036,6 +2037,9 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled + * mid_q_entry->mid_lock mid_q_entry->callback alloc_mid + * smb2_mid_entry_alloc + * (Any fields of mid_q_entry that will need protection) ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE @@ -2375,6 +2379,23 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) return ret; } +/* + * Execute mid callback atomically - ensures callback runs exactly once + * and prevents sleeping in atomic context. + */ +static inline void mid_execute_callback(struct mid_q_entry *mid) +{ + void (*callback)(struct mid_q_entry *mid); + + spin_lock(&mid->mid_lock); + callback = mid->callback; + mid->callback = NULL; /* Mark as executed, */ + spin_unlock(&mid->mid_lock); + + if (callback) + callback(mid); +} + #define CIFS_REPARSE_SUPPORT(tcon) \ ((tcon)->posix_extensions || \ (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c index 352dafb888dd..e98b95eff8c9 100644 --- a/fs/smb/client/cifstransport.c +++ b/fs/smb/client/cifstransport.c @@ -46,6 +46,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -345,16 +346,15 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(server, midQ); if (rc != 0) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); add_credits(server, &credits, 0); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } rc = cifs_sync_mid_result(midQ, server); @@ -527,15 +527,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = wait_for_response(server, midQ); if (rc) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { + spin_lock(&midQ->mid_lock); + if (midQ->callback) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); return rc; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ->mid_lock); } /* We got the response - restart system call. */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 587845a2452d..281ccbeea719 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -335,7 +335,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_entry_safe(mid, nmid, &retry_list, qhead) { list_del_init(&mid->qhead); - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -919,7 +919,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_del_init(&mid->qhead); mid->mid_rc = mid_rc; mid->mid_state = MID_RC; - mid->callback(mid); + mid_execute_callback(mid); release_mid(mid); } @@ -1117,7 +1117,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid); list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); + mid_execute_callback(mid_entry); release_mid(mid_entry); } /* 1/8th of sec is more than enough time for them to exit */ @@ -1394,7 +1394,7 @@ cifs_demultiplex_thread(void *p) } if (!mids[i]->multiRsp || mids[i]->multiEnd) - mids[i]->callback(mids[i]); + mid_execute_callback(mids[i]); release_mid(mids[i]); } else if (server->ops->is_oplock_break && diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 218b6ce7ff3a..3b251de874ec 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4814,7 +4814,7 @@ static void smb2_decrypt_offload(struct work_struct *work) dw->server->ops->is_network_name_deleted(dw->buf, dw->server); - mid->callback(mid); + mid_execute_callback(mid); } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { @@ -4822,7 +4822,7 @@ static void smb2_decrypt_offload(struct work_struct *work) mid->mid_state = MID_RETRY_NEEDED; spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); - mid->callback(mid); + mid_execute_callback(mid); } else { spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index ff9ef7fcd010..bc0e92eb2b64 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -771,6 +771,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); kref_init(&temp->refcount); + spin_lock_init(&temp->mid_lock); temp->mid = le64_to_cpu(shdr->MessageId); temp->credits = credits > 0 ? credits : 1; temp->pid = current->pid; diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 32d528b4dd83..a61ba7f3fb86 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1005,15 +1005,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&server->mid_queue_lock); + spin_lock(&midQ[i]->mid_lock); midQ[i]->wait_cancelled = true; - if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || - midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { + if (midQ[i]->callback) { midQ[i]->callback = cifs_cancelled_callback; cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&midQ[i]->mid_lock); } } From 8c48e1c7520321cc87ff651e96093e2f412785fb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 12 Aug 2025 18:45:06 +0200 Subject: [PATCH 0314/1307] smb: client: don't wait for info->send_pending == 0 on error We already called ib_drain_qp() before and that makes sure send_done() was called with IB_WC_WR_FLUSH_ERR, but didn't called atomic_dec_and_test(&sc->send_io.pending.count) So we may never reach the info->send_pending == 0 condition. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 5349ae5e05fa ("smb: client: let send_done() cleanup before calling smbd_disconnect_rdma_connection()") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c628e91c328b..02d6db431fd4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1337,10 +1337,6 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); - wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); - /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { @@ -1986,7 +1982,11 @@ int smbd_send(struct TCP_Server_Info *server, */ wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0); + atomic_read(&info->send_pending) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) + rc = -EAGAIN; return rc; } From e3f776d30a56286aaf882b96c92e797b7587a6ab Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 9 Aug 2025 09:17:46 -0500 Subject: [PATCH 0315/1307] cifs: update internal version number to 2.56 Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 487f39cff77e..3ce7c614ccc0 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 55 -#define CIFS_VERSION "2.55" +#define SMB3_PRODUCT_BUILD 56 +#define CIFS_VERSION "2.56" #endif /* _CIFSFS_H */ From e19d8dd694d261ac26adb2a26121a37c107c81ad Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Fri, 1 Aug 2025 17:07:24 +0800 Subject: [PATCH 0316/1307] smb: client: remove redundant lstrp update in negotiate protocol Commit 34331d7beed7 ("smb: client: fix first command failure during re-negotiation") addressed a race condition by updating lstrp before entering negotiate state. However, this approach may have some unintended side effects. The lstrp field is documented as "when we got last response from this server", and updating it before actually receiving a server response could potentially affect other mechanisms that rely on this timestamp. For example, the SMB echo detection logic also uses lstrp as a reference point. In scenarios with frequent user operations during reconnect states, the repeated calls to cifs_negotiate_protocol() might continuously update lstrp, which could interfere with the echo detection timing. Additionally, commit 266b5d02e14f ("smb: client: fix race condition in negotiate timeout by using more precise timing") introduced a dedicated neg_start field specifically for tracking negotiate start time. This provides a more precise solution for the original race condition while preserving the intended semantics of lstrp. Since the race condition is now properly handled by the neg_start mechanism, the lstrp update in cifs_negotiate_protocol() is no longer necessary and can be safely removed. Fixes: 266b5d02e14f ("smb: client: fix race condition in negotiate timeout by using more precise timing") Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/connect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 281ccbeea719..dd12f3eb61dc 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4205,7 +4205,6 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, return 0; } - server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; server->neg_start = jiffies; spin_unlock(&server->srv_lock); From 0b3ccb76b95bd06cf80124d8adda647c82a6cc0f Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 9 Jul 2025 16:08:36 +0200 Subject: [PATCH 0317/1307] drm/msm/dsi: Fix 14nm DSI PHY PLL Lock issue To configure and enable the DSI PHY PLL clocks, the MDSS AHB clock must be active for MMIO operations. Typically, this AHB clock is enabled as part of the DSI PHY interface enabling (dsi_phy_enable_resource). However, since these PLL clocks are registered as clock entities, they can be enabled independently of the DSI PHY interface, leading to enabling failures and subsequent warnings: ``` msm_dsi_phy 5e94400.phy: [drm:dsi_pll_14nm_vco_prepare] *ERROR* DSI PLL lock failed ------------[ cut here ]------------ dsi0pllbyte already disabled WARNING: CPU: 3 PID: 1 at drivers/clk/clk.c:1194 clk_core_disable+0xa4/0xac CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Tainted: Tainted: [W]=WARN Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [...] ``` This issue is particularly prevalent at boot time during the disabling of unused clocks (clk_disable_unused()) which includes enabling the parent clock(s) when CLK_OPS_PARENT_ENABLE flag is set (this is the case for the 14nm DSI PHY PLL consumers). To resolve this issue, we move the AHB clock as a PM dependency of the DSI PHY device (via pm_clk). Since the DSI PHY device is the parent of the PLL clocks, this resolves the PLL/AHB dependency. Now the AHB clock is enabled prior the PLL clk_prepare callback, as part of the runtime-resume chain. We also eliminate dsi_phy_[enable|disable]_resource functions, which are superseded by runtime PM. Note that it breaks compatibility with kernels before 6.0, as we do not support anymore the legacy `iface_clk` name. Signed-off-by: Loic Poulain Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/663239/ Link: https://lore.kernel.org/r/20250709140836.124143-1-loic.poulain@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 59 ++++++++------------------- drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 1 - 2 files changed, 18 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 221f12db5f8b..4ea681130dba 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include "dsi_phy.h" @@ -511,30 +513,6 @@ int msm_dsi_cphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, return 0; } -static int dsi_phy_enable_resource(struct msm_dsi_phy *phy) -{ - struct device *dev = &phy->pdev->dev; - int ret; - - ret = pm_runtime_resume_and_get(dev); - if (ret) - return ret; - - ret = clk_prepare_enable(phy->ahb_clk); - if (ret) { - DRM_DEV_ERROR(dev, "%s: can't enable ahb clk, %d\n", __func__, ret); - pm_runtime_put_sync(dev); - } - - return ret; -} - -static void dsi_phy_disable_resource(struct msm_dsi_phy *phy) -{ - clk_disable_unprepare(phy->ahb_clk); - pm_runtime_put(&phy->pdev->dev); -} - static const struct of_device_id dsi_phy_dt_match[] = { #ifdef CONFIG_DRM_MSM_DSI_28NM_PHY { .compatible = "qcom,dsi-phy-28nm-hpm", @@ -698,22 +676,20 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) if (ret) return ret; - phy->ahb_clk = msm_clk_get(pdev, "iface"); - if (IS_ERR(phy->ahb_clk)) - return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), - "Unable to get ahb clk\n"); + platform_set_drvdata(pdev, phy); - ret = devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(dev); if (ret) return ret; - /* PLL init will call into clk_register which requires - * register access, so we need to enable power and ahb clock. - */ - ret = dsi_phy_enable_resource(phy); + ret = devm_pm_clk_create(dev); if (ret) return ret; + ret = pm_clk_add(dev, "iface"); + if (ret < 0) + return dev_err_probe(dev, ret, "Unable to get iface clk\n"); + if (phy->cfg->ops.pll_init) { ret = phy->cfg->ops.pll_init(phy); if (ret) @@ -727,18 +703,19 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "Failed to register clk provider\n"); - dsi_phy_disable_resource(phy); - - platform_set_drvdata(pdev, phy); - return 0; } +static const struct dev_pm_ops dsi_phy_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + static struct platform_driver dsi_phy_platform_driver = { .probe = dsi_phy_driver_probe, .driver = { .name = "msm_dsi_phy", .of_match_table = dsi_phy_dt_match, + .pm = &dsi_phy_pm_ops, }, }; @@ -764,9 +741,9 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, dev = &phy->pdev->dev; - ret = dsi_phy_enable_resource(phy); + ret = pm_runtime_resume_and_get(dev); if (ret) { - DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", + DRM_DEV_ERROR(dev, "%s: resume failed, %d\n", __func__, ret); goto res_en_fail; } @@ -810,7 +787,7 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, phy_en_fail: regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); reg_en_fail: - dsi_phy_disable_resource(phy); + pm_runtime_put(dev); res_en_fail: return ret; } @@ -823,7 +800,7 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) phy->cfg->ops.disable(phy); regulator_bulk_disable(phy->cfg->num_regulators, phy->supplies); - dsi_phy_disable_resource(phy); + pm_runtime_put(&phy->pdev->dev); } void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c558f8df1684..3cbf08231492 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -104,7 +104,6 @@ struct msm_dsi_phy { phys_addr_t lane_size; int id; - struct clk *ahb_clk; struct regulator_bulk_data *supplies; struct msm_dsi_dphy_timing timing; From 553666f839b86545300773954df7426a45c169c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 15 Jul 2025 18:50:37 +0300 Subject: [PATCH 0318/1307] drm/msm/kms: move snapshot init earlier in KMS init Various parts of the display driver can be triggering the display snapshot (including the IOMMU fault handlers). Move the call to msm_disp_snapshot_init() before KMS initialization, otherwise it is possible to ocassionally trigger the kernel fault during init: __lock_acquire+0x44/0x2798 (P) lock_acquire+0x114/0x25c _raw_spin_lock_irqsave+0x6c/0x90 kthread_queue_work+0x2c/0xac msm_disp_snapshot_state+0x2c/0x4c msm_kms_fault_handler+0x2c/0x74 msm_disp_fault_handler+0x30/0x48 report_iommu_fault+0x54/0x128 arm_smmu_context_fault+0x74/0x184 __handle_irq_event_percpu+0xa4/0x24c handle_irq_event_percpu+0x20/0x5c handle_irq_event+0x48/0x84 handle_fasteoi_irq+0xcc/0x170 generic_handle_domain_irq+0x48/0x70 gic_handle_irq+0x54/0x11c call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x54/0x78 el1_interrupt+0x3c/0x5c el1h_64_irq_handler+0x20/0x30 el1h_64_irq+0x6c/0x70 _raw_spin_unlock_irqrestore+0x44/0x68 (P) klist_next+0xc4/0x124 bus_for_each_drv+0x9c/0xe8 __device_attach+0xfc/0x190 device_initial_probe+0x1c/0x2c bus_probe_device+0x44/0xa0 device_add+0x204/0x3e4 platform_device_add+0x170/0x244 platform_device_register_full+0x130/0x138 drm_connector_hdmi_audio_init+0xc0/0x108 drm_bridge_connector_init+0x318/0x394 msm_dsi_manager_connector_init+0xac/0xdc msm_dsi_modeset_init+0x78/0xc0 _dpu_kms_drm_obj_init+0x198/0x75c dpu_kms_hw_init+0x2f8/0x494 msm_drm_kms_init+0xb0/0x230 msm_drm_init+0x218/0x250 msm_drm_bind+0x3c/0x4c try_to_bring_up_aggregate_device+0x208/0x2a4 __component_add+0xa8/0x188 component_add+0x1c/0x2c dsi_dev_attach+0x24/0x34 dsi_host_attach+0x68/0xa0 devm_mipi_dsi_attach+0x40/0xcc lt9611_attach_dsi+0x94/0x118 lt9611_probe+0x368/0x3c8 i2c_device_probe+0x2d0/0x3d8 really_probe+0x130/0x354 __driver_probe_device+0xac/0x110 driver_probe_device+0x44/0x110 __device_attach_driver+0xb0/0x138 bus_for_each_drv+0x90/0xe8 __device_attach+0xfc/0x190 device_initial_probe+0x1c/0x2c bus_probe_device+0x44/0xa0 deferred_probe_work_func+0xac/0x110 process_one_work+0x20c/0x51c process_scheduled_works+0x58/0x88 worker_thread+0x1ec/0x304 kthread+0x194/0x1d4 ret_from_fork+0x10/0x20 Reported-by: Konrad Dybcio Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/664149/ Link: https://lore.kernel.org/r/20250715-msm-move-snapshot-init-v1-1-f39c396192ab@oss.qualcomm.com --- drivers/gpu/drm/msm/msm_kms.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 6889f1c1e721..56828d218e88 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c @@ -275,6 +275,12 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) if (ret) return ret; + ret = msm_disp_snapshot_init(ddev); + if (ret) { + DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); + return ret; + } + ret = priv->kms_init(ddev); if (ret) { DRM_DEV_ERROR(dev, "failed to load kms\n"); @@ -327,10 +333,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv) goto err_msm_uninit; } - ret = msm_disp_snapshot_init(ddev); - if (ret) - DRM_DEV_ERROR(dev, "msm_disp_snapshot_init failed ret = %d\n", ret); - drm_mode_config_reset(ddev); return 0; From 1a76b255eceb9c570c6228f6393e1d63d97a22ba Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 15 Jul 2025 20:28:18 +0300 Subject: [PATCH 0319/1307] drm/msm/dpu: correct dpu_plane_virtual_atomic_check() Fix c&p error in dpu_plane_virtual_atomic_check(), compare CRTC width too, in addition to CRTC height. Fixes: 8c62a31607f6 ("drm/msm/dpu: allow using two SSPP blocks for a single plane") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507150432.U0cALR6W-lkp@intel.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Jessica Zhang Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/664170/ Link: https://lore.kernel.org/r/20250715-msm-fix-virt-atomic-check-v1-1-9bab02c9f952@oss.qualcomm.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 01171c535a27..c722f54e71b0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1162,7 +1162,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, if (!old_plane_state || !old_plane_state->fb || old_plane_state->src_w != plane_state->src_w || old_plane_state->src_h != plane_state->src_h || - old_plane_state->src_w != plane_state->src_w || + old_plane_state->crtc_w != plane_state->crtc_w || old_plane_state->crtc_h != plane_state->crtc_h || msm_framebuffer_format(old_plane_state->fb) != msm_framebuffer_format(plane_state->fb)) From 494045c561e68945b1183ff416b8db8e37a122d6 Mon Sep 17 00:00:00 2001 From: Ayushi Makhija Date: Wed, 30 Jul 2025 18:09:38 +0530 Subject: [PATCH 0320/1307] drm/msm: update the high bitfield of certain DSI registers Currently, the high bitfield of certain DSI registers do not align with the configuration of the SWI registers description. This can lead to wrong programming these DSI registers, for example for 4k resloution where H_TOTAL is taking 13 bits but software is programming only 12 bits because of the incorrect bitmask for H_TOTAL bitfeild, this is causing DSI FIFO errors. To resolve this issue, increase the high bitfield of the DSI registers from 12 bits to 16 bits in dsi.xml to match the SWI register configuration. Signed-off-by: Ayushi Makhija Fixes: 4f52f5e63b62 ("drm/msm: import XML display registers database") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/666229/ Link: https://lore.kernel.org/r/20250730123938.1038640-1-quic_amakhija@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/registers/display/dsi.xml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/registers/display/dsi.xml b/drivers/gpu/drm/msm/registers/display/dsi.xml index 501ffc585a9f..c7a7b633d747 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi.xml @@ -159,28 +159,28 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - - + + - - + + - - + + - - + + - - + + - - + + @@ -209,8 +209,8 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> - - + + From daab47925c06a04792ca720d8438abd37775e357 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 16:27:35 -0700 Subject: [PATCH 0321/1307] drm/msm/dpu: Initialize crtc_state to NULL in dpu_plane_virtual_atomic_check() After a recent change in clang to expose uninitialized warnings from const variables and pointers [1], there is a warning around crtc_state in dpu_plane_virtual_atomic_check(): drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1145:6: error: variable 'crtc_state' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] 1145 | if (plane_state->crtc) | ^~~~~~~~~~~~~~~~~ drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1149:58: note: uninitialized use occurs here 1149 | ret = dpu_plane_atomic_check_nosspp(plane, plane_state, crtc_state); | ^~~~~~~~~~ drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1145:2: note: remove the 'if' if its condition is always true 1145 | if (plane_state->crtc) | ^~~~~~~~~~~~~~~~~~~~~~ 1146 | crtc_state = drm_atomic_get_new_crtc_state(state, drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c:1139:35: note: initialize the variable 'crtc_state' to silence this warning 1139 | struct drm_crtc_state *crtc_state; | ^ | = NULL Initialize crtc_state to NULL like other places in the driver do, so that it is consistently initialized. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2106 Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Link: https://github.com/llvm/llvm-project/commit/2464313eef01c5b1edf0eccf57a32cdee01472c7 [1] Signed-off-by: Nathan Chancellor Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index c722f54e71b0..6859e8ef6b05 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1129,7 +1129,7 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct dpu_plane_state *pstate = to_dpu_plane_state(plane_state); - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *crtc_state = NULL; int ret; if (IS_ERR(plane_state)) From 197713d0cf018e7d58a63a83cc43035b56678a50 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 23 Jul 2025 16:33:43 +0300 Subject: [PATCH 0322/1307] soc: qcom: ubwc: provide no-UBWC configuration After the commit 45a2974157d2 ("drm/msm: Use the central UBWC config database") the MDSS driver errors out if UBWC database didn't provide it with the UBWC configuration. Make UBWC database return zero data for MSM8916 / APQ8016, MSM8974 / APQ8074, MSM8226 and MSM8939. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Acked-by: Bjorn Andersson Reviewed-by: Rob Clark Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/665313/ Link: https://lore.kernel.org/r/20250723-ubwc-no-ubwc-v3-1-81bdb75685bf@oss.qualcomm.com --- drivers/soc/qcom/ubwc_config.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index bd0a98aad9f3..9002fc9373ce 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -12,6 +12,10 @@ #include +static const struct qcom_ubwc_cfg_data no_ubwc_data = { + /* no UBWC, no HBB */ +}; + static const struct qcom_ubwc_cfg_data msm8937_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, @@ -215,11 +219,18 @@ static const struct qcom_ubwc_cfg_data x1e80100_data = { }; static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { + { .compatible = "qcom,apq8016", .data = &no_ubwc_data }, + { .compatible = "qcom,apq8026", .data = &no_ubwc_data }, + { .compatible = "qcom,apq8074", .data = &no_ubwc_data }, { .compatible = "qcom,apq8096", .data = &msm8998_data }, + { .compatible = "qcom,msm8226", .data = &no_ubwc_data }, + { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, { .compatible = "qcom,msm8917", .data = &msm8937_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, + { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, { .compatible = "qcom,msm8956", .data = &msm8937_data }, + { .compatible = "qcom,msm8974", .data = &no_ubwc_data }, { .compatible = "qcom,msm8976", .data = &msm8937_data }, { .compatible = "qcom,msm8996", .data = &msm8998_data }, { .compatible = "qcom,msm8998", .data = &msm8998_data }, From 0b6974bb4134ca6396752a0b122026b41300592f Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 23 Jul 2025 16:19:22 +0200 Subject: [PATCH 0323/1307] soc: qcom: ubwc: Add missing UBWC config for SM7225 SM7225 is a variation of SM6350, and also needs an entry in the table. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Luca Weiss Reviewed-by: Dmitry Baryshkov Acked-by: Bjorn Andersson Signed-off-by: Dmitry Baryshkov --- drivers/soc/qcom/ubwc_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 9002fc9373ce..1490a7f63767 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -257,6 +257,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sm6375", .data = &sm6350_data, }, { .compatible = "qcom,sm7125", .data = &sc7180_data }, { .compatible = "qcom,sm7150", .data = &sm7150_data, }, + { .compatible = "qcom,sm7225", .data = &sm6350_data, }, { .compatible = "qcom,sm8150", .data = &sm8150_data, }, { .compatible = "qcom,sm8250", .data = &sm8250_data, }, { .compatible = "qcom,sm8350", .data = &sm8350_data, }, From 757fc66da91b54d4fbc414bee5c440b52560d3b7 Mon Sep 17 00:00:00 2001 From: Baolin Liu Date: Tue, 12 Aug 2025 10:17:09 +0800 Subject: [PATCH 0324/1307] ext4: fix incorrect function name in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6b730a405037 “ext4: hoist ext4_block_write_begin and replace the __block_write_begin”, the comment should be updated accordingly from "__block_write_begin" to "ext4_block_write_begin". Fixes: 6b730a405037 (“ext4: hoist ext4_block_write_begin and replace...") Signed-off-by: Baolin Liu Reviewed-by: Darrick J. Wong Link: https://patch.msgid.link/20250812021709.1120716-1-liubaolin12138@163.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 731a800d9c14..238a0f12a5c0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3159,7 +3159,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, folio_unlock(folio); folio_put(folio); /* - * block_write_begin may have instantiated a few blocks + * ext4_block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold inode lock. */ From 9d98cf4632258720f18265a058e62fde120c0151 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 12 Aug 2025 14:37:52 +0800 Subject: [PATCH 0325/1307] jbd2: prevent softlockup in jbd2_log_do_checkpoint() Both jbd2_log_do_checkpoint() and jbd2_journal_shrink_checkpoint_list() periodically release j_list_lock after processing a batch of buffers to avoid long hold times on the j_list_lock. However, since both functions contend for j_list_lock, the combined time spent waiting and processing can be significant. jbd2_journal_shrink_checkpoint_list() explicitly calls cond_resched() when need_resched() is true to avoid softlockups during prolonged operations. But jbd2_log_do_checkpoint() only exits its loop when need_resched() is true, relying on potentially sleeping functions like __flush_batch() or wait_on_buffer() to trigger rescheduling. If those functions do not sleep, the kernel may hit a softlockup. watchdog: BUG: soft lockup - CPU#3 stuck for 156s! [kworker/u129:2:373] CPU: 3 PID: 373 Comm: kworker/u129:2 Kdump: loaded Not tainted 6.6.0+ #10 Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.27 06/13/2017 Workqueue: writeback wb_workfn (flush-7:2) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : native_queued_spin_lock_slowpath+0x358/0x418 lr : jbd2_log_do_checkpoint+0x31c/0x438 [jbd2] Call trace: native_queued_spin_lock_slowpath+0x358/0x418 jbd2_log_do_checkpoint+0x31c/0x438 [jbd2] __jbd2_log_wait_for_space+0xfc/0x2f8 [jbd2] add_transaction_credits+0x3bc/0x418 [jbd2] start_this_handle+0xf8/0x560 [jbd2] jbd2__journal_start+0x118/0x228 [jbd2] __ext4_journal_start_sb+0x110/0x188 [ext4] ext4_do_writepages+0x3dc/0x740 [ext4] ext4_writepages+0xa4/0x190 [ext4] do_writepages+0x94/0x228 __writeback_single_inode+0x48/0x318 writeback_sb_inodes+0x204/0x590 __writeback_inodes_wb+0x54/0xf8 wb_writeback+0x2cc/0x3d8 wb_do_writeback+0x2e0/0x2f8 wb_workfn+0x80/0x2a8 process_one_work+0x178/0x3e8 worker_thread+0x234/0x3b8 kthread+0xf0/0x108 ret_from_fork+0x10/0x20 So explicitly call cond_resched() in jbd2_log_do_checkpoint() to avoid softlockup. Cc: stable@kernel.org Signed-off-by: Baokun Li Link: https://patch.msgid.link/20250812063752.912130-1-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/jbd2/checkpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index b3971e91e8eb..38861ca04899 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -285,6 +285,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) retry: if (batch_count) __flush_batch(journal, &batch_count); + cond_resched(); spin_lock(&journal->j_list_lock); goto restart; } From 0c6b24d70da21201ed009a2aca740d2dfddc7ab5 Mon Sep 17 00:00:00 2001 From: Jason-JH Lin Date: Mon, 28 Jul 2025 10:48:50 +0800 Subject: [PATCH 0326/1307] drm/mediatek: Add error handling for old state CRTC in atomic_disable Introduce error handling to address an issue where, after a hotplug event, the cursor continues to update. This situation can lead to a kernel panic due to accessing the NULL `old_state->crtc`. E,g. Unable to handle kernel NULL pointer dereference at virtual address Call trace: mtk_crtc_plane_disable+0x24/0x140 mtk_plane_atomic_update+0x8c/0xa8 drm_atomic_helper_commit_planes+0x114/0x2c8 drm_atomic_helper_commit_tail_rpm+0x4c/0x158 commit_tail+0xa0/0x168 drm_atomic_helper_commit+0x110/0x120 drm_atomic_commit+0x8c/0xe0 drm_atomic_helper_update_plane+0xd4/0x128 __setplane_atomic+0xcc/0x110 drm_mode_cursor_common+0x250/0x440 drm_mode_cursor_ioctl+0x44/0x70 drm_ioctl+0x264/0x5d8 __arm64_sys_ioctl+0xd8/0x510 invoke_syscall+0x6c/0xe0 do_el0_svc+0x68/0xe8 el0_svc+0x34/0x60 el0t_64_sync_handler+0x1c/0xf8 el0t_64_sync+0x180/0x188 Adding NULL pointer checks to ensure stability by preventing operations on an invalid CRTC state. Fixes: d208261e9f7c ("drm/mediatek: Add wait_event_timeout when disabling plane") Signed-off-by: Jason-JH Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/linux-mediatek/patch/20250728025036.24953-1-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_plane.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index cbc4f37da8ba..02349bd44001 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -292,7 +292,8 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; - mtk_crtc_plane_disable(old_state->crtc, plane); + if (old_state && old_state->crtc) + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, From 75a7b151e808355a1fdf972e85da137612b8f2ae Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 12 Aug 2025 15:09:06 +0200 Subject: [PATCH 0327/1307] rust: devres: fix leaking call to devm_add_action() When the data argument of Devres::new() is Err(), we leak the preceding call to devm_add_action(). In order to fix this, call devm_add_action() in a unit type initializer in try_pin_init!() after the initializers of all other fields. Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc") Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250812130928.11075-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index da18091143a6..d04e3fcebafb 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -115,10 +115,11 @@ pub struct Devres { /// Contains all the fields shared with [`Self::callback`]. // TODO: Replace with `UnsafePinned`, once available. // - // Subsequently, the `drop_in_place()` in `Devres::drop` and the explicit `Send` and `Sync' - // impls can be removed. + // Subsequently, the `drop_in_place()` in `Devres::drop` and `Devres::new` as well as the + // explicit `Send` and `Sync' impls can be removed. #[pin] inner: Opaque>, + _add_action: (), } impl Devres { @@ -140,7 +141,15 @@ pub fn new<'a, E>( dev: dev.into(), callback, // INVARIANT: `inner` is properly initialized. - inner <- { + inner <- Opaque::pin_init(try_pin_init!(Inner { + devm <- Completion::new(), + revoke <- Completion::new(), + data <- Revocable::new(data), + })), + // TODO: Replace with "initializer code blocks" [1] once available. + // + // [1] https://github.com/Rust-for-Linux/pin-init/pull/69 + _add_action: { // SAFETY: `this` is a valid pointer to uninitialized memory. let inner = unsafe { &raw mut (*this.as_ptr()).inner }; @@ -152,13 +161,13 @@ pub fn new<'a, E>( // live at least as long as the returned `impl PinInit`. to_result(unsafe { bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) - })?; + }).inspect_err(|_| { + let inner = Opaque::cast_into(inner); - Opaque::pin_init(try_pin_init!(Inner { - devm <- Completion::new(), - revoke <- Completion::new(), - data <- Revocable::new(data), - })) + // SAFETY: `inner` is a valid pointer to an `Inner` and valid for both reads + // and writes. + unsafe { core::ptr::drop_in_place(inner) }; + })?; }, }) } From a58893aa173923fdc49c2d35d638d8133065e952 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 12 Aug 2025 13:08:58 +0800 Subject: [PATCH 0328/1307] net: mctp: Fix bad kfree_skb in bind lookup test The kunit test's skb_pkt is consumed by mctp_dst_input() so shouldn't be freed separately. Fixes: e6d8e7dbc5a3 ("net: mctp: Add bind lookup test") Reported-by: Alexandre Ghiti Closes: https://lore.kernel.org/all/734b02a3-1941-49df-a0da-ec14310d41e4@ghiti.fr/ Signed-off-by: Matt Johnston Tested-by: Alexandre Ghiti Link: https://patch.msgid.link/20250812-fix-mctp-bind-test-v1-1-5e2128664eb3@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index fb6b46a952cb..69a3ccfc6310 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -1586,7 +1586,6 @@ static void mctp_test_bind_lookup(struct kunit *test) cleanup: kfree_skb(skb_sock); - kfree_skb(skb_pkt); /* Drop all binds */ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++) From b2cafefaf0473bafb0c3502a8530167d35e06113 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 12 Aug 2025 16:21:26 +0000 Subject: [PATCH 0329/1307] netdevsim: Fix wild pointer access in nsim_queue_free(). syzbot reported the splat below. [0] When nsim_queue_uninit() is called from nsim_init_netdevsim(), register_netdevice() has not been called, thus dev->dstats has not been allocated. Let's not call dev_dstats_rx_dropped_add() in such a case. [0] BUG: unable to handle page fault for address: ffff88809782c020 PF: supervisor write access in kernel mode PF: error_code(0x0002) - not-present page PGD 1b401067 P4D 1b401067 PUD 0 Oops: Oops: 0002 [#1] SMP KASAN NOPTI CPU: 3 UID: 0 PID: 8476 Comm: syz.1.251 Not tainted 6.16.0-syzkaller-06699-ge8d780dcd957 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:local_add arch/x86/include/asm/local.h:33 [inline] RIP: 0010:u64_stats_add include/linux/u64_stats_sync.h:89 [inline] RIP: 0010:dev_dstats_rx_dropped_add include/linux/netdevice.h:3027 [inline] RIP: 0010:nsim_queue_free+0xba/0x120 drivers/net/netdevsim/netdev.c:714 Code: 07 77 6c 4a 8d 3c ed 20 7e f1 8d 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 46 4a 03 1c ed 20 7e f1 8d <4c> 01 63 20 be 00 02 00 00 48 8d 3d 00 00 00 00 e8 61 2f 58 fa 48 RSP: 0018:ffffc900044af150 EFLAGS: 00010286 RAX: dffffc0000000000 RBX: ffff88809782c000 RCX: 00000000000079c3 RDX: 1ffffffff1be2fc7 RSI: ffffffff8c15f380 RDI: ffffffff8df17e38 RBP: ffff88805f59d000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000003 R14: ffff88806ceb3d00 R15: ffffed100dfd308e FS: 0000000000000000(0000) GS:ffff88809782c000(0063) knlGS:00000000f505db40 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: ffff88809782c020 CR3: 000000006fc6a000 CR4: 0000000000352ef0 Call Trace: nsim_queue_uninit drivers/net/netdevsim/netdev.c:993 [inline] nsim_init_netdevsim drivers/net/netdevsim/netdev.c:1049 [inline] nsim_create+0xd0a/0x1260 drivers/net/netdevsim/netdev.c:1101 __nsim_dev_port_add+0x435/0x7d0 drivers/net/netdevsim/dev.c:1438 nsim_dev_port_add_all drivers/net/netdevsim/dev.c:1494 [inline] nsim_dev_reload_create drivers/net/netdevsim/dev.c:1546 [inline] nsim_dev_reload_up+0x5b8/0x860 drivers/net/netdevsim/dev.c:1003 devlink_reload+0x322/0x7c0 net/devlink/dev.c:474 devlink_nl_reload_doit+0xe31/0x1410 net/devlink/dev.c:584 genl_family_rcv_msg_doit+0x206/0x2f0 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x55c/0x800 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x155/0x420 net/netlink/af_netlink.c:2552 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg net/socket.c:729 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2614 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2668 __sys_sendmsg+0x16d/0x220 net/socket.c:2700 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0x7c/0x3a0 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x32/0x80 arch/x86/entry/syscall_32.c:331 entry_SYSENTER_compat_after_hwframe+0x84/0x8e RIP: 0023:0xf708e579 Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d b4 26 00 00 00 00 8d b4 26 00 00 00 00 RSP: 002b:00000000f505d55c EFLAGS: 00000296 ORIG_RAX: 0000000000000172 RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 0000000080000080 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: CR2: ffff88809782c020 Fixes: 2a68a22304f9 ("netdevsim: account dropped packet length in stats on queue free") Reported-by: syzbot+8aa80c6232008f7b957d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688bb9ca.a00a0220.26d0e1.0050.GAE@google.com/ Suggested-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250812162130.4129322-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 39fe28af48b9..0178219f0db5 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -710,9 +710,13 @@ static struct nsim_rq *nsim_queue_alloc(void) static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq) { hrtimer_cancel(&rq->napi_timer); - local_bh_disable(); - dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); - local_bh_enable(); + + if (rq->skb_queue.qlen) { + local_bh_disable(); + dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); + local_bh_enable(); + } + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); kfree(rq); } From 39f8fcda2088382a4aa70b258d6f7225aa386f11 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 12 Aug 2025 11:29:07 -0700 Subject: [PATCH 0330/1307] bnxt: fill data page pool with frags if PAGE_SIZE > BNXT_RX_PAGE_SIZE The data page pool always fills the HW rx ring with pages. On arm64 with 64K pages, this will waste _at least_ 32K of memory per entry in the rx ring. Fix by fragmenting the pages if PAGE_SIZE > BNXT_RX_PAGE_SIZE. This makes the data page pool the same as the header pool. Tested with iperf3 with a small (64 entries) rx ring to encourage buffer circulation. Fixes: cd1fafe7da1f ("eth: bnxt: add support rx side device memory TCP") Reviewed-by: Michael Chan Signed-off-by: David Wei Link: https://patch.msgid.link/20250812182907.1540755-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 76a4c5ae8000..2800a90fba1f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -926,15 +926,21 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, + unsigned int *offset, gfp_t gfp) { netmem_ref netmem; - netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + netmem = page_pool_alloc_frag_netmem(rxr->page_pool, offset, BNXT_RX_PAGE_SIZE, gfp); + } else { + netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + *offset = 0; + } if (!netmem) return 0; - *mapping = page_pool_get_dma_addr_netmem(netmem); + *mapping = page_pool_get_dma_addr_netmem(netmem) + *offset; return netmem; } @@ -1029,7 +1035,7 @@ static int bnxt_alloc_rx_netmem(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; netmem_ref netmem; - netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, gfp); + netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, &offset, gfp); if (!netmem) return -ENOMEM; From 87c6efc5ce9c126ae4a781bc04504b83780e3650 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:29 +0200 Subject: [PATCH 0331/1307] net/sched: ets: use old 'nbands' while purging unused classes Shuang reported sch_ets test-case [1] crashing in ets_class_qlen_notify() after recent changes from Lion [2]. The problem is: in ets_qdisc_change() we purge unused DWRR queues; the value of 'q->nbands' is the new one, and the cleanup should be done with the old one. The problem is here since my first attempts to fix ets_qdisc_change(), but it surfaced again after the recent qdisc len accounting fixes. Fix it purging idle DWRR queues before assigning a new value of 'q->nbands', so that all purge operations find a consistent configuration: - old 'q->nbands' because it's needed by ets_class_find() - old 'q->nstrict' because it's needed by ets_class_is_strict() BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 62 UID: 0 PID: 39457 Comm: tc Kdump: loaded Not tainted 6.12.0-116.el10.x86_64 #1 PREEMPT(voluntary) Hardware name: Dell Inc. PowerEdge R640/06DKY5, BIOS 2.12.2 07/09/2021 RIP: 0010:__list_del_entry_valid_or_report+0x4/0x80 Code: ff 4c 39 c7 0f 84 39 19 8e ff b8 01 00 00 00 c3 cc cc cc cc 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <48> 8b 17 48 8b 4f 08 48 85 d2 0f 84 56 19 8e ff 48 85 c9 0f 84 ab RSP: 0018:ffffba186009f400 EFLAGS: 00010202 RAX: 00000000000000d6 RBX: 0000000000000000 RCX: 0000000000000004 RDX: ffff9f0fa29b69c0 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffffffc12c2400 R08: 0000000000000008 R09: 0000000000000004 R10: ffffffffffffffff R11: 0000000000000004 R12: 0000000000000000 R13: ffff9f0f8cfe0000 R14: 0000000000100005 R15: 0000000000000000 FS: 00007f2154f37480(0000) GS:ffff9f269c1c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001530be001 CR4: 00000000007726f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ets_class_qlen_notify+0x65/0x90 [sch_ets] qdisc_tree_reduce_backlog+0x74/0x110 ets_qdisc_change+0x630/0xa40 [sch_ets] __tc_modify_qdisc.constprop.0+0x216/0x7f0 tc_modify_qdisc+0x7c/0x120 rtnetlink_rcv_msg+0x145/0x3f0 netlink_rcv_skb+0x53/0x100 netlink_unicast+0x245/0x390 netlink_sendmsg+0x21b/0x470 ____sys_sendmsg+0x39d/0x3d0 ___sys_sendmsg+0x9a/0xe0 __sys_sendmsg+0x7a/0xd0 do_syscall_64+0x7d/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f2155114084 Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d 25 f0 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89 RSP: 002b:00007fff1fd7a988 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000560ec063e5e0 RCX: 00007f2155114084 RDX: 0000000000000000 RSI: 00007fff1fd7a9f0 RDI: 0000000000000003 RBP: 00007fff1fd7aa60 R08: 0000000000000010 R09: 000000000000003f R10: 0000560ee9b3a010 R11: 0000000000000202 R12: 00007fff1fd7aae0 R13: 000000006891ccde R14: 0000560ec063e5e0 R15: 00007fff1fd7aad0 [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/d912cbd7-193b-4269-9857-525bee8bbb6a@gmail.com/ Cc: stable@vger.kernel.org Fixes: 103406b38c60 ("net/sched: Always pass notifications when child class becomes empty") Fixes: c062f2a0b04d ("net/sched: sch_ets: don't remove idle classes from the round-robin list") Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc") Reported-by: Li Shuang Closes: https://issues.redhat.com/browse/RHEL-108026 Reviewed-by: Petr Machata Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Signed-off-by: Davide Caratti Link: https://patch.msgid.link/7928ff6d17db47a2ae7cc205c44777b1f1950545.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 037f764822b9..82635dd2cfa5 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -651,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); + for (i = nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); + } + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { @@ -658,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } - for (i = q->nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del_init(&q->classes[i].alist); - qdisc_purge_queue(q->classes[i].qdisc); - } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); From 774a2ae6617b30a4dcc7ebaf178ef05da05b2a47 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:30 +0200 Subject: [PATCH 0332/1307] selftests: net/forwarding: test purge of active DWRR classes Extend sch_ets.sh to add a reproducer for problematic list deletions when active DWRR class are purged by ets_qdisc_change() [1] [2]. [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/f3b9bacc73145f265c19ab80785933da5b7cbdec.1754581577.git.dcaratti@redhat.com/ Suggested-by: Victor Nogueira Signed-off-by: Davide Caratti Acked-by: Victor Nogueira Link: https://patch.msgid.link/489497cb781af7389011ca1591fb702a7391f5e7.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/sch_ets.sh | 1 + tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} From 52565a935213cd6a8662ddb8efe5b4219343a25d Mon Sep 17 00:00:00 2001 From: Sven Stegemann Date: Tue, 12 Aug 2025 21:18:03 +0200 Subject: [PATCH 0333/1307] net: kcm: Fix race condition in kcm_unattach() syzbot found a race condition when kcm_unattach(psock) and kcm_release(kcm) are executed at the same time. kcm_unattach() is missing a check of the flag kcm->tx_stopped before calling queue_work(). If the kcm has a reserved psock, kcm_unattach() might get executed between cancel_work_sync() and unreserve_psock() in kcm_release(), requeuing kcm->tx_work right before kcm gets freed in kcm_done(). Remove kcm->tx_stopped and replace it by the less error-prone disable_work_sync(). Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+e62c9db591c30e174662@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e62c9db591c30e174662 Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94 Reported-by: syzbot+be6b1fdfeae512726b4e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6b1fdfeae512726b4e Signed-off-by: Sven Stegemann Link: https://patch.msgid.link/20250812191810.27777-1-sven@stegemann.de Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 - net/kcm/kcmsock.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/include/net/kcm.h b/include/net/kcm.h index 441e993be634..d9c35e71ecea 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -71,7 +71,6 @@ struct kcm_sock { struct list_head wait_psock_list; struct sk_buff *seq_skb; struct mutex tx_mutex; - u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a4971e6fa943..b4f01cb07561 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -430,7 +430,7 @@ static void psock_write_space(struct sock *sk) /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm && !unlikely(kcm->tx_stopped)) + if (kcm) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1693,12 +1693,6 @@ static int kcm_release(struct socket *sock) */ __skb_queue_purge(&sk->sk_write_queue); - /* Set tx_stopped. This is checked when psock is bound to a kcm and we - * get a writespace callback. This prevents further work being queued - * from the callback (unbinding the psock occurs after canceling work. - */ - kcm->tx_stopped = 1; - release_sock(sk); spin_lock_bh(&mux->lock); @@ -1714,7 +1708,7 @@ static int kcm_release(struct socket *sock) /* Cancel work. After this point there should be no outside references * to the kcm socket. */ - cancel_work_sync(&kcm->tx_work); + disable_work_sync(&kcm->tx_work); lock_sock(sk); psock = kcm->tx_psock; From e2f9ae91619add9884428d095c3c630b6b120a61 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 14 Aug 2025 11:38:58 +0900 Subject: [PATCH 0334/1307] MAINTAINERS: Remove bouncing kprobes maintainer The kprobes MAINTAINERS entry includes anil.s.keshavamurthy@intel.com. That address is bouncing. Remove it. This still leaves three other listed maintainers. Link: https://lore.kernel.org/all/20250808180124.7DDE2ECD@davehans-spike.ostc.intel.com/ Signed-off-by: Dave Hansen Cc: Naveen N Rao Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..a2bc2bb91970 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13686,7 +13686,6 @@ F: scripts/Makefile.kmsan KPROBES M: Naveen N Rao -M: Anil S Keshavamurthy M: "David S. Miller" M: Masami Hiramatsu L: linux-kernel@vger.kernel.org From 4faff70959d51078f9ee8372f8cff0d7045e4114 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 11 Aug 2025 17:29:31 +0800 Subject: [PATCH 0335/1307] net: usb: asix_devices: add phy_mask for ax88772 mdio bus Without setting phy_mask for ax88772 mdio bus, current driver may create at most 32 mdio phy devices with phy address range from 0x00 ~ 0x1f. DLink DUB-E100 H/W Ver B1 is such a device. However, only one main phy device will bind to net phy driver. This is creating issue during system suspend/resume since phy_polling_mode() in phy_state_machine() will directly deference member of phydev->drv for non-main phy devices. Then NULL pointer dereference issue will occur. Due to only external phy or internal phy is necessary, add phy_mask for ax88772 mdio bus to workarnoud the issue. Closes: https://lore.kernel.org/netdev/20250806082931.3289134-1-xu.yang_2@nxp.com Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Tested-by: Oleksij Rempel Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250811092931.860333-1-xu.yang_2@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b0318fb50b5..d9f5942ccc44 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -676,6 +676,7 @@ static int ax88772_init_mdio(struct usbnet *dev) priv->mdio->read = &asix_mdio_bus_read; priv->mdio->write = &asix_mdio_bus_write; priv->mdio->name = "Asix MDIO Bus"; + priv->mdio->phy_mask = ~(BIT(priv->phy_addr) | BIT(AX_EMBD_PHY_ADDR)); /* mii bus name is usb-- */ snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); From d832ccbc301fbd9e5a1d691bdcf461cdb514595f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Aug 2025 10:12:42 +0200 Subject: [PATCH 0336/1307] ALSA: usb-audio: Validate UAC3 power domain descriptors, too UAC3 power domain descriptors need to be verified with its variable bLength for avoiding the unexpected OOB accesses by malicious firmware, too. Fixes: 9a2fe9b801f5 ("ALSA: usb: initial USB Audio Device Class 3.0 support") Reported-and-tested-by: Youngjun Lee Cc: Link: https://patch.msgid.link/20250814081245.8902-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 6fe206f6e911..4f4e8e87a14c 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -221,6 +221,17 @@ static bool validate_uac3_feature_unit(const void *p, return d->bLength >= sizeof(*d) + 4 + 2; } +static bool validate_uac3_power_domain_unit(const void *p, + const struct usb_desc_validator *v) +{ + const struct uac3_power_domain_descriptor *d = p; + + if (d->bLength < sizeof(*d)) + return false; + /* baEntities[] + wPDomainDescrStr */ + return d->bLength >= sizeof(*d) + d->bNrEntities + 2; +} + static bool validate_midi_out_jack(const void *p, const struct usb_desc_validator *v) { @@ -285,6 +296,7 @@ static const struct usb_desc_validator audio_validators[] = { struct uac3_clock_multiplier_descriptor), /* UAC_VERSION_3, UAC3_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC_VERSION_3, UAC3_CONNECTORS: not implemented yet */ + FUNC(UAC_VERSION_3, UAC3_POWER_DOMAIN, validate_uac3_power_domain_unit), { } /* terminator */ }; From ecfd41166b72b67d3bdeb88d224ff445f6163869 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Aug 2025 10:12:43 +0200 Subject: [PATCH 0337/1307] ALSA: usb-audio: Validate UAC3 cluster segment descriptors UAC3 class segment descriptors need to be verified whether their sizes match with the declared lengths and whether they fit with the allocated buffer sizes, too. Otherwise malicious firmware may lead to the unexpected OOB accesses. Fixes: 11785ef53228 ("ALSA: usb-audio: Initial Power Domain support") Reported-and-tested-by: Youngjun Lee Cc: Link: https://patch.msgid.link/20250814081245.8902-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index ad6ced780634..acf3dc2d79e0 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -341,20 +341,28 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor len = le16_to_cpu(cluster->wLength); c = 0; - p += sizeof(struct uac3_cluster_header_descriptor); + p += sizeof(*cluster); + len -= sizeof(*cluster); - while (((p - (void *)cluster) < len) && (c < channels)) { + while (len > 0 && (c < channels)) { struct uac3_cluster_segment_descriptor *cs_desc = p; u16 cs_len; u8 cs_type; + if (len < sizeof(*p)) + break; cs_len = le16_to_cpu(cs_desc->wLength); + if (len < cs_len) + break; cs_type = cs_desc->bSegmentType; if (cs_type == UAC3_CHANNEL_INFORMATION) { struct uac3_cluster_information_segment_descriptor *is = p; unsigned char map; + if (cs_len < sizeof(*is)) + break; + /* * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h @@ -456,6 +464,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor chmap->map[c++] = map; } p += cs_len; + len -= cs_len; } if (channels < c) @@ -881,7 +890,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, u64 badd_formats = 0; unsigned int num_channels; struct audioformat *fp; - u16 cluster_id, wLength; + u16 cluster_id, wLength, cluster_wLength; int clock = 0; int err; @@ -1011,6 +1020,16 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, return ERR_PTR(-EIO); } + cluster_wLength = le16_to_cpu(cluster->wLength); + if (cluster_wLength < sizeof(*cluster) || + cluster_wLength > wLength) { + dev_err(&dev->dev, + "%u:%d : invalid Cluster Descriptor size\n", + iface_no, altno); + kfree(cluster); + return ERR_PTR(-EIO); + } + num_channels = cluster->bNrChannels; chmap = convert_chmap_v3(cluster); kfree(cluster); From c345102d1feed3de8aa9b9ec7d18b3fbba62deb7 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 13 Aug 2025 18:08:42 +0800 Subject: [PATCH 0338/1307] ALSA: hda/tas2781: Normalize the volume kcontrol name Change the name of the kcontrol from "Gain" to "Volume". Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250813100842.12224-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 45ac5e41bd4f..06c7bc2b9e9d 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -265,7 +265,7 @@ static const struct snd_kcontrol_new tas2770_snd_controls[] = { }; static const struct snd_kcontrol_new tas2781_snd_controls[] = { - ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL, + ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, From 35f6bedccf4c4280f02d48e4f7d194e64e9a62d8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 13 Aug 2025 18:08:53 +0900 Subject: [PATCH 0339/1307] ata: libata-eh: Fix link state check for IDE/PATA ports Commit 4371fe1ba400 ("ata: libata-eh: Avoid unnecessary resets when revalidating devices") replaced the call to ata_phys_link_offline() in ata_eh_revalidate_and_attach() with the new function ata_eh_link_established() which relaxes the checks on a device link state to account for low power mode transitions. However, this change assumed that the device port has a valid scr_read method to obtain the SStatus register for the port. This is not always the case, especially with older IDE/PATA adapters (e.g. PATA/IDE devices emulated with QEMU). For such adapter, ata_eh_link_established() will always return false, causing ata_eh_revalidate_and_attach() to go into its error path and ultimately to the device being disabled. Avoid this by restoring the previous behavior, which is to assume that the link is online if reading the port SStatus register fails. While at it, also fix the spelling of SStatus in the comment describing the function ata_eh_link_established(). Reported-by: Shin'ichiro Kawasaki Fixes: 4371fe1ba400 ("ata: libata-eh: Avoid unnecessary resets when revalidating devices") Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Niklas Cassel --- drivers/ata/libata-eh.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2946ae6d4b2c..2586e77ebf45 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2075,7 +2075,7 @@ static void ata_eh_get_success_sense(struct ata_link *link) * Check if a link is established. This is a relaxed version of * ata_phys_link_online() which accounts for the fact that this is potentially * called after changing the link power management policy, which may not be - * reflected immediately in the SSTAUS register (e.g., we may still be seeing + * reflected immediately in the SStatus register (e.g., we may still be seeing * the PHY in partial, slumber or devsleep Partial power management state. * So check that: * - A device is still present, that is, DET is 1h (Device presence detected @@ -2089,8 +2089,13 @@ static bool ata_eh_link_established(struct ata_link *link) u32 sstatus; u8 det, ipm; + /* + * For old IDE/PATA adapters that do not have a valid scr_read method, + * or if reading the SStatus register fails, assume that the device is + * present. Device probe will determine if that is really the case. + */ if (sata_scr_read(link, SCR_STATUS, &sstatus)) - return false; + return true; det = sstatus & 0x0f; ipm = (sstatus >> 8) & 0x0f; From ea177a1b1efc6e42e73ee4a17581842cd254e006 Mon Sep 17 00:00:00 2001 From: Rudi Heitbaum Date: Tue, 12 Aug 2025 06:55:15 +0000 Subject: [PATCH 0340/1307] drm/rockchip: cdn-dp: select bridge for cdp-dp Select drm bridge connector when building cdp-dp. This was missed in previous commit causing build failure. Fixes: afbbca25d06e ("drm/rockchip: cdn-dp: Convert to drm bridge") Signed-off-by: Rudi Heitbaum Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/aJrlUzV1u4i65NRe@8eba3d7b3083 --- drivers/gpu/drm/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index ab525668939a..faf50d872be3 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -53,6 +53,7 @@ config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions From a52dffaa46c2c5ff0b311c4dc1288581f7b9109e Mon Sep 17 00:00:00 2001 From: Piotr Zalewski Date: Sun, 6 Jul 2025 08:36:58 +0000 Subject: [PATCH 0341/1307] drm/rockchip: vop2: make vp registers nonvolatile Make video port registers nonvolatile. As DSP_CTRL register is written to twice due to gamma LUT enable bit which is set outside of the main DSP_CTRL initialization within atomic_enable (for rk356x case it is also necessary to always disable gamma LUT before writing a new LUT) there is a chance that DSP_CTRL value read-out in gamma LUT init/update code is not the one which was written by the preceding DSP_CTRL initialization code within atomic_enable. This might result in misconfigured DSP_CTRL which leads to no visual output[1]. Since DSP_CTRL write takes effect after VSYNC[1] the issue is not always present. When tested on Pinetab2 with kernel 6.14 it happenes only when DRM is compiled as a module[1]. In order to confirm that it is a timing issue I inserted 18ms udelay before vop2_crtc_atomic_try_set_gamma in atomic enable and compiled DRM as module - this has also fixed the issue. [1] https://lore.kernel.org/linux-rockchip/562b38e5.a496.1975f09f983.Coremail.andyshrk@163.com/ Reported-by: Diederik de Haas Closes: https://lore.kernel.org/linux-rockchip/DAEVDSTMWI1E.J454VZN0R9MA@cknow.org/ Suggested-by: Andy Yan Signed-off-by: Piotr Zalewski Tested-by: Diederik de Haas Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250706083629.140332-2-pZ010001011111@proton.me --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 186f6452a7d3..b50927a824b4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -2579,12 +2579,13 @@ static int vop2_win_init(struct vop2 *vop2) } /* - * The window registers are only updated when config done is written. - * Until that they read back the old value. As we read-modify-write - * these registers mark them as non-volatile. This makes sure we read - * the new values from the regmap register cache. + * The window and video port registers are only updated when config + * done is written. Until that they read back the old value. As we + * read-modify-write these registers mark them as non-volatile. This + * makes sure we read the new values from the regmap register cache. */ static const struct regmap_range vop2_nonvolatile_range[] = { + regmap_reg_range(RK3568_VP0_CTRL_BASE, RK3588_VP3_CTRL_BASE + 255), regmap_reg_range(0x1000, 0x23ff), }; From 58768b0563916ddcb73d8ed26ede664915f8df31 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 13 Aug 2025 19:22:56 -0700 Subject: [PATCH 0342/1307] ata: libata-scsi: Fix CDL control Delete extra checks for the ATA_DFLAG_CDL_ENABLED flag that prevent SET FEATURES command from being issued to a drive when NCQ commands are active. ata_mselect_control_ata_feature() sets / clears the ATA_DFLAG_CDL_ENABLED flag during the translation of MODE SELECT to SET FEATURES. If SET FEATURES gets deferred due to outstanding NCQ commands, the original MODE SELECT command will be re-queued. When the re-queued MODE SELECT goes through the ata_mselect_control_ata_feature() translation again, SET FEATURES will not be issued because ATA_DFLAG_CDL_ENABLED has been already set or cleared by the initial translation of MODE SELECT. The ATA_DFLAG_CDL_ENABLED checks in ata_mselect_control_ata_feature() are safe to remove because scsi_cdl_enable() implements a similar logic that avoids enabling CDL if it has been enabled already. Fixes: 17e897a45675 ("ata: libata-scsi: Improve CDL control") Cc: stable@vger.kernel.org Signed-off-by: Igor Pylypiv Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 57f674f51b0c..2ded5e476d6e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3904,21 +3904,16 @@ static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL if it is enabled */ - if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) - return 0; + /* Disable CDL */ ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: /* - * Enable CDL if not already enabled. Since this is mutually - * exclusive with NCQ priority, allow this only if NCQ priority - * is disabled. + * Enable CDL. Since CDL is mutually exclusive with NCQ + * priority, allow this only if NCQ priority is disabled. */ - if (dev->flags & ATA_DFLAG_CDL_ENABLED) - return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); From 535fd4c98452c87537a40610abba45daf5761ec6 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 31 Jul 2025 08:44:50 -0400 Subject: [PATCH 0343/1307] serial: sc16is7xx: fix bug in flow control levels init When trying to set MCR[2], XON1 is incorrectly accessed instead. And when writing to the TCR register to configure flow control levels, we are incorrectly writing to the MSR register. The default value of $00 is then used for TCR, which means that selectable trigger levels in FCR are used in place of TCR. TCR/TLR access requires EFR[4] (enable enhanced functions) and MCR[2] to be set. EFR[4] is already set in probe(). MCR access requires LCR[7] to be zero. Since LCR is set to $BF when trying to set MCR[2], XON1 is incorrectly accessed instead because MCR shares the same address space as XON1. Since MCR[2] is unmodified and still zero, when writing to TCR we are in fact writing to MSR because TCR/TLR registers share the same address space as MSR/SPR. Fix by first removing useless reconfiguration of EFR[4] (enable enhanced functions), as it is already enabled in sc16is7xx_probe() since commit 43c51bb573aa ("sc16is7xx: make sure device is in suspend once probed"). Now LCR is $00, which means that MCR access is enabled. Also remove regcache_cache_bypass() calls since we no longer access the enhanced registers set, and TCR is already declared as volatile (in fact by declaring MSR as volatile, which shares the same address). Finally disable access to TCR/TLR registers after modifying them by clearing MCR[2]. Note: the comment about "... and internal clock div" is wrong and can be ignored/removed as access to internal clock div registers (DLL/DLH) is permitted only when LCR[7] is logic 1, not when enhanced features is enabled. And DLL/DLH access is not needed in sc16is7xx_startup(). Fixes: dfeae619d781 ("serial: sc16is7xx") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20250731124451.1108864-1-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 3f38fba8f6ea..a668e0bb26b3 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1177,17 +1177,6 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_FCR_REG, SC16IS7XX_FCR_FIFO_BIT); - /* Enable EFR */ - sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, - SC16IS7XX_LCR_CONF_MODE_B); - - regcache_cache_bypass(one->regmap, true); - - /* Enable write access to enhanced features and internal clock div */ - sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, - SC16IS7XX_EFR_ENABLE_BIT, - SC16IS7XX_EFR_ENABLE_BIT); - /* Enable TCR/TLR */ sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_TCRTLR_BIT, @@ -1199,7 +1188,8 @@ static int sc16is7xx_startup(struct uart_port *port) SC16IS7XX_TCR_RX_RESUME(24) | SC16IS7XX_TCR_RX_HALT(48)); - regcache_cache_bypass(one->regmap, false); + /* Disable TCR/TLR access */ + sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_TCRTLR_BIT, 0); /* Now, initialize the UART */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_WORD_LEN_8); From ee047e1d85d73496541c54bd4f432c9464e13e65 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 12 Aug 2025 14:16:31 +0200 Subject: [PATCH 0344/1307] dt-bindings: serial: brcm,bcm7271-uart: Constrain clocks Lists should have fixed constraints, because binding must be specific in respect to hardware, thus add missing constraints to number of clocks. Cc: stable Fixes: 88a499cd70d4 ("dt-bindings: Add support for the Broadcom UART driver") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250812121630.67072-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml index 89c462653e2d..8cc848ae11cb 100644 --- a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml +++ b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml @@ -41,7 +41,7 @@ properties: - const: dma_intr2 clocks: - minItems: 1 + maxItems: 1 clock-names: const: sw_baud From 387d00028cccee7575f6416953bef62f849d83e3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 12 Aug 2025 22:21:50 -0500 Subject: [PATCH 0345/1307] dt-bindings: serial: 8250: move a constraint A block that required a "spacemit,k1-uart" compatible node to specify two clocks was placed in the wrong spot in the binding. Conor Dooley pointed out it belongs earlier in the file, as part of the initial "allOf". Fixes: 2c0594f9f0629 ("dt-bindings: serial: 8250: support an optional second clock") Cc: stable Reported-by: Conor Dooley Closes: https://lore.kernel.org/lkml/20250729-reshuffle-contented-e6def76b540b@spud/ Signed-off-by: Alex Elder Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250813032151.2330616-1-elder@riscstar.com Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/8250.yaml | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index e46bee8d25bf..f59c0b37e8eb 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -48,7 +48,6 @@ allOf: oneOf: - required: [ clock-frequency ] - required: [ clocks ] - - if: properties: compatible: @@ -66,6 +65,28 @@ allOf: items: - const: core - const: bus + - if: + properties: + compatible: + contains: + enum: + - spacemit,k1-uart + - nxp,lpc1850-uart + then: + required: + - clocks + - clock-names + properties: + clocks: + minItems: 2 + clock-names: + minItems: 2 + else: + properties: + clocks: + maxItems: 1 + clock-names: + maxItems: 1 properties: compatible: @@ -264,29 +285,6 @@ required: - reg - interrupts -if: - properties: - compatible: - contains: - enum: - - spacemit,k1-uart - - nxp,lpc1850-uart -then: - required: - - clocks - - clock-names - properties: - clocks: - minItems: 2 - clock-names: - minItems: 2 -else: - properties: - clocks: - maxItems: 1 - clock-names: - maxItems: 1 - unevaluatedProperties: false examples: From a1b51534b532dd4f0499907865553ee9251bebc3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 12 Aug 2025 22:13:35 -0500 Subject: [PATCH 0346/1307] dt-bindings: serial: 8250: allow "main" and "uart" as clock names There are two compatible strings defined in "8250.yaml" that require two clocks to be specified, along with their names: - "spacemit,k1-uart", used in "spacemit/k1.dtsi" - "nxp,lpc1850-uart", used in "lpc/lpc18xx.dtsi" When only one clock is used, the name is not required. However there are two places that do specify a name: - In "mediatek/mt7623.dtsi", the clock for the "mediatek,mtk-btif" compatible serial device is named "main" - In "qca/ar9132.dtsi", the clock for the "ns8250" compatible serial device is named "uart" In commit d2db0d7815444 ("dt-bindings: serial: 8250: allow clock 'uartclk' and 'reg' for nxp,lpc1850-uart"), Frank Li added the restriction that two named clocks be used for the NXP platform mentioned above. Change that logic, so that an additional condition for (only) the SpacemiT platform similarly restricts the two clocks to have the names "core" and "bus". Finally, add "main" and "uart" as allowed names when a single clock is specified. Fixes: 2c0594f9f0629 ("dt-bindings: serial: 8250: support an optional second clock") Cc: stable Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507160314.wrC51lXX-lkp@intel.com/ Signed-off-by: Alex Elder Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250813031338.2328392-1-elder@riscstar.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index f59c0b37e8eb..b243afa69a1a 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -59,7 +59,12 @@ allOf: items: - const: uartclk - const: reg - else: + - if: + properties: + compatible: + contains: + const: spacemit,k1-uart + then: properties: clock-names: items: @@ -183,6 +188,9 @@ properties: minItems: 1 maxItems: 2 oneOf: + - enum: + - main + - uart - items: - const: core - const: bus From f5b1819193667bf62c3c99d3921b9429997a14b2 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Tue, 1 Jul 2025 09:30:40 +0200 Subject: [PATCH 0347/1307] drm/mediatek: dsi: Fix DSI host and panel bridge pre-enable order Since commit c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable"), the bridge pre_enable callbacks are now called before crtc enable, and the bridge post_disable callbacks after the crtc disable. In the mediatek-drm driver, this change leads to transfer errors on mtk_dsi_host_transfer callback processing during the panel bridge pre-enable sequence because the DSI host bridge pre_enable and CRTC enable sequences, that are enabling the required clocks and PHY using mtk_dsi_poweron function, are called after. So, in order to fix this call order issue, request the DSI host bridge be pre-enabled before panel bridge by setting pre_enable_prev_first flag on DSI device bridge in the mtk_dsi_host_attach function. Fixes: c9b1150a68d9 ("drm/atomic-helper: Re-order bridge chain pre-enable and post-disable") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250701-mediatek-drm-fix-dsi-panel-init-v1-1-7af4adb9fdeb@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index d7726091819c..0e2bcd5f67b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1002,6 +1002,12 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, return PTR_ERR(dsi->next_bridge); } + /* + * set flag to request the DSI host bridge be pre-enabled before device bridge + * in the chain, so the DSI host is ready when the device bridge is pre-enabled + */ + dsi->next_bridge->pre_enable_prev_first = true; + drm_bridge_add(&dsi->bridge); ret = component_add(host->dev, &mtk_dsi_component_ops); From 94eae6ee4c2df2031bca586405e9ec36e0b9ccf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 27 May 2025 14:06:37 +0200 Subject: [PATCH 0348/1307] drm/xe/pf: Set VF LMEM BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LMEM is partitioned between multiple VFs and we expect that the more VFs we have, the less LMEM is assigned to each VF. This means that we can achieve full LMEM BAR access without the need to attempt full VF LMEM BAR resize via pci_resize_resource(). Always try to set the largest possible BAR size that allows to fit the number of enabled VFs and inform the user in case the resize attempt is not successful. Signed-off-by: Michał Winiarski Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250527120637.665506-7-michal.winiarski@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 32a4d1b98e6663101fd0abfaf151c48feea7abb1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_bars.h | 1 + drivers/gpu/drm/xe/xe_pci_sriov.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 447a7867eecb..af05db07162e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include +#include + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -128,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -158,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed; From 9969779d0803f5dcd4460ae7aca2bc3fd91bff12 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: [PATCH 0349/1307] Documentation/hw-vuln: Add VMSCAPE documentation VMSCAPE is a vulnerability that may allow a guest to influence the branch prediction in host userspace, particularly affecting hypervisors like QEMU. Add the documentation. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Reviewed-by: Dave Hansen --- Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/vmscape.rst | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/vmscape.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 89ca636081b7..55d747511f83 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -26,3 +26,4 @@ are configurable at compile, boot or run time. rsb old_microcode indirect-target-selection + vmscape diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst new file mode 100644 index 000000000000..d9b9a2b6c114 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/vmscape.rst @@ -0,0 +1,110 @@ +.. SPDX-License-Identifier: GPL-2.0 + +VMSCAPE +======= + +VMSCAPE is a vulnerability that may allow a guest to influence the branch +prediction in host userspace. It particularly affects hypervisors like QEMU. + +Even if a hypervisor may not have any sensitive data like disk encryption keys, +guest-userspace may be able to attack the guest-kernel using the hypervisor as +a confused deputy. + +Affected processors +------------------- + +The following CPU families are affected by VMSCAPE: + +**Intel processors:** + - Skylake generation (Parts without Enhanced-IBRS) + - Cascade Lake generation - (Parts affected by ITS guest/host separation) + - Alder Lake and newer (Parts affected by BHI) + +Note that, BHI affected parts that use BHB clearing software mitigation e.g. +Icelake are not vulnerable to VMSCAPE. + +**AMD processors:** + - Zen series (families 0x17, 0x19, 0x1a) + +** Hygon processors:** + - Family 0x18 + +Mitigation +---------- + +Conditional IBPB +---------------- + +Kernel tracks when a CPU has run a potentially malicious guest and issues an +IBPB before the first exit to userspace after VM-exit. If userspace did not run +between VM-exit and the next VM-entry, no IBPB is issued. + +Note that the existing userspace mitigation against Spectre-v2 is effective in +protecting the userspace. They are insufficient to protect the userspace VMMs +from a malicious guest. This is because Spectre-v2 mitigations are applied at +context switch time, while the userspace VMM can run after a VM-exit without a +context switch. + +Vulnerability enumeration and mitigation is not applied inside a guest. This is +because nested hypervisors should already be deploying IBPB to isolate +themselves from nested guests. + +SMT considerations +------------------ + +When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be +vulnerable to cross-thread attacks. For complete protection against VMSCAPE +attacks in SMT environments, STIBP should be enabled. + +The kernel will issue a warning if SMT is enabled without adequate STIBP +protection. Warning is not issued when: + +- SMT is disabled +- STIBP is enabled system-wide +- Intel eIBRS is enabled (which implies STIBP protection) + +System information and options +------------------------------ + +The sysfs file showing VMSCAPE mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/vmscape + +The possible values in this file are: + + * 'Not affected': + + The processor is not vulnerable to VMSCAPE attacks. + + * 'Vulnerable': + + The processor is vulnerable and no mitigation has been applied. + + * 'Mitigation: IBPB before exit to userspace': + + Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has + run a potentially malicious guest and issues an IBPB before the first + exit to userspace after VM-exit. + + * 'Mitigation: IBPB on VMEXIT': + + IBPB is issued on every VM-exit. This occurs when other mitigations like + RETBLEED or SRSO are already issuing IBPB on VM-exit. + +Mitigation control on the kernel command line +---------------------------------------------- + +The mitigation can be controlled via the ``vmscape=`` command line parameter: + + * ``vmscape=off``: + + Disable the VMSCAPE mitigation. + + * ``vmscape=ibpb``: + + Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y). + + * ``vmscape=force``: + + Force vulnerability detection and mitigation even on processors that are + not known to be affected. From a508cec6e5215a3fbc7e73ae86a5c5602187934d Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: [PATCH 0350/1307] x86/vmscape: Enumerate VMSCAPE bug The VMSCAPE vulnerability may allow a guest to cause Branch Target Injection (BTI) in userspace hypervisors. Kernels (both host and guest) have existing defenses against direct BTI attacks from guests. There are also inter-process BTI mitigations which prevent processes from attacking each other. However, the threat in this case is to a userspace hypervisor within the same process as the attacker. Userspace hypervisors have access to their own sensitive data like disk encryption keys and also typically have access to all guest data. This means guest userspace may use the hypervisor as a confused deputy to attack sensitive guest kernel data. There are no existing mitigations for these attacks. Introduce X86_BUG_VMSCAPE for this vulnerability and set it on affected Intel and AMD CPUs. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 65 ++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..b6fa5c33c85d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -550,4 +550,5 @@ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34a054181c4d..2b87c93e6609 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1236,6 +1236,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS_NATIVE_ONLY BIT(9) /* CPU is affected by Transient Scheduler Attacks */ #define TSA BIT(10) +/* CPU is affected by VMSCAPE */ +#define VMSCAPE BIT(11) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1247,44 +1249,55 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), - VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), - VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), - VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), - VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), - VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO | TSA), - VULNBL_AMD(0x1a, SRSO), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE), + VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE), + VULNBL_AMD(0x1a, SRSO | VMSCAPE), {} }; @@ -1543,6 +1556,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } } + /* + * Set the bug only on bare-metal. A nested hypervisor should already be + * deploying IBPB to isolate itself from nested guests. + */ + if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + setup_force_cpu_bug(X86_BUG_VMSCAPE); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From 2f8f173413f1cbf52660d04df92d0069c4306d25 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: [PATCH 0351/1307] x86/vmscape: Add conditional IBPB mitigation VMSCAPE is a vulnerability that exploits insufficient branch predictor isolation between a guest and a userspace hypervisor (like QEMU). Existing mitigations already protect kernel/KVM from a malicious guest. Userspace can additionally be protected by flushing the branch predictors after a VMexit. Since it is the userspace that consumes the poisoned branch predictors, conditionally issue an IBPB after a VMexit and before returning to userspace. Workloads that frequently switch between hypervisor and userspace will incur the most overhead from the new IBPB. This new IBPB is not integrated with the existing IBPB sites. For instance, a task can use the existing speculation control prctl() to get an IBPB at context switch time. With this implementation, the IBPB is doubled up: one at context switch and another before running userspace. The intent is to integrate and optimize these cases post-embargo. [ dhansen: elaborate on suboptimal IBPB solution ] Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Acked-by: Sean Christopherson --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/entry-common.h | 7 +++++++ arch/x86/include/asm/nospec-branch.h | 2 ++ arch/x86/kernel/cpu/bugs.c | 8 ++++++++ arch/x86/kvm/x86.c | 9 +++++++++ 5 files changed, 27 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b6fa5c33c85d..c8e177016cc4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -494,6 +494,7 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ /* * BUG word(s) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index d535a97c7284..ce3eb6d5fdf9 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -93,6 +93,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, * 8 (ia32) bits. */ choose_random_kstack_offset(rdtsc()); + + /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && + this_cpu_read(x86_ibpb_exit_to_user)) { + indirect_branch_prediction_barrier(); + this_cpu_write(x86_ibpb_exit_to_user, false); + } } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10f261678749..e29f82466f43 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -530,6 +530,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); + static inline void indirect_branch_prediction_barrier(void) { asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b74bf937cd9f..410f8df8b77a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -105,6 +105,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); +/* + * Set when the CPU has run a potentially malicious guest. An IBPB will + * be needed to before running userspace. That IBPB will flush the branch + * predictor content. + */ +DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user); +EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user); + u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; static u64 __ro_after_init x86_arch_cap_msr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..58d19443c9a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11007,6 +11007,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_fpu.xfd_err) wrmsrq(MSR_IA32_XFD_ERR, 0); + /* + * Mark this CPU as needing a branch predictor flush before running + * userspace. Must be done before enabling preemption to ensure it gets + * set for the CPU that actually ran the guest, and not the CPU that it + * may migrate to. + */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER)) + this_cpu_write(x86_ibpb_exit_to_user, true); + /* * Consume any pending interrupts, including the possible source of * VM-Exit on SVM and any ticks that occur between VM-Exit and now. From 556c1ad666ad90c50ec8fccb930dd5046cfbecfb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: [PATCH 0352/1307] x86/vmscape: Enable the mitigation Enable the previously added mitigation for VMscape. Add the cmdline vmscape={off|ibpb|force} and sysfs reporting. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Reviewed-by: Dave Hansen --- .../ABI/testing/sysfs-devices-system-cpu | 1 + .../admin-guide/kernel-parameters.txt | 11 +++ arch/x86/Kconfig | 9 ++ arch/x86/kernel/cpu/bugs.c | 90 +++++++++++++++++++ drivers/base/cpu.c | 3 + include/linux/cpu.h | 1 + 6 files changed, 115 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index ab8cd337f43a..8aed6d94c4cd 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -586,6 +586,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/vmscape Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 747a55abf494..5a7a83c411e9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3829,6 +3829,7 @@ srbds=off [X86,INTEL] ssbd=force-off [ARM64] tsx_async_abort=off [X86] + vmscape=off [X86] Exceptions: This does not have any effect on @@ -8041,6 +8042,16 @@ vmpoff= [KNL,S390] Perform z/VM CP command after power off. Format: + vmscape= [X86] Controls mitigation for VMscape attacks. + VMscape attacks can leak information from a userspace + hypervisor to a guest via speculative side-channels. + + off - disable the mitigation + ibpb - use Indirect Branch Prediction Barrier + (IBPB) mitigation (default) + force - force vulnerability detection even on + unaffected processors + vsyscall= [X86-64,EARLY] Controls the behavior of vsyscalls (i.e. calls to fixed addresses of 0xffffffffff600x00 from legacy diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58d890fe2100..52c8910ba2ef 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2701,6 +2701,15 @@ config MITIGATION_TSA security vulnerability on AMD CPUs which can lead to forwarding of invalid info to subsequent instructions and thus can affect their timing and thereby cause a leakage. + +config MITIGATION_VMSCAPE + bool "Mitigate VMSCAPE" + depends on KVM + default y + help + Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security + vulnerability on Intel and AMD CPUs that may allow a guest to do + Spectre v2 style attacks on userspace hypervisor. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 410f8df8b77a..c81024dfc4c8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -96,6 +96,9 @@ static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); static void __init tsa_select_mitigation(void); static void __init tsa_apply_mitigation(void); +static void __init vmscape_select_mitigation(void); +static void __init vmscape_update_mitigation(void); +static void __init vmscape_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -270,6 +273,7 @@ void __init cpu_select_mitigations(void) its_select_mitigation(); bhi_select_mitigation(); tsa_select_mitigation(); + vmscape_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -301,6 +305,7 @@ void __init cpu_select_mitigations(void) bhi_update_mitigation(); /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ srso_update_mitigation(); + vmscape_update_mitigation(); spectre_v1_apply_mitigation(); spectre_v2_apply_mitigation(); @@ -318,6 +323,7 @@ void __init cpu_select_mitigations(void) its_apply_mitigation(); bhi_apply_mitigation(); tsa_apply_mitigation(); + vmscape_apply_mitigation(); } /* @@ -3322,6 +3328,77 @@ static void __init srso_apply_mitigation(void) } } +#undef pr_fmt +#define pr_fmt(fmt) "VMSCAPE: " fmt + +enum vmscape_mitigations { + VMSCAPE_MITIGATION_NONE, + VMSCAPE_MITIGATION_AUTO, + VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER, + VMSCAPE_MITIGATION_IBPB_ON_VMEXIT, +}; + +static const char * const vmscape_strings[] = { + [VMSCAPE_MITIGATION_NONE] = "Vulnerable", + /* [VMSCAPE_MITIGATION_AUTO] */ + [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace", + [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT", +}; + +static enum vmscape_mitigations vmscape_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE; + +static int __init vmscape_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + } else if (!strcmp(str, "ibpb")) { + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; + } else if (!strcmp(str, "force")) { + setup_force_cpu_bug(X86_BUG_VMSCAPE); + vmscape_mitigation = VMSCAPE_MITIGATION_AUTO; + } else { + pr_err("Ignoring unknown vmscape=%s option.\n", str); + } + + return 0; +} +early_param("vmscape", vmscape_parse_cmdline); + +static void __init vmscape_select_mitigation(void) +{ + if (cpu_mitigations_off() || + !boot_cpu_has_bug(X86_BUG_VMSCAPE) || + !boot_cpu_has(X86_FEATURE_IBPB)) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + return; + } + + if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; +} + +static void __init vmscape_update_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) + return; + + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB || + srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT; + + pr_info("%s\n", vmscape_strings[vmscape_mitigation]); +} + +static void __init vmscape_apply_mitigation(void) +{ + if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER) + setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER); +} + #undef pr_fmt #define pr_fmt(fmt) fmt @@ -3570,6 +3647,11 @@ static ssize_t tsa_show_state(char *buf) return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); } +static ssize_t vmscape_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3636,6 +3718,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_TSA: return tsa_show_state(buf); + case X86_BUG_VMSCAPE: + return vmscape_show_state(buf); + default: break; } @@ -3727,6 +3812,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_TSA); } + +ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE); +} #endif void __warn_thunk(void) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index efc575a00edd..008da0354fba 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -603,6 +603,7 @@ CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); CPU_SHOW_VULN_FALLBACK(tsa); +CPU_SHOW_VULN_FALLBACK(vmscape); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -622,6 +623,7 @@ static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); +static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -642,6 +644,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, &dev_attr_tsa.attr, + &dev_attr_vmscape.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b91b993f58ee..487b3bf2e1ea 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 6449f5baf9c78a7a442d64f4a61378a21c5db113 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:43 -0700 Subject: [PATCH 0353/1307] x86/bugs: Move cpu_bugs_smt_update() down cpu_bugs_smt_update() uses global variables from different mitigations. For SMT updates it can't currently use vmscape_mitigation that is defined after it. Since cpu_bugs_smt_update() depends on many other mitigations, move it after all mitigations are defined. With that, it can use vmscape_mitigation in a moment. No functional change. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Dave Hansen --- arch/x86/kernel/cpu/bugs.c | 165 +++++++++++++++++++------------------ 1 file changed, 83 insertions(+), 82 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c81024dfc4c8..1f8c1c51d057 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2551,88 +2551,6 @@ static void update_mds_branch_idle(void) } } -#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" -#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" - -void cpu_bugs_smt_update(void) -{ - mutex_lock(&spec_ctrl_mutex); - - if (sched_smt_active() && unprivileged_ebpf_enabled() && - spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); - - switch (spectre_v2_user_stibp) { - case SPECTRE_V2_USER_NONE: - break; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - update_stibp_strict(); - break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: - update_indir_branch_cond(); - break; - } - - switch (mds_mitigation) { - case MDS_MITIGATION_FULL: - case MDS_MITIGATION_AUTO: - case MDS_MITIGATION_VMWERV: - if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) - pr_warn_once(MDS_MSG_SMT); - update_mds_branch_idle(); - break; - case MDS_MITIGATION_OFF: - break; - } - - switch (taa_mitigation) { - case TAA_MITIGATION_VERW: - case TAA_MITIGATION_AUTO: - case TAA_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(TAA_MSG_SMT); - break; - case TAA_MITIGATION_TSX_DISABLED: - case TAA_MITIGATION_OFF: - break; - } - - switch (mmio_mitigation) { - case MMIO_MITIGATION_VERW: - case MMIO_MITIGATION_AUTO: - case MMIO_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(MMIO_MSG_SMT); - break; - case MMIO_MITIGATION_OFF: - break; - } - - switch (tsa_mitigation) { - case TSA_MITIGATION_USER_KERNEL: - case TSA_MITIGATION_VM: - case TSA_MITIGATION_AUTO: - case TSA_MITIGATION_FULL: - /* - * TSA-SQ can potentially lead to info leakage between - * SMT threads. - */ - if (sched_smt_active()) - static_branch_enable(&cpu_buf_idle_clear); - else - static_branch_disable(&cpu_buf_idle_clear); - break; - case TSA_MITIGATION_NONE: - case TSA_MITIGATION_UCODE_NEEDED: - break; - } - - mutex_unlock(&spec_ctrl_mutex); -} - #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt @@ -3402,6 +3320,89 @@ static void __init vmscape_apply_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) fmt +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n" + +void cpu_bugs_smt_update(void) +{ + mutex_lock(&spec_ctrl_mutex); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + + switch (spectre_v2_user_stibp) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: + update_stibp_strict(); + break; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + update_indir_branch_cond(); + break; + } + + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_AUTO: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_AUTO: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_AUTO: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + + mutex_unlock(&spec_ctrl_mutex); +} + #ifdef CONFIG_SYSFS #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" From b7cc9887231526ca4fa89f3fa4119e47c2dc7b1e Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:43 -0700 Subject: [PATCH 0354/1307] x86/vmscape: Warn when STIBP is disabled with SMT Cross-thread attacks are generally harder as they require the victim to be co-located on a core. However, with VMSCAPE the adversary targets belong to the same guest execution, that are more likely to get co-located. In particular, a thread that is currently executing userspace hypervisor (after the IBPB) may still be targeted by a guest execution from a sibling thread. Issue a warning about the potential risk, except when: - SMT is disabled - STIBP is enabled system-wide - Intel eIBRS is enabled (which implies STIBP protection) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen --- arch/x86/kernel/cpu/bugs.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1f8c1c51d057..fa32615db71d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -3400,6 +3400,28 @@ void cpu_bugs_smt_update(void) break; } + switch (vmscape_mitigation) { + case VMSCAPE_MITIGATION_NONE: + case VMSCAPE_MITIGATION_AUTO: + break; + case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT: + case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER: + /* + * Hypervisors can be attacked across-threads, warn for SMT when + * STIBP is not already enabled system-wide. + * + * Intel eIBRS (!AUTOIBRS) implies STIBP on. + */ + if (!sched_smt_active() || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) + break; + pr_warn_once(VMSCAPE_MSG_SMT); + break; + } + mutex_unlock(&spec_ctrl_mutex); } From 1548549e17e374a126e9a4e9edab8bb041fbd67e Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 13 Aug 2025 13:16:33 +0200 Subject: [PATCH 0355/1307] MAINTAINERS: update s390/net Remove Thorsten Winkler as maintainer and add Aswin Karuvally as reviewer. Thank you Thorsten for your support, welcome Aswin! Signed-off-by: Alexandra Winter Acked-by: Thorsten Winkler Acked-by: Aswin Karuvally Link: https://patch.msgid.link/20250813111633.241111-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..2720544cd91f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22174,7 +22174,7 @@ F: arch/s390/mm S390 NETWORK DRIVERS M: Alexandra Winter -M: Thorsten Winkler +R: Aswin Karuvally L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported From fd980bf6e9cdae885105685259421164f843ca55 Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 13 Aug 2025 19:25:59 +0530 Subject: [PATCH 0356/1307] net: xilinx: axienet: Fix RX skb ring management in DMAengine mode Submit multiple descriptors in axienet_rx_cb() to fill Rx skb ring. This ensures the ring "catches up" on previously missed allocations. Increment Rx skb ring head pointer after BD is successfully allocated. Previously, head pointer was incremented before verifying if descriptor is successfully allocated and has valid entries, which could lead to ring state inconsistency if descriptor setup failed. These changes improve reliability by maintaining adequate descriptor availability and ensuring proper ring buffer state management. Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Suraj Gupta Link: https://patch.msgid.link/20250813135559.1555652-1-suraj.gupta2@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 6011d7eae0c7..0d8a05fe541a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1160,6 +1160,7 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) struct axienet_local *lp = data; struct sk_buff *skb; u32 *app_metadata; + int i; skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); skb = skbuf_dma->skb; @@ -1178,7 +1179,10 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) u64_stats_add(&lp->rx_packets, 1); u64_stats_add(&lp->rx_bytes, rx_len); u64_stats_update_end(&lp->rx_stat_sync); - axienet_rx_submit_desc(lp->ndev); + + for (i = 0; i < CIRC_SPACE(lp->rx_ring_head, lp->rx_ring_tail, + RX_BUF_NUM_DEFAULT); i++) + axienet_rx_submit_desc(lp->ndev); dma_async_issue_pending(lp->rx_chan); } @@ -1457,7 +1461,6 @@ static void axienet_rx_submit_desc(struct net_device *ndev) if (!skbuf_dma) return; - lp->rx_ring_head++; skb = netdev_alloc_skb(ndev, lp->max_frm_size); if (!skb) return; @@ -1482,6 +1485,7 @@ static void axienet_rx_submit_desc(struct net_device *ndev) skbuf_dma->desc = dma_rx_desc; dma_rx_desc->callback_param = lp; dma_rx_desc->callback_result = axienet_dma_rx_cb; + lp->rx_ring_head++; dmaengine_submit(dma_rx_desc); return; From d1547bf460baec718b3398365f8de33d25c5f36f Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Wed, 13 Aug 2025 10:10:54 +0800 Subject: [PATCH 0357/1307] net: bridge: fix soft lockup in br_multicast_query_expired() When set multicast_query_interval to a large value, the local variable 'time' in br_multicast_send_query() may overflow. If the time is smaller than jiffies, the timer will expire immediately, and then call mod_timer() again, which creates a loop and may trigger the following soft lockup issue. watchdog: BUG: soft lockup - CPU#1 stuck for 221s! [rb_consumer:66] CPU: 1 UID: 0 PID: 66 Comm: rb_consumer Not tainted 6.16.0+ #259 PREEMPT(none) Call Trace: __netdev_alloc_skb+0x2e/0x3a0 br_ip6_multicast_alloc_query+0x212/0x1b70 __br_multicast_send_query+0x376/0xac0 br_multicast_send_query+0x299/0x510 br_multicast_query_expired.constprop.0+0x16d/0x1b0 call_timer_fn+0x3b/0x2a0 __run_timers+0x619/0x950 run_timer_softirq+0x11c/0x220 handle_softirqs+0x18e/0x560 __irq_exit_rcu+0x158/0x1a0 sysvec_apic_timer_interrupt+0x76/0x90 This issue can be reproduced with: ip link add br0 type bridge echo 1 > /sys/class/net/br0/bridge/multicast_querier echo 0xffffffffffffffff > /sys/class/net/br0/bridge/multicast_query_interval ip link set dev br0 up The multicast_startup_query_interval can also cause this issue. Similar to the commit 99b40610956a ("net: bridge: mcast: add and enforce query interval minimum"), add check for the query interval maximum to fix this issue. Link: https://lore.kernel.org/netdev/20250806094941.1285944-1-wangliang74@huawei.com/ Link: https://lore.kernel.org/netdev/20250812091818.542238-1-wangliang74@huawei.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Suggested-by: Nikolay Aleksandrov Signed-off-by: Wang Liang Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250813021054.1643649-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_private.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1377f31b719c..8ce145938b02 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4818,6 +4818,14 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; } + if (intvl_jiffies > BR_MULTICAST_QUERY_INTVL_MAX) { + br_info(brmctx->br, + "trying to set multicast query interval above maximum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MAX), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MAX)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MAX; + } + brmctx->multicast_query_interval = intvl_jiffies; } @@ -4834,6 +4842,14 @@ void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; } + if (intvl_jiffies > BR_MULTICAST_STARTUP_QUERY_INTVL_MAX) { + br_info(brmctx->br, + "trying to set multicast startup query interval above maximum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MAX), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MAX)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MAX; + } + brmctx->multicast_startup_query_interval = intvl_jiffies; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b159aae594c0..8de0904b9627 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -31,6 +31,8 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) #define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN +#define BR_MULTICAST_QUERY_INTVL_MAX msecs_to_jiffies(86400000) /* 24 hours */ +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MAX BR_MULTICAST_QUERY_INTVL_MAX #define BR_HWDOM_MAX BITS_PER_LONG From 52bf272636bda69587952b35ae97690b8dc89941 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:57:57 +0000 Subject: [PATCH 0358/1307] net/sched: Fix backlog accounting in qdisc_dequeue_internal This issue applies for the following qdiscs: hhf, fq, fq_codel, and fq_pie, and occurs in their change handlers when adjusting to the new limit. The problem is the following in the values passed to the subsequent qdisc_tree_reduce_backlog call given a tbf parent: When the tbf parent runs out of tokens, skbs of these qdiscs will be placed in gso_skb. Their peek handlers are qdisc_peek_dequeued, which accounts for both qlen and backlog. However, in the case of qdisc_dequeue_internal, ONLY qlen is accounted for when pulling from gso_skb. This means that these qdiscs are missing a qdisc_qstats_backlog_dec when dropping packets to satisfy the new limit in their change handlers. One can observe this issue with the following (with tc patched to support a limit of 0): export TARGET=fq tc qdisc del dev lo root tc qdisc add dev lo root handle 1: tbf rate 8bit burst 100b latency 1ms tc qdisc replace dev lo handle 3: parent 1:1 $TARGET limit 1000 echo ''; echo 'add child'; tc -s -d qdisc show dev lo ping -I lo -f -c2 -s32 -W0.001 127.0.0.1 2>&1 >/dev/null echo ''; echo 'after ping'; tc -s -d qdisc show dev lo tc qdisc change dev lo handle 3: parent 1:1 $TARGET limit 0 echo ''; echo 'after limit drop'; tc -s -d qdisc show dev lo tc qdisc replace dev lo handle 2: parent 1:1 sfq echo ''; echo 'post graft'; tc -s -d qdisc show dev lo The second to last show command shows 0 packets but a positive number (74) of backlog bytes. The problem becomes clearer in the last show command, where qdisc_purge_queue triggers qdisc_tree_reduce_backlog with the positive backlog and causes an underflow in the tbf parent's backlog (4096 Mb instead of 0). To fix this issue, the codepath for all clients of qdisc_dequeue_internal has been simplified: codel, pie, hhf, fq, fq_pie, and fq_codel. qdisc_dequeue_internal handles the backlog adjustments for all cases that do not directly use the dequeue handler. The old fq_codel_change limit adjustment loop accumulated the arguments to the subsequent qdisc_tree_reduce_backlog call through the cstats field. However, this is confusing and error prone as fq_codel_dequeue could also potentially mutate this field (which qdisc_dequeue_internal calls in the non gso_skb case), so we have unified the code here with other qdiscs. Fixes: 2d3cbfd6d54a ("net_sched: Flush gso_skb list too during ->change()") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235725.45243-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 11 ++++++++--- net/sched/sch_codel.c | 12 +++++++----- net/sched/sch_fq.c | 12 +++++++----- net/sched/sch_fq_codel.c | 12 +++++++----- net/sched/sch_fq_pie.c | 12 +++++++----- net/sched/sch_hhf.c | 12 +++++++----- net/sched/sch_pie.c | 12 +++++++----- 7 files changed, 50 insertions(+), 33 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 638948be4c50..738cd5b13c62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1038,12 +1038,17 @@ static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool dir skb = __skb_dequeue(&sch->gso_skb); if (skb) { sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); return skb; } - if (direct) - return __qdisc_dequeue_head(&sch->q); - else + if (direct) { + skb = __qdisc_dequeue_head(&sch->q); + if (skb) + qdisc_qstats_backlog_dec(sch, skb); + return skb; + } else { return sch->dequeue(sch); + } } static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c93761040c6e..fa0314679e43 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -101,9 +101,9 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { static int codel_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; - unsigned int qlen, dropped = 0; int err; err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, @@ -142,15 +142,17 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, WRITE_ONCE(q->params.ecn, !!nla_get_u32(tb[TCA_CODEL_ECN])); - qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, true); - dropped += qdisc_pkt_len(skb); - qdisc_qstats_backlog_dec(sch, skb); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_qdisc_drop(skb, sch); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 902ff5470607..fee922da2f99 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1013,11 +1013,11 @@ static int fq_load_priomap(struct fq_sched_data *q, static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; - int err, drop_count = 0; - unsigned drop_len = 0; u32 fq_log; + int err; err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, NULL); @@ -1135,16 +1135,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, err = fq_resize(sch, fq_log); sch_tree_lock(sch); } + while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); if (!skb) break; - drop_len += qdisc_pkt_len(skb); + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); - drop_count++; } - qdisc_tree_reduce_backlog(sch, drop_count, drop_len); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 2a0f3a513bfa..a14142392939 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -366,6 +366,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; u32 quantum = 0; @@ -443,13 +444,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->memory_usage > q->memory_limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); - q->cstats.drop_len += qdisc_pkt_len(skb); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); - q->cstats.drop_count++; } - qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); - q->cstats.drop_count = 0; - q->cstats.drop_len = 0; + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index b0e34daf1f75..7b96bc3ff891 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -287,10 +287,9 @@ static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch) static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct fq_pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; - unsigned int len_dropped = 0; - unsigned int num_dropped = 0; int err; err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); @@ -368,11 +367,14 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); - len_dropped += qdisc_pkt_len(skb); - num_dropped += 1; + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } - qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 5aa434b46707..2d4855e28a28 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -508,9 +508,9 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { static int hhf_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; - unsigned int qlen, prev_backlog; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; @@ -561,15 +561,17 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, usecs_to_jiffies(us)); } - qlen = sch->q.qlen; - prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index ad46ee3ed5a9..0a377313b6a9 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -141,9 +141,9 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { static int pie_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + unsigned int dropped_pkts = 0, dropped_bytes = 0; struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; - unsigned int qlen, dropped = 0; int err; err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, @@ -193,15 +193,17 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR])); /* Drop excess packets if new limit is lower */ - qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = qdisc_dequeue_internal(sch, true); - dropped += qdisc_pkt_len(skb); - qdisc_qstats_backlog_dec(sch, skb); + if (!skb) + break; + + dropped_pkts++; + dropped_bytes += qdisc_pkt_len(skb); rtnl_qdisc_drop(skb, sch); } - qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); + qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); sch_tree_unlock(sch); return 0; From 8c06cbdcbaea34d7b96d76df4d6669275c1d291a Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:58:26 +0000 Subject: [PATCH 0359/1307] selftests/tc-testing: Check backlog stats in gso_skb case Add tests to ensure proper backlog accounting in hhf, codel, pie, fq, fq_pie, and fq_codel qdiscs. We check for the bug pattern originally found in fq, fq_pie, and fq_codel, which was an underflow in the tbf parent backlog stats upon child qdisc removal. Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235808.45281-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 198 ++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 23a61e5b99d0..998e5a2f4579 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -185,6 +185,204 @@ "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] }, + { + "id": "34c0", + "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "hhf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "fd68", + "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "514e", + "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "6c97", + "name": "Test TBF with FQ Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "5d0b", + "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "21c3", + "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, { "id": "a4bb", "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", From 065c31f2c6915b38f45b1c817b31f41f62eaa774 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 13 Aug 2025 15:16:31 +0800 Subject: [PATCH 0360/1307] rtase: Fix Rx descriptor CRC error bit definition The CRC error bit is located at bit 17 in the Rx descriptor, but the driver was incorrectly using bit 16. Fix it. Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Link: https://patch.msgid.link/20250813071631.7566-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 20decdeb9fdb..b9209eb6ea73 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -241,7 +241,7 @@ union rtase_rx_desc { #define RTASE_RX_RES BIT(20) #define RTASE_RX_RUNT BIT(19) #define RTASE_RX_RWT BIT(18) -#define RTASE_RX_CRC BIT(16) +#define RTASE_RX_CRC BIT(17) #define RTASE_RX_V6F BIT(31) #define RTASE_RX_V4F BIT(30) #define RTASE_RX_UDPT BIT(29) From d73915fdc0011d536c03856be7ec451f6a5fb4ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:42:18 -0700 Subject: [PATCH 0361/1307] lib/crypto: sha: Update Kconfig help for SHA1 and SHA256 Update the help text for CRYPTO_LIB_SHA1 and CRYPTO_LIB_SHA256 to reflect the addition of HMAC support, and to be consistent with CRYPTO_LIB_SHA512. Link: https://lore.kernel.org/r/20250731224218.137947-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index c2b65b6a9bb6..1e6b008f8fca 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -140,8 +140,8 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA1 tristate help - The SHA-1 library functions. Select this if your module uses any of - the functions from . + The SHA-1 and HMAC-SHA1 library functions. Select this if your module + uses any of the functions from . config CRYPTO_LIB_SHA1_ARCH bool @@ -157,9 +157,9 @@ config CRYPTO_LIB_SHA1_ARCH config CRYPTO_LIB_SHA256 tristate help - Enable the SHA-256 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. + The SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions. + Select this if your module uses any of these functions from + . config CRYPTO_LIB_SHA256_ARCH bool From fd7e5de4b2eddd34e3567cd419812d8869ef4f13 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 14 Aug 2025 02:51:57 -0400 Subject: [PATCH 0362/1307] lib/crypto: ensure generated *.S files are removed on make clean make clean does not check the kernel config when removing files. As such, additions to clean-files under CONFIG_ARM or CONFIG_ARM64 are not evaluated. For example, when building on arm64, this means that lib/crypto/arm64/sha{256,512}-core.S are left over after make clean. Set clean-files unconditionally to ensure that make clean removes these files. Fixes: e96cb9507f2d ("lib/crypto: sha256: Consolidate into single module") Fixes: 24c91b62ac50 ("lib/crypto: arm/sha512: Migrate optimized SHA-512 code to library") Fixes: 60e3f1e9b7a5 ("lib/crypto: arm64/sha512: Migrate optimized SHA-512 code to library") Signed-off-by: Tal Zussman Link: https://lore.kernel.org/r/20250814-crypto_clean-v2-1-659a2dc86302@columbia.edu Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e4151be2ebd4..539d5d59a50e 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -100,7 +100,6 @@ ifeq ($(CONFIG_ARM),y) libsha256-y += arm/sha256-ce.o arm/sha256-core.o $(obj)/arm/sha256-core.S: $(src)/arm/sha256-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha256-core.S AFLAGS_arm/sha256-core.o += $(aflags-thumb2-y) endif @@ -108,7 +107,6 @@ ifeq ($(CONFIG_ARM64),y) libsha256-y += arm64/sha256-core.o $(obj)/arm64/sha256-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha256-core.S libsha256-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha256-ce.o endif @@ -132,7 +130,6 @@ ifeq ($(CONFIG_ARM),y) libsha512-y += arm/sha512-core.o $(obj)/arm/sha512-core.S: $(src)/arm/sha512-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha512-core.S AFLAGS_arm/sha512-core.o += $(aflags-thumb2-y) endif @@ -140,7 +137,6 @@ ifeq ($(CONFIG_ARM64),y) libsha512-y += arm64/sha512-core.o $(obj)/arm64/sha512-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha512-core.S libsha512-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha512-ce-core.o endif @@ -167,3 +163,7 @@ obj-$(CONFIG_PPC) += powerpc/ obj-$(CONFIG_RISCV) += riscv/ obj-$(CONFIG_S390) += s390/ obj-$(CONFIG_X86) += x86/ + +# clean-files must be defined unconditionally +clean-files += arm/sha256-core.S arm/sha512-core.S +clean-files += arm64/sha256-core.S arm64/sha512-core.S From fad2cf04e91fd3c4310731537bf98d1f8a0b4137 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Aug 2025 10:28:05 +0200 Subject: [PATCH 0363/1307] scsi: fnic: Remove a useless struct mempool forward declaration struct mempool doesn't currently exist, and thus also isn't used in fnic.h, remove it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250812082808.371119-1-hch@lst.de Reviewed-by: Karan Tilak Kumar Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index c2fdc6553e62..1199d701c3f5 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -323,8 +323,6 @@ enum fnic_state { FNIC_IN_ETH_TRANS_FC_MODE, }; -struct mempool; - enum fnic_role_e { FNIC_ROLE_FCP_INITIATOR = 0, }; From 50a8c08b8b69399a09c2dbcad8ef3fef9d9349d2 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Wed, 6 Aug 2025 17:52:24 +0800 Subject: [PATCH 0364/1307] drm/bridge: analogix_dp: Fix bailout for devm_drm_bridge_alloc() devm_drm_bridge_alloc() returns ERR_PTR on failure instead of a NULL pointer, so use IS_ERR() to check the returned pointer. While at it, on failure, return ERR_CAST(dp) instead of ERR_PTR(-ENOMEM) in order not to depend on devm_drm_bridge_alloc() error code implementation. Fixes: 48f05c3b4b70 ("drm/bridge: analogix_dp: Use devm_drm_bridge_alloc() API") Signed-off-by: Liu Ying Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20250806095224.527938-1-victor.liu@nxp.com --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index ed35e567d117..efe534977d12 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1474,8 +1474,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) dp = devm_drm_bridge_alloc(dev, struct analogix_dp_device, bridge, &analogix_dp_bridge_funcs); - if (!dp) - return ERR_PTR(-ENOMEM); + if (IS_ERR(dp)) + return ERR_CAST(dp); dp->dev = &pdev->dev; dp->dpms_mode = DRM_MODE_DPMS_OFF; From 823f95575d85454ccad7d5b684aec42e57b962f6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 Aug 2025 16:02:59 +0300 Subject: [PATCH 0365/1307] scsi: ufs: ufs-pci: Add support for Intel Wildcat Lake Add PCI ID to support Intel Wildcat Lake, same as MTL. Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20250812130259.109645-1-adrian.hunter@intel.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b39239f641f2..b87e03777395 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -630,6 +630,7 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xA847), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { PCI_VDEVICE(INTEL, 0x7747), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { PCI_VDEVICE(INTEL, 0xE447), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x4D47), (kernel_ulong_t)&ufs_intel_mtl_hba_vops }, { } /* terminate list */ }; From 9dcf111dd3e7ed5fce82bb108e3a3fc001c07225 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Aug 2025 08:49:08 +0300 Subject: [PATCH 0366/1307] scsi: qla4xxx: Prevent a potential error pointer dereference The qla4xxx_get_ep_fwdb() function is supposed to return NULL on error, but qla4xxx_ep_connect() returns error pointers. Propagating the error pointers will lead to an Oops in the caller, so change the error pointers to NULL. Fixes: 13483730a13b ("[SCSI] qla4xxx: fix flash/ddb support") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aJwnVKS9tHsw1tEu@stanley.mountain Reviewed-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a39f1da4ce47..a761c0aa5127 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -6606,6 +6606,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); + if (IS_ERR(ep)) + return NULL; return ep; } From 2b986b9e917bc88f81aa1ed386af63b26c983f1d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 14 Aug 2025 20:24:37 +0200 Subject: [PATCH 0367/1307] bpf, cpumap: Disable page_pool direct xdp_return need larger scope When running an XDP bpf_prog on the remote CPU in cpumap code then we must disable the direct return optimization that xdp_return can perform for mem_type page_pool. This optimization assumes code is still executing under RX-NAPI of the original receiving CPU, which isn't true on this remote CPU. The cpumap code already disabled this via helpers xdp_set_return_frame_no_direct() and xdp_clear_return_frame_no_direct(), but the scope didn't include xdp_do_flush(). When doing XDP_REDIRECT towards e.g devmap this causes the function bq_xmit_all() to run with direct return optimization enabled. This can lead to hard to find bugs. The issue only happens when bq_xmit_all() cannot ndo_xdp_xmit all frames and them frees them via xdp_return_frame_rx_napi(). Fix by expanding scope to include xdp_do_flush(). This was found by Dragos Tatulea. Fixes: 11941f8a8536 ("bpf: cpumap: Implement generic cpumap") Reported-by: Dragos Tatulea Reported-by: Chris Arges Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Tested-by: Chris Arges Link: https://patch.msgid.link/175519587755.3008742.1088294435150406835.stgit@firesoul --- kernel/bpf/cpumap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b2b7b8ec2c2a..c46360b27871 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -186,7 +186,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, struct xdp_buff xdp; int i, nframes = 0; - xdp_set_return_frame_no_direct(); xdp.rxq = &rxq; for (i = 0; i < n; i++) { @@ -231,7 +230,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, } } - xdp_clear_return_frame_no_direct(); stats->pass += nframes; return nframes; @@ -255,6 +253,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, rcu_read_lock(); bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + xdp_set_return_frame_no_direct(); ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats); if (unlikely(ret->skb_n)) @@ -264,6 +263,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, if (stats->redirect) xdp_do_flush(); + xdp_clear_return_frame_no_direct(); bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); From e4414b01c1cd9887bbde92f946c1ba94e40d6d64 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Aug 2025 22:06:55 +0200 Subject: [PATCH 0368/1307] bpf: Check the helper function is valid in get_helper_proto kernel test robot reported verifier bug [1] where the helper func pointer could be NULL due to disabled config option. As Alexei suggested we could check on that in get_helper_proto directly. Marking tail_call helper func with BPF_PTR_POISON, because it is unused by design. [1] https://lore.kernel.org/oe-lkp/202507160818.68358831-lkp@intel.com Reported-by: kernel test robot Reported-by: syzbot+a9ed3d9132939852d0df@syzkaller.appspotmail.com Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20250814200655.945632-1-jolsa@kernel.org Closes: https://lore.kernel.org/oe-lkp/202507160818.68358831-lkp@intel.com --- kernel/bpf/core.c | 5 ++++- kernel/bpf/verifier.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5d1650af899d..f8ac77d08ca7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -3024,7 +3024,10 @@ EXPORT_SYMBOL_GPL(bpf_event_output); /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { - .func = NULL, + /* func is unused for tail_call, we set it to pass the + * get_helper_proto check + */ + .func = BPF_PTR_POISON, .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_CTX, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4f69a9e9af6..c89e2b1bc644 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11354,7 +11354,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id, return -EINVAL; *ptr = env->ops->get_func_proto(func_id, env->prog); - return *ptr ? 0 : -EINVAL; + return *ptr && (*ptr)->func ? 0 : -EINVAL; } static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, From 8503d0fcb1086a7cfe26df67ca4bd9bd9e99bdec Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 08:40:27 -0700 Subject: [PATCH 0369/1307] iommu/amd: Avoid stack buffer overflow from kernel cmdline While the kernel command line is considered trusted in most environments, avoid writing 1 byte past the end of "acpiid" if the "str" argument is maximum length. Reported-by: Simcha Kosman Closes: https://lore.kernel.org/all/AS8P193MB2271C4B24BCEDA31830F37AE84A52@AS8P193MB2271.EURP193.PROD.OUTLOOK.COM Fixes: b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter") Signed-off-by: Kees Cook Reviewed-by: Ankit Soni Link: https://lore.kernel.org/r/20250804154023.work.970-kees@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 7b5af6176de9..8de689b2c5ed 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3638,7 +3638,7 @@ static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIID_LEN] = {0}; + char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ int i; addr = strchr(str, '@'); @@ -3664,7 +3664,7 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; - if (strlen(str) > ACPIID_LEN + 1) + if (strlen(str) > ACPIID_LEN) goto not_found; if (sscanf(str, "=%s", acpiid) != 1) From 41f0200c718cf1826959a082a5374838c15bd242 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 14 Aug 2025 12:30:38 -0700 Subject: [PATCH 0370/1307] iommu/tegra241-cmdqv: Fix missing cpu_to_le64 at lvcmdq_err_map Sparse reported a warning: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:305:47: sparse: expected restricted __le64 sparse: got unsigned long long Add cpu_to_le64() to fix that. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508142105.Jb5Smjsg-lkp@intel.com/ Suggested-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250814193039.2265813-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index be1aaaf8cd17..378104cd395e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -301,9 +301,11 @@ static void tegra241_vintf_user_handle_error(struct tegra241_vintf *vintf) struct iommu_vevent_tegra241_cmdqv vevent_data; int i; - for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) - vevent_data.lvcmdq_err_map[i] = - readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 err = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + vevent_data.lvcmdq_err_map[i] = cpu_to_le64(err); + } iommufd_viommu_report_event(viommu, IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV, &vevent_data, sizeof(vevent_data)); From b64fdd422a85025b5e91ead794db9d3ef970e369 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Tue, 12 Aug 2025 18:10:47 +0000 Subject: [PATCH 0371/1307] perf: Avoid undefined behavior from stopping/starting inactive events Calling pmu->start()/stop() on perf events in PERF_EVENT_STATE_OFF can leave event->hw.idx at -1. When PMU drivers later attempt to use this negative index as a shift exponent in bitwise operations, it leads to UBSAN shift-out-of-bounds reports. The issue is a logical flaw in how event groups handle throttling when some members are intentionally disabled. Based on the analysis and the reproducer provided by Mark Rutland (this issue on both arm64 and x86-64). The scenario unfolds as follows: 1. A group leader event is configured with a very aggressive sampling period (e.g., sample_period = 1). This causes frequent interrupts and triggers the throttling mechanism. 2. A child event in the same group is created in a disabled state (.disabled = 1). This event remains in PERF_EVENT_STATE_OFF. Since it hasn't been scheduled onto the PMU, its event->hw.idx remains initialized at -1. 3. When throttling occurs, perf_event_throttle_group() and later perf_event_unthrottle_group() iterate through all siblings, including the disabled child event. 4. perf_event_throttle()/unthrottle() are called on this inactive child event, which then call event->pmu->start()/stop(). 5. The PMU driver receives the event with hw.idx == -1 and attempts to use it as a shift exponent. e.g., in macros like PMCNTENSET(idx), leading to the UBSAN report. The throttling mechanism attempts to start/stop events that are not actively scheduled on the hardware. Move the state check into perf_event_throttle()/perf_event_unthrottle() so that inactive events are skipped entirely. This ensures only active events with a valid hw.idx are processed, preventing undefined behavior and silencing UBSAN warnings. The corrected check ensures true before proceeding with PMU operations. The problem can be reproduced with the syzkaller reproducer: Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group") Signed-off-by: Yunseong Kim Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20250812181046.292382-2-ysk@kzalloc.com --- kernel/events/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8060c2857bb2..872122e074e5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2665,6 +2665,9 @@ static void perf_log_itrace_start(struct perf_event *event); static void perf_event_unthrottle(struct perf_event *event, bool start) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + event->hw.interrupts = 0; if (start) event->pmu->start(event, 0); @@ -2674,6 +2677,9 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) static void perf_event_throttle(struct perf_event *event) { + if (event->state != PERF_EVENT_STATE_ACTIVE) + return; + event->hw.interrupts = MAX_INTERRUPTS; event->pmu->stop(event, 0); if (event == event->group_leader) From de5d7d3f27ddd4046736f558a40e252ddda82013 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 28 Jul 2025 17:08:44 +0800 Subject: [PATCH 0372/1307] Bluetooth: hci_sync: Avoid adding default advertising on startup list_empty(&hdev->adv_instances) is always true during startup, so an advertising instance is added by default. Call trace: dump_backtrace+0x94/0xec show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 hci_setup_ext_adv_instance_sync+0x17c/0x328 hci_powered_update_adv_sync+0xb4/0x12c hci_powered_update_sync+0x54/0x70 hci_power_on_sync+0xe4/0x278 hci_set_powered_sync+0x28/0x34 set_powered_sync+0x40/0x58 hci_cmd_sync_work+0x94/0x100 process_one_work+0x168/0x444 worker_thread+0x378/0x3f4 kthread+0x108/0x10c ret_from_fork+0x10/0x20 Link: https://github.com/bluez/bluez/issues/1442 Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 2b4f21fbf9c1..7397b6b50ccb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3344,7 +3344,7 @@ static int hci_powered_update_adv_sync(struct hci_dev *hdev) * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) { err = hci_setup_ext_adv_instance_sync(hdev, 0x00); From ca88be1a2725a42f8dbad579181611d9dcca8e88 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 24 Jul 2025 16:43:18 -0400 Subject: [PATCH 0373/1307] Bluetooth: hci_sync: Fix scan state after PA Sync has been established Passive scanning is used to program the address of the peer to be synchronized, so once HCI_EV_LE_PA_SYNC_ESTABLISHED is received it needs to be updated after clearing HCI_PA_SYNC then call hci_update_passive_scan_sync to return it to its original state. Fixes: 6d0417e4e1cf ("Bluetooth: hci_conn: Fix not setting conn_timeout for Broadcast Receiver") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7397b6b50ccb..387c128f2ba0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6985,8 +6985,6 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - if (!hci_conn_valid(hdev, conn)) clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); @@ -7080,6 +7078,11 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + /* Update passive scan since HCI_PA_SYNC flag has been cleared */ + hci_update_passive_scan_sync(hdev); + return err; } From aee29c18a38d479c2f058c9b6a39b0527cf81d10 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 24 Jul 2025 16:36:27 -0400 Subject: [PATCH 0374/1307] Bluetooth: ISO: Fix getname not returning broadcast fields getname shall return iso_bc fields for both BIS_LINK and PA_LINK since the likes of bluetoothd do use the getpeername to retrieve the SID both when enumerating the broadcasters and when synchronizing. Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 7bd3aa0a6db9..eaffd25570e3 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1347,7 +1347,7 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); sa->iso_bdaddr_type = iso_pi(sk)->dst_type; - if (hcon && hcon->type == BIS_LINK) { + if (hcon && (hcon->type == BIS_LINK || hcon->type == PA_LINK)) { sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid; sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis; memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis, From d36349ea73d805bb72cbc24ab90cb1da4ad5c379 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 28 Jul 2025 13:51:01 -0400 Subject: [PATCH 0375/1307] Bluetooth: hci_conn: Fix running bis_cleanup for hci_conn->type PA_LINK Connections with type of PA_LINK shall be considered temporary just to track the lifetime of PA Sync setup, once the BIG Sync is established and connection are created with BIS_LINK the existing PA_LINK connection shall not longer use bis_cleanup otherwise it terminates the PA Sync when that shall be left to BIS_LINK connection to do it. Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 12 +++++++++++- net/bluetooth/hci_event.c | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7d1e79f69cd1..f8b20b609a03 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -830,7 +830,17 @@ static void bis_cleanup(struct hci_conn *conn) /* Check if ISO connection is a BIS and terminate advertising * set and BIG if there are no other connections using it. */ - bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big); + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_CONNECTED, + HCI_ROLE_MASTER); + if (bis) + return; + + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_CONNECT, + HCI_ROLE_MASTER); if (bis) return; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8aa5039b975a..4f0a6116291e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6957,9 +6957,14 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, continue; } - if (ev->status != 0x42) + if (ev->status != 0x42) { /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); + /* Reset cleanup callback of PA Sync so it doesn't + * terminate the sync when deleting the connection. + */ + conn->cleanup = NULL; + } bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; From 3ba486c5f3ce2c22ffd29c0103404cdbe21912b3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Jul 2025 12:11:09 -0400 Subject: [PATCH 0376/1307] Bluetooth: hci_conn: Fix not cleaning up Broadcaster/Broadcast Source This fixes Broadcaster/Broadcast Source not sending HCI_OP_LE_TERM_BIG because HCI_CONN_PER_ADV where not being set. Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f8b20b609a03..ab6fe5b0cc0f 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2259,7 +2259,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, * the start periodic advertising and create BIG commands have * been queued */ - hci_conn_hash_list_state(hdev, bis_mark_per_adv, PA_LINK, + hci_conn_hash_list_state(hdev, bis_mark_per_adv, BIS_LINK, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ From 099799fa9b76c5c02b49e07005a85117a25b01ea Mon Sep 17 00:00:00 2001 From: Jiande Lu Date: Thu, 24 Jul 2025 16:51:17 +0800 Subject: [PATCH 0377/1307] Bluetooth: btmtk: Fix wait_on_bit_timeout interruption during shutdown During the shutdown process, an interrupt occurs that prematurely terminates the wait for the expected event. This change replaces TASK_INTERRUPTIBLE with TASK_UNINTERRUPTIBLE in the wait_on_bit_timeout call to ensure the shutdown process completes as intended without being interrupted by signals. Fixes: d019930b0049 ("Bluetooth: btmtk: move btusb_mtk_hci_wmt_sync to btmtk.c") Signed-off-by: Jiande Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 4390fd571dbd..a8c520dc09e1 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -642,12 +642,7 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, * WMT command. */ err = wait_on_bit_timeout(&data->flags, BTMTK_TX_WAIT_VND_EVT, - TASK_INTERRUPTIBLE, HCI_INIT_TIMEOUT); - if (err == -EINTR) { - bt_dev_err(hdev, "Execution of wmt command interrupted"); - clear_bit(BTMTK_TX_WAIT_VND_EVT, &data->flags); - goto err_free_wc; - } + TASK_UNINTERRUPTIBLE, HCI_INIT_TIMEOUT); if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); From 709788b154caf042874d765628ffa860f0bb0d1e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 09:54:05 -0400 Subject: [PATCH 0378/1307] Bluetooth: hci_core: Fix using {cis,bis}_capable for current settings {cis,bis}_capable only indicates the controller supports the feature since it doesn't check that LE is enabled so it shall not be used for current setting, instead this introduces {cis,bis}_enabled macros that can be used to indicate that these features are currently enabled. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Fixes: ae7533613133 ("Bluetooth: Check for ISO support in controller") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 ++-- include/net/bluetooth/hci_core.h | 13 ++++++++++++- net/bluetooth/hci_sync.c | 4 ++-- net/bluetooth/iso.c | 14 +++++++------- net/bluetooth/mgmt.c | 10 +++++----- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ada5b56a4413..e5751f3070b8 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -647,7 +647,7 @@ static inline void sco_exit(void) #if IS_ENABLED(CONFIG_BT_LE) int iso_init(void); int iso_exit(void); -bool iso_enabled(void); +bool iso_inited(void); #else static inline int iso_init(void) { @@ -659,7 +659,7 @@ static inline int iso_exit(void) return 0; } -static inline bool iso_enabled(void) +static inline bool iso_inited(void) { return false; } diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4dc11c66f7b8..bc29f2e2e16f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1915,6 +1915,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) #define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ !adv->rpa_expired) +#define le_enabled(dev) (lmp_le_capable(dev) && \ + hci_dev_test_flag(dev, HCI_LE_ENABLED)) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) @@ -1981,14 +1983,23 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* CIS Master/Slave and BIS support */ #define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) +#define iso_enabled(dev) (le_enabled(dev) && iso_capable(dev)) #define cis_capable(dev) \ (cis_central_capable(dev) || cis_peripheral_capable(dev)) +#define cis_enabled(dev) (le_enabled(dev) && cis_capable(dev)) #define cis_central_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) +#define cis_central_enabled(dev) \ + (le_enabled(dev) && cis_central_capable(dev)) #define cis_peripheral_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) +#define cis_peripheral_enabled(dev) \ + (le_enabled(dev) && cis_peripheral_capable(dev)) #define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) -#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define bis_enabled(dev) (le_enabled(dev) && bis_capable(dev)) +#define sync_recv_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define sync_recv_enabled(dev) (le_enabled(dev) && sync_recv_capable(dev)) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 387c128f2ba0..aa7d7a8ec3ee 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4531,14 +4531,14 @@ static int hci_le_set_host_feature_sync(struct hci_dev *hdev) { struct hci_cp_le_set_host_feature cp; - if (!cis_capable(hdev)) + if (!iso_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); /* Connected Isochronous Channels (Host Support) */ cp.bit_number = 32; - cp.bit_value = 1; + cp.bit_value = iso_enabled(hdev) ? 0x01 : 0x00; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index eaffd25570e3..5ce823ca3aaf 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2483,11 +2483,11 @@ static const struct net_proto_family iso_sock_family_ops = { .create = iso_sock_create, }; -static bool iso_inited; +static bool inited; -bool iso_enabled(void) +bool iso_inited(void) { - return iso_inited; + return inited; } int iso_init(void) @@ -2496,7 +2496,7 @@ int iso_init(void) BUILD_BUG_ON(sizeof(struct sockaddr_iso) > sizeof(struct sockaddr)); - if (iso_inited) + if (inited) return -EALREADY; err = proto_register(&iso_proto, 0); @@ -2524,7 +2524,7 @@ int iso_init(void) iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs, NULL, &iso_debugfs_fops); - iso_inited = true; + inited = true; return 0; @@ -2535,7 +2535,7 @@ int iso_init(void) int iso_exit(void) { - if (!iso_inited) + if (!inited) return -EALREADY; bt_procfs_cleanup(&init_net, "iso"); @@ -2549,7 +2549,7 @@ int iso_exit(void) proto_unregister(&iso_proto); - iso_inited = false; + inited = false; return 0; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1ce682038b51..c42dffe77daf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -922,16 +922,16 @@ static u32 get_current_settings(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) settings |= MGMT_SETTING_WIDEBAND_SPEECH; - if (cis_central_capable(hdev)) + if (cis_central_enabled(hdev)) settings |= MGMT_SETTING_CIS_CENTRAL; - if (cis_peripheral_capable(hdev)) + if (cis_peripheral_enabled(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; - if (bis_capable(hdev)) + if (bis_enabled(hdev)) settings |= MGMT_SETTING_ISO_BROADCASTER; - if (sync_recv_capable(hdev)) + if (sync_recv_enabled(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; if (ll_privacy_capable(hdev)) @@ -4513,7 +4513,7 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, } if (IS_ENABLED(CONFIG_BT_LE)) { - flags = iso_enabled() ? BIT(0) : 0; + flags = iso_inited() ? BIT(0) : 0; memcpy(rp->features[idx].uuid, iso_socket_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); idx++; From 3dcf7175f2c04bd3a7d50db3fa42a0bd933b6e23 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 14:05:03 -0400 Subject: [PATCH 0379/1307] Bluetooth: hci_core: Fix using ll_privacy_capable for current settings ll_privacy_capable only indicates that the controller supports the feature but it doesnt' check that LE is enabled so it end up being marked as active in the current settings when it shouldn't. Fixes: ad383c2c65a5 ("Bluetooth: hci_sync: Enable advertising when LL privacy is enabled") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bc29f2e2e16f..bb30bde6f0e8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1934,6 +1934,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define ll_privacy_enabled(dev) (le_enabled(dev) && ll_privacy_capable(dev)) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ ((dev)->commands[39] & 0x04)) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c42dffe77daf..3166f5fb876b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -934,7 +934,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (sync_recv_enabled(hdev)) settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; - if (ll_privacy_capable(hdev)) + if (ll_privacy_enabled(hdev)) settings |= MGMT_SETTING_LL_PRIVACY; return settings; From 4d19cd228bbe8ff84a63fe7b11bc756b4b4370c7 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 7 Aug 2025 15:56:03 +0800 Subject: [PATCH 0380/1307] Bluetooth: hci_sync: Prevent unintended PA sync when SID is 0xFF After LE Extended Scan times out, conn->sid remains 0xFF, so the PA sync creation process should be aborted. Btmon snippet from PA sync with SID=0xFF: < HCI Command: LE Set Extended.. (0x08|0x0042) plen 6 #74726 [hci0] 863.107927 Extended scan: Enabled (0x01) Filter duplicates: Enabled (0x01) Duration: 0 msec (0x0000) Period: 0.00 sec (0x0000) > HCI Event: Command Complete (0x0e) plen 4 #74727 [hci0] 863.109389 LE Set Extended Scan Enable (0x08|0x0042) ncmd 1 Status: Success (0x00) < HCI Command: LE Periodic Ad.. (0x08|0x0044) plen 14 #74728 [hci0] 865.141168 Options: 0x0000 Use advertising SID, Advertiser Address Type and address Reporting initially enabled SID: 0xff Adv address type: Random (0x01) Adv address: 0D:D7:2C:E7:42:46 (Non-Resolvable) Skip: 0x0000 Sync timeout: 20000 msec (0x07d0) Sync CTE type: 0x0000 > HCI Event: Command Status (0x0f) plen 4 #74729 [hci0] 865.143223 LE Periodic Advertising Create Sync (0x08|0x0044) ncmd 1 Status: Success (0x00) Fixes: e2d471b7806b ("Bluetooth: ISO: Fix not using SID from adv report") Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index aa7d7a8ec3ee..31d72b9683ef 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -7045,10 +7045,13 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) /* SID has not been set listen for HCI_EV_LE_EXT_ADV_REPORT to update * it. */ - if (conn->sid == HCI_SID_INVALID) - __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, - HCI_EV_LE_EXT_ADV_REPORT, - conn->conn_timeout, NULL); + if (conn->sid == HCI_SID_INVALID) { + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, + HCI_EV_LE_EXT_ADV_REPORT, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + goto done; + } memset(&cp, 0, sizeof(cp)); cp.options = qos->bcast.options; @@ -7078,6 +7081,7 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); +done: hci_dev_clear_flag(hdev, HCI_PA_SYNC); /* Update passive scan since HCI_PA_SYNC flag has been cleared */ From 0b3725dbf61b51e7c663834811b3691157ae17d6 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 9 Aug 2025 11:36:20 +0300 Subject: [PATCH 0381/1307] Bluetooth: hci_event: fix MTU for BN == 0 in CIS Established BN == 0x00 in CIS Established means no isochronous data for the corresponding direction (Core v6.1 pp. 2394). In this case SDU MTU should be 0. However, the specification does not say the Max_PDU_C_To_P or P_To_C are then zero. Intel AX210 in Framed CIS mode sets nonzero Max_PDU for direction with zero BN. This causes failure later when we try to LE Setup ISO Data Path for disabled direction, which is disallowed (Core v6.1 pp. 2750). Fix by setting SDU MTU to 0 if BN == 0. Fixes: 2be22f1941d5f ("Bluetooth: hci_event: Fix parsing of CIS Established Event") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4f0a6116291e..fe7cdd67ad2a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6745,8 +6745,8 @@ static void hci_le_cis_established_evt(struct hci_dev *hdev, void *data, qos->ucast.out.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), 1000); - qos->ucast.in.sdu = le16_to_cpu(ev->c_mtu); - qos->ucast.out.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.in.sdu = ev->c_bn ? le16_to_cpu(ev->c_mtu) : 0; + qos->ucast.out.sdu = ev->p_bn ? le16_to_cpu(ev->p_mtu) : 0; qos->ucast.in.phy = ev->c_phy; qos->ucast.out.phy = ev->p_phy; break; @@ -6760,8 +6760,8 @@ static void hci_le_cis_established_evt(struct hci_dev *hdev, void *data, qos->ucast.in.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), 1000); - qos->ucast.out.sdu = le16_to_cpu(ev->c_mtu); - qos->ucast.in.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.out.sdu = ev->c_bn ? le16_to_cpu(ev->c_mtu) : 0; + qos->ucast.in.sdu = ev->p_bn ? le16_to_cpu(ev->p_mtu) : 0; qos->ucast.out.phy = ev->c_phy; qos->ucast.in.phy = ev->p_phy; break; From b5ca88927e353185b3d9ac4362d33e5aeb25771f Mon Sep 17 00:00:00 2001 From: Thomas Bertschinger Date: Thu, 14 Aug 2025 17:54:28 -0600 Subject: [PATCH 0382/1307] fhandle: do_handle_open() should get FD with user flags In f07c7cc4684a, do_handle_open() was switched to use the automatic cleanup method for getting a FD. In that change it was also switched to pass O_CLOEXEC unconditionally to get_unused_fd_flags() instead of passing the user-specified flags. I don't see anything in that commit description that indicates this was intentional, so I am assuming it was an oversight. With this fix, the FD will again be opened with, or without, O_CLOEXEC according to what the user requested. Fixes: f07c7cc4684a ("fhandle: simplify error handling") Signed-off-by: Thomas Bertschinger Link: https://lore.kernel.org/20250814235431.995876-4-tahbertschinger@gmail.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 7c236f64cdea..68a7d2861c58 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -402,7 +402,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (retval) return retval; - CLASS(get_unused_fd, fd)(O_CLOEXEC); + CLASS(get_unused_fd, fd)(open_flag); if (fd < 0) return fd; From a3de58b12ce074ec05b8741fa28d62ccb1070468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Aug 2025 22:45:50 +0100 Subject: [PATCH 0383/1307] netfs: Fix unbuffered write error handling If all the subrequests in an unbuffered write stream fail, the subrequest collector doesn't update the stream->transferred value and it retains its initial LONG_MAX value. Unfortunately, if all active streams fail, then we take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set in wreq->transferred - which is then returned from ->write_iter(). LONG_MAX was chosen as the initial value so that all the streams can be quickly assessed by taking the smallest value of all stream->transferred - but this only works if we've set any of them. Fix this by adding a flag to indicate whether the value in stream->transferred is valid and checking that when we integrate the values. stream->transferred can then be initialised to zero. This was found by running the generic/750 xfstest against cifs with cache=none. It splices data to the target file. Once (if) it has used up all the available scratch space, the writes start failing with ENOSPC. This causes ->write_iter() to fail. However, it was returning wreq->transferred, i.e. LONG_MAX, rather than an error (because it thought the amount transferred was non-zero) and iter_file_splice_write() would then try to clean up that amount of pipe bufferage - leading to an oops when it overran. The kernel log showed: CIFS: VFS: Send error in write = -28 followed by: BUG: kernel NULL pointer dereference, address: 0000000000000008 with: RIP: 0010:iter_file_splice_write+0x3a4/0x520 do_splice+0x197/0x4e0 or: RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282) iter_file_splice_write (fs/splice.c:755) Also put a warning check into splice to announce if ->write_iter() returned that it had written more than it was asked to. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Reported-by: Xiaoli Feng Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445 Signed-off-by: David Howells Link: https://lore.kernel.org/915443.1755207950@warthog.procyon.org.uk cc: Paulo Alcantara cc: Steve French cc: Shyam Prasad N cc: netfs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 4 +++- fs/netfs/write_collect.c | 10 ++++++++-- fs/netfs/write_issue.c | 4 ++-- fs/splice.c | 3 +++ include/linux/netfs.h | 1 + 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 3e804da1e1eb..a95e7aadafd0 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -281,8 +281,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { notes |= MADE_PROGRESS; } else { - if (!stream->failed) + if (!stream->failed) { stream->transferred += transferred; + stream->transferred_valid = true; + } if (front->transferred < front->len) set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 0f3a36852a4d..cbf3d9194c7b 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -254,6 +254,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (front->start + front->transferred > stream->collected_to) { stream->collected_to = front->start + front->transferred; stream->transferred = stream->collected_to - wreq->start; + stream->transferred_valid = true; notes |= MADE_PROGRESS; } if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { @@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; + bool transferred_valid = false; int s; _enter("R=%x", wreq->debug_id); @@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq) continue; if (!list_empty(&stream->subrequests)) return false; - if (stream->transferred < transferred) + if (stream->transferred_valid && + stream->transferred < transferred) { transferred = stream->transferred; + transferred_valid = true; + } } /* Okay, declare that all I/O is complete. */ - wreq->transferred = transferred; + if (transferred_valid) + wreq->transferred = transferred; trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); if (wreq->io_streams[1].active && diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 50bee2c4130d..0584cba1a043 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; wreq->io_streams[0].issue_write = ictx->ops->issue_write; wreq->io_streams[0].collected_to = start; - wreq->io_streams[0].transferred = LONG_MAX; + wreq->io_streams[0].transferred = 0; wreq->io_streams[1].stream_nr = 1; wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; wreq->io_streams[1].collected_to = start; - wreq->io_streams[1].transferred = LONG_MAX; + wreq->io_streams[1].transferred = 0; if (fscache_resources_valid(&wreq->cache_resources)) { wreq->io_streams[1].avail = true; wreq->io_streams[1].active = true; diff --git a/fs/splice.c b/fs/splice.c index 4d6df083e0c0..f5094b6d00a0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, sd.pos = kiocb.ki_pos; if (ret <= 0) break; + WARN_ONCE(ret > sd.total_len - left, + "Splice Exceeded! ret=%zd tot=%zu left=%zu\n", + ret, sd.total_len, left); sd.num_spliced += ret; sd.total_len -= ret; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T is ->transferred is valid */ }; /* From 0b2d71a7c82628bb36fd43e80193bcc2693c239a Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Thu, 14 Aug 2025 17:44:53 +0800 Subject: [PATCH 0384/1307] pidfs: Fix memory leak in pidfd_info() After running the program 'ioctl_pidfd03' of Linux Test Project (LTP) or the program 'pidfd_info_test' in 'tools/testing/selftests/pidfd' of the kernel source, kmemleak reports the following memory leaks: # cat /sys/kernel/debug/kmemleak unreferenced object 0xff110020e5988000 (size 8216): comm "ioctl_pidfd03", pid 10853, jiffies 4294800031 hex dump (first 32 bytes): 02 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00 .@.............. 00 00 00 00 af 01 00 00 80 00 00 00 00 00 00 00 ................ backtrace (crc 69483047): kmem_cache_alloc_node_noprof+0x2fb/0x410 copy_process+0x178/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... unreferenced object 0xff11002097b70000 (size 8216): comm "pidfd_info_test", pid 11840, jiffies 4294889165 hex dump (first 32 bytes): 06 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00 .@.............. 00 00 00 00 b5 00 00 00 80 00 00 00 00 00 00 00 ................ backtrace (crc a6286bb7): kmem_cache_alloc_node_noprof+0x2fb/0x410 copy_process+0x178/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... The leak occurs because pidfd_info() obtains a task_struct via get_pid_task() but never calls put_task_struct() to drop the reference, leaving task->usage unbalanced. Fix the issue by adding '__free(put_task) = NULL' to the local variable 'task', ensuring that put_task_struct() is automatically invoked when the variable goes out of scope. Fixes: 7477d7dce48a ("pidfs: allow to retrieve exit information") Signed-off-by: Adrian Huang (Lenovo) Link: https://lore.kernel.org/20250814094453.15232-1-adrianhuang0701@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index edc35522d75c..108e7527f837 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -296,12 +296,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags) static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + struct task_struct *task __free(put_task) = NULL; struct pid *pid = pidfd_pid(file); size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; struct pidfs_exit_info *exit_info; struct user_namespace *user_ns; - struct task_struct *task; struct pidfs_attr *attr; const struct cred *c; __u64 mask; From 0eaf7c7e85da7495c0e03a99375707fc954f5e7b Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 5 Aug 2025 22:14:51 +0300 Subject: [PATCH 0385/1307] Bluetooth: hci_conn: do return error from hci_enhanced_setup_sync() The commit e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync") missed to update the *return* statement under the *case* of BT_CODEC_TRANSPARENT in hci_enhanced_setup_sync(), which led to returning success (0) instead of the negative error code (-EINVAL). However, the result of hci_enhanced_setup_sync() seems to be ignored anyway, since NULL gets passed to hci_cmd_sync_queue() as the last argument in that case and the only function interested in that result is specified by that argument. Fixes: e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync") Signed-off-by: Sergey Shtylyov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ab6fe5b0cc0f..7a879290dd28 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -339,7 +339,8 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) case BT_CODEC_TRANSPARENT: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) - return false; + return -EINVAL; + param = &esco_param_msbc[conn->attempt - 1]; cp.tx_coding_format.id = 0x03; cp.rx_coding_format.id = 0x03; From e489317d2fd9a51a81bdcbe15a73ddde8246e6d6 Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Mon, 4 Aug 2025 16:00:15 +0530 Subject: [PATCH 0386/1307] Bluetooth: btnxpuart: Uses threaded IRQ for host wakeup handling This replaces devm_request_irq() with devm_request_threaded_irq(). On iMX93 11x11 EVK platform, the BT chip's BT_WAKE_OUT pin is connected to an I2C GPIO expander instead of directly been connected to iMX GPIO. When I2C GPIO expander's (PCAL6524) host driver receives an interrupt on it's INTR line, the driver's interrupt handler needs to query the interrupt source with PCAL6524 first, and then call the actual interrupt handler, in this case the IRQ handler in BTNXPUART. In order to handle interrupts when such I2C GPIO expanders are between the host and interrupt source, devm_request_threaded_irq() is needed. This commit also removes the IRQF_TRIGGER_FALLING flag, to allow setting the IRQ trigger type from the device tree setting instead of hardcoding in the driver. Signed-off-by: Neeraj Sanjay Kale Reviewed-by: Sherry Sun Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 73a4a325c867..76e7f857fb7d 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -543,10 +543,10 @@ static int ps_setup(struct hci_dev *hdev) } if (psdata->wakeup_source) { - ret = devm_request_irq(&serdev->dev, psdata->irq_handler, - ps_host_wakeup_irq_handler, - IRQF_ONESHOT | IRQF_TRIGGER_FALLING, - dev_name(&serdev->dev), nxpdev); + ret = devm_request_threaded_irq(&serdev->dev, psdata->irq_handler, + NULL, ps_host_wakeup_irq_handler, + IRQF_ONESHOT, + dev_name(&serdev->dev), nxpdev); if (ret) bt_dev_info(hdev, "error setting wakeup IRQ handler, ignoring\n"); disable_irq(psdata->irq_handler); From 9d4b01a0bf8d2163ae129c9c537cb0753ad5a2aa Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 14 Aug 2025 11:57:19 -0400 Subject: [PATCH 0387/1307] Bluetooth: hci_core: Fix not accounting for BIS/CIS/PA links separately This fixes the likes of hci_conn_num(CIS_LINK) returning the total of ISO connection which includes BIS_LINK as well, so this splits the iso_num into each link type and introduces hci_iso_num that can be used in places where the total number of ISO connection still needs to be used. Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bb30bde6f0e8..6906af7a8f24 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -129,7 +129,9 @@ struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int sco_num; - unsigned int iso_num; + unsigned int cis_num; + unsigned int bis_num; + unsigned int pa_num; unsigned int le_num; unsigned int le_num_peripheral; }; @@ -1014,9 +1016,13 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) h->sco_num++; break; case CIS_LINK: + h->cis_num++; + break; case BIS_LINK: + h->bis_num++; + break; case PA_LINK: - h->iso_num++; + h->pa_num++; break; } } @@ -1042,9 +1048,13 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) h->sco_num--; break; case CIS_LINK: + h->cis_num--; + break; case BIS_LINK: + h->bis_num--; + break; case PA_LINK: - h->iso_num--; + h->pa_num--; break; } } @@ -1061,9 +1071,11 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case ESCO_LINK: return h->sco_num; case CIS_LINK: + return h->cis_num; case BIS_LINK: + return h->bis_num; case PA_LINK: - return h->iso_num; + return h->pa_num; default: return 0; } @@ -1073,7 +1085,15 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->sco_num + c->le_num + c->iso_num; + return c->acl_num + c->sco_num + c->le_num + c->cis_num + c->bis_num + + c->pa_num; +} + +static inline unsigned int hci_iso_count(struct hci_dev *hdev) +{ + struct hci_conn_hash *c = &hdev->conn_hash; + + return c->cis_num + c->bis_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) From c08ba63078dd6046c279df37795cb77e784e1ec9 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 16 Jul 2025 15:41:35 -0500 Subject: [PATCH 0388/1307] virt: sev-guest: Satisfy linear mapping requirement in get_derived_key() Commit 7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers") added a check that requires the guest request buffers to be in the linear mapping. The get_derived_key() function was passing a buffer that was allocated on the stack, resulting in the call to snp_send_guest_request() returning an error. Update the get_derived_key() function to use an allocated buffer instead of a stack buffer. Fixes: 7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/9b764ca9fc79199a091aac684c4926e2080ca7a8.1752698495.git.thomas.lendacky@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 27 +++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index d2b3ae7113ab..b01ec99106cd 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -116,13 +116,11 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { + struct snp_derived_key_resp *derived_key_resp __free(kfree) = NULL; struct snp_derived_key_req *derived_key_req __free(kfree) = NULL; - struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; int rc, resp_len; - /* Response data is 64 bytes and max authsize for GCM is 16 bytes. */ - u8 buf[64 + 16]; if (!arg->req_data || !arg->resp_data) return -EINVAL; @@ -132,8 +130,9 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque * response payload. Make sure that it has enough space to cover the * authtag. */ - resp_len = sizeof(derived_key_resp.data) + mdesc->ctx->authsize; - if (sizeof(buf) < resp_len) + resp_len = sizeof(derived_key_resp->data) + mdesc->ctx->authsize; + derived_key_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); + if (!derived_key_resp) return -ENOMEM; derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); @@ -149,23 +148,21 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque req.vmpck_id = mdesc->vmpck_id; req.req_buf = derived_key_req; req.req_sz = sizeof(*derived_key_req); - req.resp_buf = buf; + req.resp_buf = derived_key_resp; req.resp_sz = resp_len; req.exit_code = SVM_VMGEXIT_GUEST_REQUEST; rc = snp_send_guest_request(mdesc, &req); arg->exitinfo2 = req.exitinfo2; - if (rc) - return rc; - - memcpy(derived_key_resp.data, buf, sizeof(derived_key_resp.data)); - if (copy_to_user((void __user *)arg->resp_data, &derived_key_resp, - sizeof(derived_key_resp))) - rc = -EFAULT; + if (!rc) { + if (copy_to_user((void __user *)arg->resp_data, derived_key_resp, + sizeof(derived_key_resp->data))) + rc = -EFAULT; + } /* The response buffer contains the sensitive data, explicitly clear it. */ - memzero_explicit(buf, sizeof(buf)); - memzero_explicit(&derived_key_resp, sizeof(derived_key_resp)); + memzero_explicit(derived_key_resp, sizeof(*derived_key_resp)); + return rc; } From 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 13 Aug 2025 10:26:59 -0500 Subject: [PATCH 0389/1307] x86/sev: Ensure SVSM reserved fields in a page validation entry are initialized to zero In order to support future versions of the SVSM_CORE_PVALIDATE call, all reserved fields within a PVALIDATE entry must be set to zero as an SVSM should be ensuring all reserved fields are zero in order to support future usage of reserved areas based on the protocol version. Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Joerg Roedel Cc: Link: https://lore.kernel.org/7cde412f8b057ea13a646fb166b1ca023f6a5031.1755098819.git.thomas.lendacky@amd.com --- arch/x86/boot/startup/sev-shared.c | 1 + arch/x86/coco/sev/core.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..4ab0dbd043c6 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) pc->entry[0].page_size = RMP_PG_SIZE_4K; pc->entry[0].action = validate; pc->entry[0].ignore_cf = 0; + pc->entry[0].rsvd = 0; pc->entry[0].pfn = paddr >> PAGE_SHIFT; /* Protocol 0, Call ID 1 */ diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..43ecc6b9fb9c 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, pe->page_size = RMP_PG_SIZE_4K; pe->action = action; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = pfn; pe++; @@ -257,6 +258,7 @@ static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = e->gfn; pe++; From ed6c4b657bca3b39f7b11cba1405931aeb490f3d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 15 Aug 2025 09:01:54 +0200 Subject: [PATCH 0390/1307] x86/cpuid: Remove transitional header All CPUID call sites were updated at commit: 968e30006807 ("x86/cpuid: Set as the main CPUID header") to include instead of . The header was still retained as a wrapper, just in case some new code in -next started using it. Now that everything is merged to Linus' tree, remove the header. Signed-off-by: Ahmed S. Darwish Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250815070227.19981-2-darwi@linutronix.de --- arch/x86/include/asm/cpuid.h | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 arch/x86/include/asm/cpuid.h diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h deleted file mode 100644 index d5749b25fa10..000000000000 --- a/arch/x86/include/asm/cpuid.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_X86_CPUID_H -#define _ASM_X86_CPUID_H - -#include - -#endif /* _ASM_X86_CPUID_H */ From eafae0fdd115a71b3a200ef1a31f86da04bac77f Mon Sep 17 00:00:00 2001 From: Evgeniy Harchenko Date: Fri, 15 Aug 2025 12:58:14 +0300 Subject: [PATCH 0391/1307] ALSA: hda/realtek: Add support for HP EliteBook x360 830 G6 and EliteBook 830 G6 The HP EliteBook x360 830 G6 and HP EliteBook 830 G6 have Realtek HDA codec ALC215. It needs the ALC285_FIXUP_HP_GPIO_LED quirk to enable the mute LED. Cc: Signed-off-by: Evgeniy Harchenko Link: https://patch.msgid.link/20250815095814.75845-1-evgeniyharchenko.dev@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e90c4047ea62..db8e6352b942 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6368,6 +6368,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8548, "HP EliteBook x360 830 G6", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), From b3eaf14f4c63fd6abc7b68c6d7a07c5680a6d8e5 Mon Sep 17 00:00:00 2001 From: Shinji Nomoto Date: Thu, 22 May 2025 15:10:58 +0900 Subject: [PATCH 0392/1307] cpupower: Fix a bug where the -t option of the set subcommand was not working. The set subcommand's -t option is documented as being available for boost configuration, but it was not actually functioning due to a bug in the option handling. Link: https://lore.kernel.org/r/20250522061122.2149188-2-fj5851bi@fujitsu.com Signed-off-by: Shinji Nomoto Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpupower-set.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 0677b58374ab..59ace394cf3e 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -62,8 +62,8 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:e:m:", - set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "b:e:m:t:", + set_opts, NULL)) != -1) { switch (ret) { case 'b': if (params.perf_bias) From 357d1fc38aad2cf4ea6626138cbf68299d20170c Mon Sep 17 00:00:00 2001 From: Shinji Nomoto Date: Thu, 22 May 2025 15:10:59 +0900 Subject: [PATCH 0393/1307] cpupower: Allow control of boost feature on non-x86 based systems with boost support. The cpufreq subsystem has a generic sysfs interface for controlling boost (/sys/devices/system/cpu/cpufreq/boost). The sysfs interface can be used to enable boost control from the cpupower command on non-x86 platforms as well. So, allow boost controlling on non-x86 system if boost sysfs file exists. The set subcommand enables/disables the boost feature using the following syntax: cpupower set --boost 1 cpupower set --boost 0 The --boost option is an alias for --turbo-boost. We provided the neutral option name because the name "turbo boost" is specific to Intel technology. The frequency-info subcommand displays the enabled/disabled state of the boost feature as follows: boost state support: Active: yes (or no) Link: https://lore.kernel.org/r/20250522061122.2149188-3-fj5851bi@fujitsu.com Signed-off-by: Shinji Nomoto Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-set.1 | 7 +- tools/power/cpupower/utils/cpufreq-info.c | 16 ++++- tools/power/cpupower/utils/cpupower-set.c | 1 + tools/power/cpupower/utils/helpers/helpers.h | 14 ++-- tools/power/cpupower/utils/helpers/misc.c | 76 ++++++++++++++------ 5 files changed, 81 insertions(+), 33 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 500653ef98c7..8ac82b6f9189 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information. .RE .PP -\-\-turbo\-boost, \-t +\-\-turbo\-boost, \-\-boost, \-t .RS 4 -This option is used to enable or disable the turbo boost feature on -supported Intel and AMD processors. +This option is used to enable or disable the boost feature on +supported Intel and AMD processors, and other boost supported systems. +(The --boost option is an alias for the --turbo-boost option) This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index fc750e127404..7d3732f5f2f6 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu) /* ToDo: Make this more global */ unsigned long pstates[MAX_HW_PSTATES] = {0,}; - ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states); + ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states); if (ret) { printf(_("Error while evaluating Boost Capabilities" " on CPU %d -- are you root?\n"), cpu); @@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu) return 0; } +static int get_boost_mode_generic(unsigned int cpu) +{ + bool active; + + if (!cpufreq_has_generic_boost_support(&active)) { + printf(_(" boost state support:\n")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + } + + return 0; +} + /* --boost / -b */ static int get_boost_mode(unsigned int cpu) @@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu) cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); + else + get_boost_mode_generic(cpu); freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 59ace394cf3e..c2117e5650dd 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -21,6 +21,7 @@ static struct option set_opts[] = { {"epp", required_argument, NULL, 'e'}, {"amd-pstate-mode", required_argument, NULL, 'm'}, {"turbo-boost", required_argument, NULL, 't'}, + {"boost", required_argument, NULL, 't'}, { }, }; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 95749b8ee475..82ea62bdf5a2 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +int cpufreq_has_generic_boost_support(bool *active); +int cpupower_set_turbo_boost(int turbo_boost); + /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); extern int cpupower_set_amd_pstate_mode(char *mode); -extern int cpupower_set_turbo_boost(int turbo_boost); /* Read/Write msr ****************************/ @@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states, /* AMD HW pstate decoding **************************/ -extern int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states); +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; static inline int cpupower_set_amd_pstate_mode(char *mode) { return -1; }; -static inline int cpupower_set_turbo_boost(int turbo_boost) -{ return -1; }; /* Read/Write msr ****************************/ -static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states) +static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states) { return -1; } static inline bool cpupower_amd_pstate_enabled(void) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 76e461ff4f74..166dc1e470ea 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -8,15 +8,14 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" #include "cpufreq.h" +#include "cpupower_intern.h" #if defined(__i386__) || defined(__x86_64__) -#include "cpupower_intern.h" - #define MSR_AMD_HWCR 0xc0010015 -int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, - int *states) +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, + int *states) { int ret; unsigned long long val; @@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode) return 0; } -int cpupower_set_turbo_boost(int turbo_boost) -{ - char path[SYSFS_PATH_MAX]; - char linebuf[2] = {}; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); - - if (!is_valid_path(path)) - return -1; - - snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); - - if (cpupower_write_sysfs(path, linebuf, 2) <= 0) - return -1; - - return 0; -} - bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); @@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void) #endif /* #if defined(__i386__) || defined(__x86_64__) */ +int cpufreq_has_generic_boost_support(bool *active) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + unsigned long val; + char *endp; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -EACCES; + + if (cpupower_read_sysfs(path, linebuf, 2) <= 0) + return -EINVAL; + + val = strtoul(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -EINVAL; + + switch (val) { + case 0: + *active = false; + break; + case 1: + *active = true; + break; + default: + return -EINVAL; + } + + return 0; +} + /* get_cpustate * * Gather the information of all online CPUs into bitmask struct @@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding) } } } + +int cpupower_set_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + + return 0; +} From f604d3aaf64ff0d90cc875295474d3abf4155629 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:40 +0200 Subject: [PATCH 0394/1307] mlxsw: spectrum: Forward packets with an IPv4 link-local source IP By default, the device does not forward IPv4 packets with a link-local source IP (i.e., 169.254.0.0/16). This behavior does not align with the kernel which does forward them. Fix by instructing the device to forward such packets instead of dropping them. Fixes: ca360db4b825 ("mlxsw: spectrum: Disable DIP_LINK_LOCAL check in hardware pipeline") Reported-by: Zoey Mertes Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://patch.msgid.link/6721e6b2c96feb80269e72ce8d0b426e2f32d99c.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 618957d65663..9a2d64a0a858 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2375,6 +2375,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { ROUTER_EXP, false), MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* Multicast Router Traps */ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 80ee5c4825dc..9962dc157901 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -94,6 +94,7 @@ enum { MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LINK_LOCAL = 0x16D, MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, From 5e0b2177bdba99c2487480e9864825f742b684ee Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:41 +0200 Subject: [PATCH 0395/1307] selftest: forwarding: router: Add a test case for IPv4 link-local source IP Add a test case which checks that packets with an IPv4 link-local source IP are forwarded and not dropped. Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://patch.msgid.link/3c2e0b17d99530f57bef5ddff9af284fa0c9b667.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/router.sh | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare From c87bd4dd43a624109c3cc42d843138378a7f4548 Mon Sep 17 00:00:00 2001 From: Thijs Raymakers Date: Mon, 4 Aug 2025 08:44:05 +0200 Subject: [PATCH 0396/1307] KVM: x86: use array_index_nospec with indices that come from guest min and dest_id are guest-controlled indices. Using array_index_nospec() after the bounds checks clamps these values to mitigate speculative execution side-channels. Signed-off-by: Thijs Raymakers Cc: stable@vger.kernel.org Cc: Sean Christopherson Cc: Paolo Bonzini Cc: Greg Kroah-Hartman Fixes: 715062970f37 ("KVM: X86: Implement PV sched yield hypercall") Fixes: bdf7ffc89922 ("KVM: LAPIC: Fix pv ipis out-of-bounds access") Fixes: 4180bf1b655a ("KVM: X86: Implement "send IPI" hypercall") Link: https://lore.kernel.org/r/20250804064405.4802-1-thijs@raymakers.nl Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 2 ++ arch/x86/kvm/x86.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8172c2042dd6..5fc437341e03 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -810,6 +810,8 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, if (min > map->max_apic_id) return 0; + min = array_index_nospec(min, map->max_apic_id + 1); + for_each_set_bit(i, ipi_bitmap, min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { if (map->phys_map[min + i]) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..604490b1cb19 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9908,8 +9908,11 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) rcu_read_lock(); map = rcu_dereference(vcpu->kvm->arch.apic_map); - if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id]) - target = map->phys_map[dest_id]->vcpu; + if (likely(map) && dest_id <= map->max_apic_id) { + dest_id = array_index_nospec(dest_id, map->max_apic_id + 1); + if (map->phys_map[dest_id]) + target = map->phys_map[dest_id]->vcpu; + } rcu_read_unlock(); From 989fe6771266bdb82a815d78802c5aa7c918fdfd Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Wed, 13 Aug 2025 20:54:04 +0800 Subject: [PATCH 0397/1307] drm/nouveau/gsp: fix mismatched alloc/free for kvmalloc() Replace kfree() with kvfree() for memory allocated by kvmalloc(). Compile-tested only. Cc: stable@vger.kernel.org Fixes: 8a8b1ec5261f ("drm/nouveau/gsp: split rpc handling out on its own") Signed-off-by: Qianfeng Rong Reviewed-by: Timur Tabi Acked-by: Zhi Wang Link: https://lore.kernel.org/r/20250813125412.96178-1-rongqianfeng@vivo.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 9d06ff722fea..0dc4782df8c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -325,7 +325,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_peek(gsp, sizeof(*rpc), info.retries); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } @@ -334,7 +334,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_recv_one_elem(gsp, &info); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } From 6cc44e9618f03f1deb9a092698c0b0ce20990221 Mon Sep 17 00:00:00 2001 From: Javier Garcia Date: Sun, 10 Aug 2025 17:07:06 +0200 Subject: [PATCH 0398/1307] drm: Add directive to format code in comment Add formatting directive line in function `drm_gpuvm_sm_map_exec_lock()` comment to clear warning messages shown bellow that appears generating documentation `make htmldocs`. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2444: Unexpected indentation. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2446: Block quote ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2450: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2451: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2455: Unexpected indentation. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2456: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2457: Definition list ends without a blank line; unexpected unindent. Warning: ./drivers/gpu/drm/drm_gpuvm.c:2458: Definition list ends without a blank line; unexpected unindent. Fixes: 471920ce25d5 ("drm/gpuvm: Add locking helpers") Signed-off-by: Javier Garcia Reviewed-by: Bagas Sanjaya Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250810150706.305040-1-rampxxxx@gmail.com [ Fix typo in commit message. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_gpuvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index bbc7fecb6f4a..74d949995a72 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2432,6 +2432,8 @@ static const struct drm_gpuvm_ops lock_ops = { * * The expected usage is: * + * .. code-block:: c + * * vm_bind { * struct drm_exec exec; * From d19c541d269eddd9702b687fd4ba587c7da497a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Aug 2025 17:37:47 +0100 Subject: [PATCH 0399/1307] KVM: arm64: Correctly populate FAR_EL2 on nested SEA injection vcpu_write_sys_reg()'s signature is not totally obvious, and it is rather easy to write something that looks correct, except that... Oh wait... Swap addr and FAR_EL2 to restore some sanity in the nested SEA department. Fixes: 9aba641b9ec2a ("KVM: arm64: nv: Respect exception routing rules for SEAs") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250813163747.2591317-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/emulate-nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 90cb4b7ae0ff..af69c897c2c3 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2833,7 +2833,7 @@ int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW); esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL; - vcpu_write_sys_reg(vcpu, FAR_EL2, addr); + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE) return kvm_inject_nested(vcpu, esr, except_type_serror); From 12da2b92ad50e6602b4c5e9073d71f2368b70b63 Mon Sep 17 00:00:00 2001 From: Chandra Mohan Sundar Date: Thu, 14 Aug 2025 22:00:10 +0530 Subject: [PATCH 0400/1307] net: libwx: Fix the size in RSS hash key population While trying to fill a random RSS key, the size of the pointer is being used rather than the actual size of the RSS key. Fix by passing an appropriate value of the RSS key. This issue was reported by static coverity analyser. Fixes: eb4898fde1de8 ("net: libwx: add wangxun vf common api") Signed-off-by: Chandra Mohan Sundar Link: https://patch.msgid.link/20250814163014.613004-1-chandramohan.explore@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index 5d48df7a849f..3023ea2732ef 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -192,7 +192,7 @@ void wx_setup_vfmrqc_vf(struct wx *wx) u8 i, j; /* Fill out hash function seeds */ - netdev_rss_key_fill(wx->rss_key, sizeof(wx->rss_key)); + netdev_rss_key_fill(wx->rss_key, WX_RSS_KEY_SIZE); for (i = 0; i < WX_RSS_KEY_SIZE / 4; i++) wr32(wx, WX_VXRSSRK(i), wx->rss_key[i]); From db2e7bcee11cd57f95fef3c6cbb562d0577eb84a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 24 Jul 2025 18:54:41 +0200 Subject: [PATCH 0401/1307] drm: nova-drm: fix 32-bit arm build In 32-bit arm, the build fails with: error[E0308]: mismatched types --> drivers/gpu/drm/nova/file.rs:42:28 | 42 | getparam.set_value(value); | --------- ^^^^^ expected `u64`, found `u32` | | | arguments to this method are incorrect | note: method defined here --> drivers/gpu/drm/nova/uapi.rs:29:12 | 29 | pub fn set_value(&self, v: u64) { | ^^^^^^^^^ ------ help: you can convert a `u32` to a `u64` | 42 | getparam.set_value(value.into()); | +++++++ The reason is that `Getparam::set_value` takes a `u64` (from the UAPI), but `pci::Device::resource_len()` returns a `resource_size_t`, which is a `phys_addr_t`, which may be 32- or 64-bit. Thus add an `into()` call to support the 32-bit case, while allowing the Clippy lint that complains in the 64-bit case where the type is the same. Fixes: cdeaeb9dd762 ("drm: nova-drm: add initial driver skeleton") Signed-off-by: Miguel Ojeda Reviewed-by: Christian Schrefl Link: https://lore.kernel.org/r/20250724165441.2105632-1-ojeda@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nova/file.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nova/file.rs b/drivers/gpu/drm/nova/file.rs index 7e59a34b830d..4fe62cf98a23 100644 --- a/drivers/gpu/drm/nova/file.rs +++ b/drivers/gpu/drm/nova/file.rs @@ -39,7 +39,8 @@ pub(crate) fn get_param( _ => return Err(EINVAL), }; - getparam.set_value(value); + #[allow(clippy::useless_conversion)] + getparam.set_value(value.into()); Ok(0) } From 1f403699c40f0806a707a9a6eed3b8904224021a Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 12 Aug 2025 15:19:32 +0800 Subject: [PATCH 0402/1307] drm/mediatek: Fix device/node reference count leaks in mtk_drm_get_all_drm_priv Using device_find_child() and of_find_device_by_node() to locate devices could cause an imbalance in the device's reference count. device_find_child() and of_find_device_by_node() both call get_device() to increment the reference count of the found device before returning the pointer. In mtk_drm_get_all_drm_priv(), these references are never released through put_device(), resulting in permanent reference count increments. Additionally, the for_each_child_of_node() iterator fails to release node references in all code paths. This leaks device node references when loop termination occurs before reaching MAX_CRTC. These reference count leaks may prevent device/node resources from being properly released during driver unbind operations. As comment of device_find_child() says, 'NOTE: you will need to drop the reference with put_device() after use'. Cc: stable@vger.kernel.org Fixes: 1ef7ed48356c ("drm/mediatek: Modify mediatek-drm for mt8195 multi mmsys support") Signed-off-by: Ma Ke Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250812071932.471730-1-make24@iscas.ac.cn/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index d5e6bab36414..f8a817689e16 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -387,19 +387,19 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) of_id = of_match_node(mtk_drm_of_ids, node); if (!of_id) - continue; + goto next_put_node; pdev = of_find_device_by_node(node); if (!pdev) - continue; + goto next_put_node; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); if (!drm_dev) - continue; + goto next_put_device_pdev_dev; temp_drm_priv = dev_get_drvdata(drm_dev); if (!temp_drm_priv) - continue; + goto next_put_device_drm_dev; if (temp_drm_priv->data->main_len) all_drm_priv[CRTC_MAIN] = temp_drm_priv; @@ -411,10 +411,17 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) if (temp_drm_priv->mtk_drm_bound) cnt++; - if (cnt == MAX_CRTC) { - of_node_put(node); +next_put_device_drm_dev: + put_device(drm_dev); + +next_put_device_pdev_dev: + put_device(&pdev->dev); + +next_put_node: + of_node_put(node); + + if (cnt == MAX_CRTC) break; - } } if (drm_priv->data->mmsys_dev_num == cnt) { From c27973211ffcdf0a092eec265d5993e64b89adaf Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Fri, 15 Aug 2025 12:00:28 +0800 Subject: [PATCH 0403/1307] md: keep recovery_cp in mdp_superblock_s commit 907a99c314a5 ("md: rename recovery_cp to resync_offset") replaces recovery_cp with resync_offset in mdp_superblock_s which is in md_p.h. md_p.h is used in userspace too. So mdadm building fails because of this. This patch revert this change. Fixes: 907a99c314a5 ("md: rename recovery_cp to resync_offset") Signed-off-by: Xiao Ni Link: https://lore.kernel.org/linux-raid/20250815040028.18085-1-xni@redhat.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 6 +++--- include/uapi/linux/raid/md_p.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 772cffe02ff5..3836fc7eff67 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1423,7 +1423,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->resync_offset = sb->resync_offset; + mddev->resync_offset = sb->recovery_cp; } else mddev->resync_offset = 0; } @@ -1551,13 +1551,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->resync_offset = mddev->resync_offset; + sb->recovery_cp = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->resync_offset = 0; + sb->recovery_cp = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b13946287277..ac74133a4768 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 resync_offset; /* 11 resync checkpoint sector count */ + __u32 recovery_cp; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ From cb0780ad4333040a98e10f014b593ef738a3f31e Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Sat, 16 Aug 2025 08:25:33 +0800 Subject: [PATCH 0404/1307] md: add helper rdev_needs_recovery() Add a helper for checking if an rdev needs recovery. Signed-off-by: Zheng Qixing Link: https://lore.kernel.org/linux-raid/20250816002534.1754356-2-zhengqixing@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3836fc7eff67..abd327ade4bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4839,6 +4839,15 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry md_metadata = __ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); +static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) +{ + return rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sectors; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; @@ -8995,11 +9004,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) start = MaxSector; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < start) + if (rdev_needs_recovery(rdev, start)) start = rdev->recovery_offset; rcu_read_unlock(); @@ -9358,12 +9363,8 @@ void md_do_sync(struct md_thread *thread) test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) + if (mddev->delta_disks >= 0 && + rdev_needs_recovery(rdev, mddev->curr_resync)) rdev->recovery_offset = mddev->curr_resync; rcu_read_unlock(); } From b7ee30f0efd12f42735ae233071015389407966c Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Sat, 16 Aug 2025 08:25:34 +0800 Subject: [PATCH 0405/1307] md: fix sync_action incorrect display during resync During raid resync, if a disk becomes faulty, the operation is briefly interrupted. The MD_RECOVERY_RECOVER flag triggered by the disk failure causes sync_action to incorrectly show "recover" instead of "resync". The same issue affects reshape operations. Reproduction steps: mdadm -Cv /dev/md1 -l1 -n4 -e1.2 /dev/sd{a..d} // -> resync happened mdadm -f /dev/md1 /dev/sda // -> resync interrupted cat sync_action -> recover Add progress checks in md_sync_action() for resync/recover/reshape to ensure the interface correctly reports the actual operation type. Fixes: 4b10a3bc67c1 ("md: ensure resync is prioritized over recovery") Signed-off-by: Zheng Qixing Link: https://lore.kernel.org/linux-raid/20250816002534.1754356-3-zhengqixing@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index abd327ade4bd..1baaf52c603c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4848,9 +4848,33 @@ static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) rdev->recovery_offset < sectors; } +static enum sync_action md_get_active_sync_action(struct mddev *mddev) +{ + struct md_rdev *rdev; + bool is_recover = false; + + if (mddev->resync_offset < MaxSector) + return ACTION_RESYNC; + + if (mddev->reshape_position != MaxSector) + return ACTION_RESHAPE; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev_needs_recovery(rdev, MaxSector)) { + is_recover = true; + break; + } + } + rcu_read_unlock(); + + return is_recover ? ACTION_RECOVER : ACTION_IDLE; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; + enum sync_action active_action; /* * frozen has the highest priority, means running sync_thread will be @@ -4874,8 +4898,17 @@ enum sync_action md_sync_action(struct mddev *mddev) !test_bit(MD_RECOVERY_NEEDED, &recovery)) return ACTION_IDLE; - if (test_bit(MD_RECOVERY_RESHAPE, &recovery) || - mddev->reshape_position != MaxSector) + /* + * Check if any sync operation (resync/recover/reshape) is + * currently active. This ensures that only one sync operation + * can run at a time. Returns the type of active operation, or + * ACTION_IDLE if none are active. + */ + active_action = md_get_active_sync_action(mddev); + if (active_action != ACTION_IDLE) + return active_action; + + if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return ACTION_RESHAPE; if (test_bit(MD_RECOVERY_RECOVER, &recovery)) From 715c7a36d59f54162a26fac1d1ed8dc087a24cf1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Aug 2025 12:43:23 -0700 Subject: [PATCH 0406/1307] selftests: tls: make the new data_steal test less flaky The CI has hit a couple of cases of: RUN global.data_steal ... tls.c:2762:data_steal:Expected recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT) (20000) == -1 (-1) data_steal: Test terminated by timeout FAIL global.data_steal Looks like the 2msec sleep is not long enough. Make the sleep longer, and then instead of second sleep wait for the thieving process to exit. That way we can be sure it called recv() before us. While at it also avoid trying to steal more than a record, this seems to be causing issues in manual testing as well. Fixes: d7e82594a45c ("selftests: tls: test TCP stealing data from under the TLS socket") Link: https://patch.msgid.link/20250814194323.2014650-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index d8cfcf9bb825..2b8387a83bc7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2748,17 +2748,18 @@ TEST(data_steal) { pid = fork(); ASSERT_GE(pid, 0); if (!pid) { - EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), - sizeof(buf)); + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); exit(!__test_passed(_metadata)); } - usleep(2000); + usleep(10000); ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); - usleep(2000); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); /* Don't check errno, the error will be different depending * on what random bytes TLS interpreted as the record length. @@ -2766,9 +2767,6 @@ TEST(data_steal) { close(fd); close(cfd); - - EXPECT_EQ(wait(&status), pid); - EXPECT_EQ(status, 0); } static void __attribute__((constructor)) fips_check(void) { From 39ca24675b7e351b8e681d924f417e455d4a7fc1 Mon Sep 17 00:00:00 2001 From: "Avizrat, Yaron" Date: Thu, 14 Aug 2025 10:44:07 +0000 Subject: [PATCH 0407/1307] MAINTAINERS: Change habanalabs maintainers I will be leaving Intel soon, Koby Elbaz & Konstantin Sinyuk will take the role of habanalabs driver maintainers. Signed-off-by: Yaron Avizrat Reviewed-by: Koby Elbaz Reviewed-by: Konstantin Sinyuk Link: https://patch.msgid.link/DM4PR11MB55491ACAA33DF29CEF3C67DAE935A@DM4PR11MB5549.namprd11.prod.outlook.com [lukas: wrap to 72 chars] Signed-off-by: Lukas Wunner --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 20ffd334e0a7..433fc4e5e68e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10666,7 +10666,8 @@ S: Maintained F: block/partitions/efi.* HABANALABS PCI DRIVER -M: Yaron Avizrat +M: Koby Elbaz +M: Konstantin Sinyuk L: dri-devel@lists.freedesktop.org S: Supported C: irc://irc.oftc.net/dri-devel From a44458dfd5bc0c79c6739c3f4c658361d3a5126b Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 21 Aug 2024 01:10:28 +0200 Subject: [PATCH 0408/1307] accel/habanalabs/gaudi2: Use kvfree() for memory allocated with kvcalloc() Use kvfree() to fix the following Coccinelle/coccicheck warning reported by kfree_mismatch.cocci: WARNING kvmalloc is used to allocate this memory at line 10398 Fixes: f728c17fc97a ("accel/habanalabs/gaudi2: move HMMU page tables to device memory") Reported-by: Qianfeng Rong Closes: https://patch.msgid.link/20250808085530.233737-1-rongqianfeng@vivo.com Signed-off-by: Thorsten Blum [lukas: acknowledge Qianfeng, adjust Thorsten's domain, add Fixes tag] Signed-off-by: Lukas Wunner Reviewed-by: Tomer Tayar Cc: stable@vger.kernel.org # v6.9+ Link: https://patch.msgid.link/20240820231028.136126-1-thorsten.blum@toblux.com --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index a38b88baadf2..5722e4128d3c 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -10437,7 +10437,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64); WREG32(sob_addr, 0); - kfree(lin_dma_pkts_arr); + kvfree(lin_dma_pkts_arr); return rc; } From 8a6ededaad2d2dcaac8e545bffee1073dca9db95 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 13 Aug 2025 12:16:06 +0300 Subject: [PATCH 0409/1307] iio: adc: bd79124: Add GPIOLIB dependency The bd79124 has ADC inputs which can be muxed to be GPIOs. The driver supports this by registering a GPIO-chip for channels which aren't used as ADC. The Kconfig entry does not handle the dependency to GPIOLIB, which causes errors: ERROR: modpost: "devm_gpiochip_add_data_with_key" [drivers/iio/adc/rohm-bd79124.ko] undefined! ERROR: modpost: "gpiochip_get_data" [drivers/iio/adc/rohm-bd79124.ko] undefined! at linking phase if GPIOLIB is not configured to be used. Fix this by adding dependency to the GPIOLIB. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508131533.5sSkq80B-lkp@intel.com/ Fixes: 3f57a3b9ab74 ("iio: adc: Support ROHM BD79124 ADC") Signed-off-by: Matti Vaittinen Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/6837249bddf358924e67566293944506206d2d62.1755076369.git.mazziesaccount@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 6de2abad0197..24f2572c487e 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -1300,7 +1300,7 @@ config RN5T618_ADC config ROHM_BD79124 tristate "Rohm BD79124 ADC driver" - depends on I2C + depends on I2C && GPIOLIB select REGMAP_I2C select IIO_ADC_HELPER help From d1dfcdd30140c031ae091868fb5bed084132bca1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Aug 2025 17:50:14 +0200 Subject: [PATCH 0410/1307] pcmcia: omap_cf: Mark driver struct with __refdata to prevent section mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described in the added code comment, a reference to .exit.text is ok for drivers registered via platform_driver_probe(). Make this explicit to prevent the following section mismatch warning WARNING: modpost: drivers/pcmcia/omap_cf: section mismatch in reference: omap_cf_driver+0x4 (section: .data) -> omap_cf_remove (section: .exit.text) that triggers on an omap1_defconfig + CONFIG_OMAP_CF=m build. Signed-off-by: Geert Uytterhoeven Acked-by: Aaro Koskinen Reviewed-by: Uwe Kleine-König Signed-off-by: Dominik Brodowski --- drivers/pcmcia/omap_cf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c index 1b1dff56ec7b..733777367c3b 100644 --- a/drivers/pcmcia/omap_cf.c +++ b/drivers/pcmcia/omap_cf.c @@ -302,7 +302,13 @@ static void __exit omap_cf_remove(struct platform_device *pdev) kfree(cf); } -static struct platform_driver omap_cf_driver = { +/* + * omap_cf_remove() lives in .exit.text. For drivers registered via + * platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver omap_cf_driver __refdata = { .driver = { .name = driver_name, }, From 44822df89e8f3386871d9cad563ece8e2fd8f0e7 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 12 Aug 2025 15:25:09 +0800 Subject: [PATCH 0411/1307] pcmcia: Fix a NULL pointer dereference in __iodyn_find_io_region() In __iodyn_find_io_region(), pcmcia_make_resource() is assigned to res and used in pci_bus_alloc_resource(). There is a dereference of res in pci_bus_alloc_resource(), which could lead to a NULL pointer dereference on failure of pcmcia_make_resource(). Fix this bug by adding a check of res. Cc: stable@vger.kernel.org Fixes: 49b1153adfe1 ("pcmcia: move all pcmcia_resource_ops providers into one module") Signed-off-by: Ma Ke Signed-off-by: Dominik Brodowski --- drivers/pcmcia/rsrc_iodyn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pcmcia/rsrc_iodyn.c b/drivers/pcmcia/rsrc_iodyn.c index b04b16496b0c..2677b577c1f8 100644 --- a/drivers/pcmcia/rsrc_iodyn.c +++ b/drivers/pcmcia/rsrc_iodyn.c @@ -62,6 +62,9 @@ static struct resource *__iodyn_find_io_region(struct pcmcia_socket *s, unsigned long min = base; int ret; + if (!res) + return NULL; + data.mask = align - 1; data.offset = base & data.mask; From b04e4551893fb8a06106a175ed7055d41a9279c4 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 11 Aug 2025 13:32:31 -0500 Subject: [PATCH 0412/1307] iio: adc: ad7380: fix missing max_conversion_rate_hz on adaq4381-4 Add max_conversion_rate_hz to the chip info for "adaq4381-4". Without this, the driver fails to probe because it tries to set the initial sample rate to 0 Hz, which is not valid. Fixes: bbeaec81a03e ("iio: ad7380: add support for SPI offload") Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250811-iio-adc-ad7380-fix-missing-max_conversion_rate_hs-on-ad4381-4-v1-1-ffb728d7a71c@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 6f7034b6c266..fa251dc1aae6 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -873,6 +873,7 @@ static const struct ad7380_chip_info adaq4381_4_chip_info = { .has_hardware_gain = true, .available_scan_masks = ad7380_4_channel_scan_masks, .timing_specs = &ad7380_4_timing, + .max_conversion_rate_hz = 4 * MEGA, }; static const struct spi_offload_config ad7380_offload_config = { From ce0e8efb8438469aedb94746603a66e2de91852b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 11 Aug 2025 09:04:37 +0700 Subject: [PATCH 0413/1307] pcmcia: ds: Emphasize "really" epizeuxis pcmcia_dev_present() comment writes about small timeframe where the function returns NULL (racy condition), in epizeuxis device. In the past, it tripped contributors who misunderstood it for "really" duplicate ([1], [2]). Add comma between "really" words to emphasize the epizeuxis. Link: https://lore.kernel.org/all/YxT1As38WRZcyH0%2F@shine.dominikbrodowski.net/ [1] Link: https://lore.kernel.org/all/20220831083623.GA25639@isilmar-4.linta.de/ [2] Signed-off-by: Bagas Sanjaya Signed-off-by: Dominik Brodowski --- drivers/pcmcia/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index da6f66f357cc..18f4eef28dbc 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1308,7 +1308,7 @@ static int pcmcia_bus_early_resume(struct pcmcia_socket *skt) * physically present, even if the call to this function returns * non-NULL. Furthermore, the device driver most likely is unbound * almost immediately, so the timeframe where pcmcia_dev_present - * returns NULL is probably really really small. + * returns NULL is probably really, really small. */ struct pcmcia_device *pcmcia_dev_present(struct pcmcia_device *_p_dev) { From 4bf1541e41d0540f9fcce8a32424ab05ae26fdca Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 30 Jun 2025 11:32:56 +0200 Subject: [PATCH 0414/1307] pcmcia: remove PCCARD_IODYN The config PCCARD_IODYN was last used in the config option PCMCIA_M8XX with its m8xx_pcmcia driver. This driver was removed with commit 39eb56da2b53 ("pcmcia: Remove m8xx_pcmcia driver"), included in v3.17, back in 2014. Since then, the config PCCARD_IODYN is unused. Remove the config option, the corresponding file included with this config and the corresponding definition in the pcmcia header file. Signed-off-by: Lukas Bulwahn Signed-off-by: Dominik Brodowski --- drivers/pcmcia/Kconfig | 3 - drivers/pcmcia/Makefile | 1 - drivers/pcmcia/rsrc_iodyn.c | 171 ------------------------------------ include/pcmcia/ss.h | 8 +- 4 files changed, 1 insertion(+), 182 deletions(-) delete mode 100644 drivers/pcmcia/rsrc_iodyn.c diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index dddb235dd020..660a95805524 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -250,7 +250,4 @@ config ELECTRA_CF config PCCARD_NONSTATIC bool -config PCCARD_IODYN - bool - endif # PCCARD diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index c9d51b150682..d16a0317ce43 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_PCMCIA) += pcmcia.o pcmcia_rsrc-y += rsrc_mgr.o pcmcia_rsrc-$(CONFIG_PCCARD_NONSTATIC) += rsrc_nonstatic.o -pcmcia_rsrc-$(CONFIG_PCCARD_IODYN) += rsrc_iodyn.o obj-$(CONFIG_PCCARD) += pcmcia_rsrc.o diff --git a/drivers/pcmcia/rsrc_iodyn.c b/drivers/pcmcia/rsrc_iodyn.c deleted file mode 100644 index 2677b577c1f8..000000000000 --- a/drivers/pcmcia/rsrc_iodyn.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * rsrc_iodyn.c -- Resource management routines for MEM-static sockets. - * - * The initial developer of the original code is David A. Hinds - * . Portions created by David A. Hinds - * are Copyright (C) 1999 David A. Hinds. All Rights Reserved. - * - * (C) 1999 David A. Hinds - */ - -#include -#include -#include - -#include -#include -#include "cs_internal.h" - - -struct pcmcia_align_data { - unsigned long mask; - unsigned long offset; -}; - -static resource_size_t pcmcia_align(void *align_data, - const struct resource *res, - resource_size_t size, resource_size_t align) -{ - struct pcmcia_align_data *data = align_data; - resource_size_t start; - - start = (res->start & ~data->mask) + data->offset; - if (start < res->start) - start += data->mask + 1; - -#ifdef CONFIG_X86 - if (res->flags & IORESOURCE_IO) { - if (start & 0x300) - start = (start + 0x3ff) & ~0x3ff; - } -#endif - -#ifdef CONFIG_M68K - if (res->flags & IORESOURCE_IO) { - if ((res->start + size - 1) >= 1024) - start = res->end; - } -#endif - - return start; -} - - -static struct resource *__iodyn_find_io_region(struct pcmcia_socket *s, - unsigned long base, int num, - unsigned long align) -{ - struct resource *res = pcmcia_make_resource(0, num, IORESOURCE_IO, - dev_name(&s->dev)); - struct pcmcia_align_data data; - unsigned long min = base; - int ret; - - if (!res) - return NULL; - - data.mask = align - 1; - data.offset = base & data.mask; - -#ifdef CONFIG_PCI - if (s->cb_dev) { - ret = pci_bus_alloc_resource(s->cb_dev->bus, res, num, 1, - min, 0, pcmcia_align, &data); - } else -#endif - ret = allocate_resource(&ioport_resource, res, num, min, ~0UL, - 1, pcmcia_align, &data); - - if (ret != 0) { - kfree(res); - res = NULL; - } - return res; -} - -static int iodyn_find_io(struct pcmcia_socket *s, unsigned int attr, - unsigned int *base, unsigned int num, - unsigned int align, struct resource **parent) -{ - int i, ret = 0; - - /* Check for an already-allocated window that must conflict with - * what was asked for. It is a hack because it does not catch all - * potential conflicts, just the most obvious ones. - */ - for (i = 0; i < MAX_IO_WIN; i++) { - if (!s->io[i].res) - continue; - - if (!*base) - continue; - - if ((s->io[i].res->start & (align-1)) == *base) - return -EBUSY; - } - - for (i = 0; i < MAX_IO_WIN; i++) { - struct resource *res = s->io[i].res; - unsigned int try; - - if (res && (res->flags & IORESOURCE_BITS) != - (attr & IORESOURCE_BITS)) - continue; - - if (!res) { - if (align == 0) - align = 0x10000; - - res = s->io[i].res = __iodyn_find_io_region(s, *base, - num, align); - if (!res) - return -EINVAL; - - *base = res->start; - s->io[i].res->flags = - ((res->flags & ~IORESOURCE_BITS) | - (attr & IORESOURCE_BITS)); - s->io[i].InUse = num; - *parent = res; - return 0; - } - - /* Try to extend top of window */ - try = res->end + 1; - if ((*base == 0) || (*base == try)) { - if (adjust_resource(s->io[i].res, res->start, - resource_size(res) + num)) - continue; - *base = try; - s->io[i].InUse += num; - *parent = res; - return 0; - } - - /* Try to extend bottom of window */ - try = res->start - num; - if ((*base == 0) || (*base == try)) { - if (adjust_resource(s->io[i].res, - res->start - num, - resource_size(res) + num)) - continue; - *base = try; - s->io[i].InUse += num; - *parent = res; - return 0; - } - } - - return -EINVAL; -} - - -struct pccard_resource_ops pccard_iodyn_ops = { - .validate_mem = NULL, - .find_io = iodyn_find_io, - .find_mem = NULL, - .init = static_init, - .exit = NULL, -}; -EXPORT_SYMBOL(pccard_iodyn_ops); diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 7cf7dbbfa131..89aed99bfeae 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -227,12 +227,8 @@ struct pcmcia_socket { /* socket drivers must define the resource operations type they use. There - * are three options: + * are two options: * - pccard_static_ops iomem and ioport areas are assigned statically - * - pccard_iodyn_ops iomem areas is assigned statically, ioport - * areas dynamically - * If this option is selected, use - * "select PCCARD_IODYN" in Kconfig. * - pccard_nonstatic_ops iomem and ioport areas are assigned dynamically. * If this option is selected, use * "select PCCARD_NONSTATIC" in Kconfig. @@ -240,13 +236,11 @@ struct pcmcia_socket { */ extern struct pccard_resource_ops pccard_static_ops; #if defined(CONFIG_PCMCIA) || defined(CONFIG_PCMCIA_MODULE) -extern struct pccard_resource_ops pccard_iodyn_ops; extern struct pccard_resource_ops pccard_nonstatic_ops; #else /* If PCMCIA is not used, but only CARDBUS, these functions are not used * at all. Therefore, do not use the large (240K!) rsrc_nonstatic module */ -#define pccard_iodyn_ops pccard_static_ops #define pccard_nonstatic_ops pccard_static_ops #endif From 9e1ee333631a6d2b5f4ed9449ee2d595b6a87b81 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 11 Apr 2025 10:44:29 +0200 Subject: [PATCH 0415/1307] pcmcia: Use str_off_on() and str_yes_no() helpers Remove hard-coded strings by using the str_off_on() and str_yes_no() helper functions. Signed-off-by: Thorsten Blum Signed-off-by: Dominik Brodowski --- drivers/pcmcia/socket_sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pcmcia/socket_sysfs.c b/drivers/pcmcia/socket_sysfs.c index c7a906664c36..4eadd0485066 100644 --- a/drivers/pcmcia/socket_sysfs.c +++ b/drivers/pcmcia/socket_sysfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -98,7 +99,7 @@ static ssize_t pccard_show_card_pm_state(struct device *dev, char *buf) { struct pcmcia_socket *s = to_socket(dev); - return sysfs_emit(buf, "%s\n", s->state & SOCKET_SUSPEND ? "off" : "on"); + return sysfs_emit(buf, "%s\n", str_off_on(s->state & SOCKET_SUSPEND)); } static ssize_t pccard_store_card_pm_state(struct device *dev, @@ -177,7 +178,7 @@ static ssize_t pccard_show_resource(struct device *dev, struct device_attribute *attr, char *buf) { struct pcmcia_socket *s = to_socket(dev); - return sysfs_emit(buf, "%s\n", s->resource_setup_done ? "yes" : "no"); + return sysfs_emit(buf, "%s\n", str_yes_no(s->resource_setup_done)); } static ssize_t pccard_store_resource(struct device *dev, From ecef14f70ec9344a10c817248d2ac6cddee5921e Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Thu, 20 Mar 2025 14:39:56 +0800 Subject: [PATCH 0416/1307] pcmcia: omap: Add missing check for platform_get_resource Add missing check for platform_get_resource() and return error if it fails to catch the error. Fixes: d87d44f7ab35 ("ARM: omap1: move CF chipselect setup to board file") Signed-off-by: Chen Ni Signed-off-by: Dominik Brodowski --- drivers/pcmcia/omap_cf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c index 733777367c3b..d6f24c7d1562 100644 --- a/drivers/pcmcia/omap_cf.c +++ b/drivers/pcmcia/omap_cf.c @@ -215,6 +215,8 @@ static int __init omap_cf_probe(struct platform_device *pdev) return -EINVAL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; cf = kzalloc(sizeof *cf, GFP_KERNEL); if (!cf) From 750da5029fd914b647d3063dacdadf56b9a9a046 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Mar 2025 01:01:25 +0000 Subject: [PATCH 0417/1307] pcmcia: cs: Remove unused pcmcia_get_socket_by_nr The last use of pcmcia_get_socket_by_nr() was removed in 2010 by commit 5716d415f8c5 ("pcmcia: remove obsolete ioctl") Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Dominik Brodowski --- drivers/pcmcia/cs.c | 17 ----------------- drivers/pcmcia/cs_internal.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index c75f55e1250a..adbc486af2ea 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -229,23 +229,6 @@ void pcmcia_unregister_socket(struct pcmcia_socket *socket) EXPORT_SYMBOL(pcmcia_unregister_socket); -struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr) -{ - struct pcmcia_socket *s; - - down_read(&pcmcia_socket_list_rwsem); - list_for_each_entry(s, &pcmcia_socket_list, socket_list) - if (s->sock == nr) { - up_read(&pcmcia_socket_list_rwsem); - return s; - } - up_read(&pcmcia_socket_list_rwsem); - - return NULL; - -} -EXPORT_SYMBOL(pcmcia_get_socket_by_nr); - static int socket_reset(struct pcmcia_socket *skt) { int status, i; diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 02a83ca44e77..5ac810ffda31 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -116,7 +116,6 @@ extern struct list_head pcmcia_socket_list; extern const struct class pcmcia_socket_class; int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c); -struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr); void pcmcia_parse_uevents(struct pcmcia_socket *socket, unsigned int events); #define PCMCIA_UEVENT_EJECT 0x0001 From 4a81f78caa53e0633cf311ca1526377d9bff7479 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Mon, 20 Jan 2025 21:10:06 +0800 Subject: [PATCH 0418/1307] pcmcia: Add error handling for add_interval() in do_validate_mem() In the do_validate_mem(), the call to add_interval() does not handle errors. If kmalloc() fails in add_interval(), it could result in a null pointer being inserted into the linked list, leading to illegal memory access when sub_interval() is called next. This patch adds an error handling for the add_interval(). If add_interval() returns an error, the function will return early with the error code. Fixes: 7b4884ca8853 ("pcmcia: validate late-added resources") Signed-off-by: Wentao Liang Signed-off-by: Dominik Brodowski --- drivers/pcmcia/rsrc_nonstatic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index bf9d070a4496..da494fe451ba 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -375,7 +375,9 @@ static int do_validate_mem(struct pcmcia_socket *s, if (validate && !s->fake_cis) { /* move it to the validated data set */ - add_interval(&s_data->mem_db_valid, base, size); + ret = add_interval(&s_data->mem_db_valid, base, size); + if (ret) + return ret; sub_interval(&s_data->mem_db, base, size); } From a3c6eabe3bbd6b0e7124d68b2d3bc32fed17362e Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Sun, 10 Aug 2025 15:33:27 +0300 Subject: [PATCH 0419/1307] iio: adc: rzg2l: Cleanup suspend/resume path There is no need to manually track the runtime PM status in the driver. The pm_runtime_force_suspend() and pm_runtime_force_resume() functions already call pm_runtime_status_suspended() to check the runtime PM state. Additionally, avoid calling pm_runtime_put_autosuspend() during the suspend/resume path, as this would decrease the usage counter of a potential user that had the ADC open before the suspend/resume cycle. Fixes: 563cf94f9329 ("iio: adc: rzg2l_adc: Add suspend/resume support") Reviewed-by: Ulf Hansson Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250810123328.800104-2-claudiu.beznea.uj@bp.renesas.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rzg2l_adc.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 9674d48074c9..0cb5a67fd497 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -89,7 +89,6 @@ struct rzg2l_adc { struct completion completion; struct mutex lock; u16 last_val[RZG2L_ADC_MAX_CHANNELS]; - bool was_rpm_active; }; /** @@ -541,14 +540,9 @@ static int rzg2l_adc_suspend(struct device *dev) }; int ret; - if (pm_runtime_suspended(dev)) { - adc->was_rpm_active = false; - } else { - ret = pm_runtime_force_suspend(dev); - if (ret) - return ret; - adc->was_rpm_active = true; - } + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; ret = reset_control_bulk_assert(ARRAY_SIZE(resets), resets); if (ret) @@ -557,9 +551,7 @@ static int rzg2l_adc_suspend(struct device *dev) return 0; rpm_restore: - if (adc->was_rpm_active) - pm_runtime_force_resume(dev); - + pm_runtime_force_resume(dev); return ret; } @@ -577,11 +569,9 @@ static int rzg2l_adc_resume(struct device *dev) if (ret) return ret; - if (adc->was_rpm_active) { - ret = pm_runtime_force_resume(dev); - if (ret) - goto resets_restore; - } + ret = pm_runtime_force_resume(dev); + if (ret) + goto resets_restore; ret = rzg2l_adc_hw_init(dev, adc); if (ret) @@ -590,10 +580,7 @@ static int rzg2l_adc_resume(struct device *dev) return 0; rpm_restore: - if (adc->was_rpm_active) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } + pm_runtime_force_suspend(dev); resets_restore: reset_control_bulk_assert(ARRAY_SIZE(resets), resets); return ret; From c69e13965f26b8058f538ea8bdbd2d7718cf1fbe Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Sun, 10 Aug 2025 15:33:28 +0300 Subject: [PATCH 0420/1307] iio: adc: rzg2l_adc: Set driver data before enabling runtime PM When stress-testing the system by repeatedly unbinding and binding the ADC device in a loop, and the ADC is a supplier for another device (e.g., a thermal hardware block that reads temperature through the ADC), it may happen that the ADC device is runtime-resumed immediately after runtime PM is enabled, triggered by its consumer. At this point, since drvdata is not yet set and the driver's runtime PM callbacks rely on it, a crash can occur. To avoid this, set drvdata just after it was allocated. Fixes: 89ee8174e8c8 ("iio: adc: rzg2l_adc: Simplify the runtime PM code") Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250810123328.800104-3-claudiu.beznea.uj@bp.renesas.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rzg2l_adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 0cb5a67fd497..cadb0446bc29 100644 --- a/drivers/iio/adc/rzg2l_adc.c +++ b/drivers/iio/adc/rzg2l_adc.c @@ -427,6 +427,8 @@ static int rzg2l_adc_probe(struct platform_device *pdev) if (!indio_dev) return -ENOMEM; + platform_set_drvdata(pdev, indio_dev); + adc = iio_priv(indio_dev); adc->hw_params = device_get_match_data(dev); @@ -459,8 +461,6 @@ static int rzg2l_adc_probe(struct platform_device *pdev) if (ret) return ret; - platform_set_drvdata(pdev, indio_dev); - ret = rzg2l_adc_hw_init(dev, adc); if (ret) return dev_err_probe(&pdev->dev, ret, From 433b99e922943efdfd62b9a8e3ad1604838181f2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 2 Aug 2025 17:44:21 +0100 Subject: [PATCH 0421/1307] iio: light: as73211: Ensure buffer holes are zeroed Given that the buffer is copied to a kfifo that ultimately user space can read, ensure we zero it. Fixes: 403e5586b52e ("iio: light: as73211: New driver") Reviewed-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250802164436.515988-2-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/as73211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/as73211.c b/drivers/iio/light/as73211.c index 68f60dc3c79d..32719f584c47 100644 --- a/drivers/iio/light/as73211.c +++ b/drivers/iio/light/as73211.c @@ -639,7 +639,7 @@ static irqreturn_t as73211_trigger_handler(int irq __always_unused, void *p) struct { __le16 chan[4]; aligned_s64 ts; - } scan; + } scan = { }; int data_result, ret; mutex_lock(&data->mutex); From 5eb4b9a4cdbb70d70377fe8fb2920b75910e5024 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 13 Aug 2025 15:21:59 +0200 Subject: [PATCH 0422/1307] params: Replace deprecated strcpy() with strscpy() and memcpy() strcpy() is deprecated; use strscpy() and memcpy() instead. In param_set_copystring(), we can safely use memcpy() because we already know the length of the source string 'val' and that it is guaranteed to be NUL-terminated within the first 'kps->maxlen' bytes. Link: https://github.com/KSPP/linux/issues/88 Signed-off-by: Thorsten Blum Reviewed-by: Daniel Gomez Reviewed-by: Petr Pavlu Link: https://lore.kernel.org/r/20250813132200.184064-2-thorsten.blum@linux.dev Signed-off-by: Daniel Gomez --- kernel/params.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index b92d64161b75..b96cfd693c99 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -513,13 +513,14 @@ EXPORT_SYMBOL(param_array_ops); int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; + const size_t len = strnlen(val, kps->maxlen); - if (strnlen(val, kps->maxlen) == kps->maxlen) { + if (len == kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); return -ENOSPC; } - strcpy(kps->string, val); + memcpy(kps->string, val, len + 1); return 0; } EXPORT_SYMBOL(param_set_copystring); @@ -841,7 +842,7 @@ static void __init param_sysfs_builtin(void) dot = strchr(kp->name, '.'); if (!dot) { /* This happens for core_param() */ - strcpy(modname, "kernel"); + strscpy(modname, "kernel"); name_len = 0; } else { name_len = dot - kp->name + 1; From 0f580d5d3d9d9cd0953695cd32e43aac3a946338 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 16 Aug 2025 22:42:15 +0200 Subject: [PATCH 0423/1307] rust: alloc: fix `rusttest` by providing `Cmalloc::aligned_layout` too Commit fde578c86281 ("rust: alloc: replace aligned_size() with Kmalloc::aligned_layout()") provides a public `aligned_layout` function in `Kamlloc`, but not in `Cmalloc`, and thus uses of it will trigger an error in `rusttest`. Such a user appeared in the following commit 22ab0641b939 ("rust: drm: ensure kmalloc() compatible Layout"): error[E0599]: no function or associated item named `aligned_layout` found for struct `alloc::allocator_test::Cmalloc` in the current scope --> rust/kernel/drm/device.rs:100:31 | 100 | let layout = Kmalloc::aligned_layout(Layout::new::()); | ^^^^^^^^^^^^^^ function or associated item not found in `Cmalloc` | ::: rust/kernel/alloc/allocator_test.rs:19:1 | 19 | pub struct Cmalloc; | ------------------ function or associated item `aligned_layout` not found for this struct Thus add an equivalent one for `Cmalloc`. Fixes: fde578c86281 ("rust: alloc: replace aligned_size() with Kmalloc::aligned_layout()") Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250816204215.2719559-1-ojeda@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator_test.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index a3074480bd8d..90dd987d40e4 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -22,6 +22,17 @@ pub type Vmalloc = Kmalloc; pub type KVmalloc = Kmalloc; +impl Cmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information). + layout.pad_to_align() + } +} + extern "C" { #[link_name = "aligned_alloc"] fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void; From a3dc32c635bae0ae569f489e00de0e8f015bfc25 Mon Sep 17 00:00:00 2001 From: Zenm Chen Date: Thu, 14 Aug 2025 00:24:15 +0800 Subject: [PATCH 0424/1307] USB: storage: Ignore driver CD mode for Realtek multi-mode Wi-Fi dongles Many Realtek USB Wi-Fi dongles released in recent years have two modes: one is driver CD mode which has Windows driver onboard, another one is Wi-Fi mode. Add the US_FL_IGNORE_DEVICE quirk for these multi-mode devices. Otherwise, usb_modeswitch may fail to switch them to Wi-Fi mode. Currently there are only two USB IDs known to be used by these multi-mode Wi-Fi dongles: 0bda:1a2b and 0bda:a192. Information about Mercury MW310UH in /sys/kernel/debug/usb/devices. T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 12 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=a192 Rev= 2.00 S: Manufacturer=Realtek S: Product=DISK C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Information about D-Link AX9U rev. A1 in /sys/kernel/debug/usb/devices. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 55 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=1a2b Rev= 0.00 S: Manufacturer=Realtek S: Product=DISK C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable Signed-off-by: Zenm Chen Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250813162415.2630-1-zenmchen@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index bee9f1e8003d..dfa5276a5a43 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1501,6 +1501,28 @@ UNUSUAL_DEV( 0x0bc2, 0x3332, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_WP_DETECT ), +/* + * Reported by Zenm Chen + * Ignore driver CD mode, otherwise usb_modeswitch may fail to switch + * the device into Wi-Fi mode. + */ +UNUSUAL_DEV( 0x0bda, 0x1a2b, 0x0000, 0xffff, + "Realtek", + "DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_DEVICE ), + +/* + * Reported by Zenm Chen + * Ignore driver CD mode, otherwise usb_modeswitch may fail to switch + * the device into Wi-Fi mode. + */ +UNUSUAL_DEV( 0x0bda, 0xa192, 0x0000, 0xffff, + "Realtek", + "DISK", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_DEVICE ), + UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, "Maxtor", "USB to SATA", From 45eae113dccaf8e502090ecf5b3d9e9b805add6f Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Fri, 8 Aug 2025 18:23:05 +0530 Subject: [PATCH 0425/1307] usb: dwc3: Remove WARN_ON for device endpoint command timeouts This commit addresses a rarely observed endpoint command timeout which causes kernel panic due to warn when 'panic_on_warn' is enabled and unnecessary call trace prints when 'panic_on_warn' is disabled. It is seen during fast software-controlled connect/disconnect testcases. The following is one such endpoint command timeout that we observed: 1. Connect ======= ->dwc3_thread_interrupt ->dwc3_ep0_interrupt ->configfs_composite_setup ->composite_setup ->usb_ep_queue ->dwc3_gadget_ep0_queue ->__dwc3_gadget_ep0_queue ->__dwc3_ep0_do_control_data ->dwc3_send_gadget_ep_cmd 2. Disconnect ========== ->dwc3_thread_interrupt ->dwc3_gadget_disconnect_interrupt ->dwc3_ep0_reset_state ->dwc3_ep0_end_control_data ->dwc3_send_gadget_ep_cmd In the issue scenario, in Exynos platforms, we observed that control transfers for the previous connect have not yet been completed and end transfer command sent as a part of the disconnect sequence and processing of USB_ENDPOINT_HALT feature request from the host timeout. This maybe an expected scenario since the controller is processing EP commands sent as a part of the previous connect. It maybe better to remove WARN_ON in all places where device endpoint commands are sent to avoid unnecessary kernel panic due to warn. Cc: stable Co-developed-by: Akash M Signed-off-by: Akash M Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250808125315.1607-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 20 ++++++++++++++++---- drivers/usb/dwc3/gadget.c | 10 ++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 666ac432f52d..b4229aa13f37 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -288,7 +288,9 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); ret = dwc3_ep0_start_trans(dep); - WARN_ON(ret < 0); + if (ret < 0) + dev_err(dwc->dev, "ep0 out start transfer failed: %d\n", ret); + for (i = 2; i < DWC3_ENDPOINTS_NUM; i++) { struct dwc3_ep *dwc3_ep; @@ -1061,7 +1063,9 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, ret = dwc3_ep0_start_trans(dep); } - WARN_ON(ret < 0); + if (ret < 0) + dev_err(dwc->dev, + "ep0 data phase start transfer failed: %d\n", ret); } static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) @@ -1078,7 +1082,12 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) { - WARN_ON(dwc3_ep0_start_control_status(dep)); + int ret; + + ret = dwc3_ep0_start_control_status(dep); + if (ret) + dev_err(dwc->dev, + "ep0 status phase start transfer failed: %d\n", ret); } static void dwc3_ep0_do_control_status(struct dwc3 *dwc, @@ -1121,7 +1130,10 @@ void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); memset(¶ms, 0, sizeof(params)); ret = dwc3_send_gadget_ep_cmd(dep, cmd, ¶ms); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "ep0 data phase end transfer failed: %d\n", ret); + dep->resource_index = 0; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 68fa2813e5f4..554f997eb8c4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1772,7 +1772,11 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int dep->flags |= DWC3_EP_DELAY_STOP; return 0; } - WARN_ON_ONCE(ret); + + if (ret) + dev_err_ratelimited(dep->dwc->dev, + "end transfer failed: %d\n", ret); + dep->resource_index = 0; if (!interrupt) @@ -4048,7 +4052,9 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc) dep->flags &= ~DWC3_EP_STALL; ret = dwc3_send_clear_stall_ep_cmd(dep); - WARN_ON_ONCE(ret); + if (ret) + dev_err_ratelimited(dwc->dev, + "failed to clear STALL on %s\n", dep->name); } } From cabb6c5f4d9e7f49bdf8c0a13c74bd93ee35f45a Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Fri, 15 Aug 2025 11:31:51 -0700 Subject: [PATCH 0426/1307] usb: typec: maxim_contaminant: disable low power mode when reading comparator values Low power mode is enabled when reading CC resistance as part of `max_contaminant_read_resistance_kohm()` and left in that state. However, it's supposed to work with 1uA current source. To read CC comparator values current source is changed to 80uA. This causes a storm of CC interrupts as it (falsely) detects a potential contaminant. To prevent this, disable low power mode current sourcing before reading comparator values. Fixes: 02b332a06397 ("usb: typec: maxim_contaminant: Implement check_contaminant callback") Cc: stable Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Rule: add Link: https://lore.kernel.org/stable/20250814-fix-upstream-contaminant-v1-1-801ce8089031%40google.com Link: https://lore.kernel.org/r/20250815-fix-upstream-contaminant-v2-1-6c8d6c3adafb@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/maxim_contaminant.c | 5 +++++ drivers/usb/typec/tcpm/tcpci_maxim.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/usb/typec/tcpm/maxim_contaminant.c b/drivers/usb/typec/tcpm/maxim_contaminant.c index 0cdda06592fd..818cfe226ac7 100644 --- a/drivers/usb/typec/tcpm/maxim_contaminant.c +++ b/drivers/usb/typec/tcpm/maxim_contaminant.c @@ -188,6 +188,11 @@ static int max_contaminant_read_comparators(struct max_tcpci_chip *chip, u8 *ven if (ret < 0) return ret; + /* Disable low power mode */ + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL2, CCLPMODESEL, + FIELD_PREP(CCLPMODESEL, + LOW_POWER_MODE_DISABLE)); + /* Sleep to allow comparators settle */ usleep_range(5000, 6000); ret = regmap_update_bits(regmap, TCPC_TCPC_CTRL, TCPC_TCPC_CTRL_ORIENTATION, PLUG_ORNT_CC1); diff --git a/drivers/usb/typec/tcpm/tcpci_maxim.h b/drivers/usb/typec/tcpm/tcpci_maxim.h index 76270d5c2838..b33540a42a95 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim.h +++ b/drivers/usb/typec/tcpm/tcpci_maxim.h @@ -21,6 +21,7 @@ #define CCOVPDIS BIT(6) #define SBURPCTRL BIT(5) #define CCLPMODESEL GENMASK(4, 3) +#define LOW_POWER_MODE_DISABLE 0 #define ULTRA_LOW_POWER_MODE 1 #define CCRPCTRL GENMASK(2, 0) #define UA_1_SRC 1 From a381c6d6f646226924809d0ad01a9465786da463 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Fri, 15 Aug 2025 11:31:52 -0700 Subject: [PATCH 0427/1307] usb: typec: maxim_contaminant: re-enable cc toggle if cc is open and port is clean Presently in `max_contaminant_is_contaminant()` if there's no contaminant detected previously, CC is open & stopped toggling and no contaminant is currently present, TCPC.RC would be programmed to do DRP toggling. However, it didn't actively look for a connection. This would lead to Type-C not detect *any* new connections. Hence, in the above situation, re-enable toggling & program TCPC to look for a new connection. Also, return early if TCPC was looking for connection as this indicates TCPC has neither detected a potential connection nor a change in contaminant state. In addition, once dry detection is complete (port is dry), restart toggling. Fixes: 02b332a06397e ("usb: typec: maxim_contaminant: Implement check_contaminant callback") Cc: stable Signed-off-by: Amit Sunil Dhamne Reviewed-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20250815-fix-upstream-contaminant-v2-2-6c8d6c3adafb@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/maxim_contaminant.c | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/usb/typec/tcpm/maxim_contaminant.c b/drivers/usb/typec/tcpm/maxim_contaminant.c index 818cfe226ac7..af8da6dc60ae 100644 --- a/drivers/usb/typec/tcpm/maxim_contaminant.c +++ b/drivers/usb/typec/tcpm/maxim_contaminant.c @@ -329,6 +329,39 @@ static int max_contaminant_enable_dry_detection(struct max_tcpci_chip *chip) return 0; } +static int max_contaminant_enable_toggling(struct max_tcpci_chip *chip) +{ + struct regmap *regmap = chip->data.regmap; + int ret; + + /* Disable dry detection if enabled. */ + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL2, CCLPMODESEL, + FIELD_PREP(CCLPMODESEL, + LOW_POWER_MODE_DISABLE)); + if (ret) + return ret; + + ret = regmap_update_bits(regmap, TCPC_VENDOR_CC_CTRL1, CCCONNDRY, 0); + if (ret) + return ret; + + ret = max_tcpci_write8(chip, TCPC_ROLE_CTRL, TCPC_ROLE_CTRL_DRP | + FIELD_PREP(TCPC_ROLE_CTRL_CC1, + TCPC_ROLE_CTRL_CC_RD) | + FIELD_PREP(TCPC_ROLE_CTRL_CC2, + TCPC_ROLE_CTRL_CC_RD)); + if (ret) + return ret; + + ret = regmap_update_bits(regmap, TCPC_TCPC_CTRL, + TCPC_TCPC_CTRL_EN_LK4CONN_ALRT, + TCPC_TCPC_CTRL_EN_LK4CONN_ALRT); + if (ret) + return ret; + + return max_tcpci_write8(chip, TCPC_COMMAND, TCPC_CMD_LOOK4CONNECTION); +} + bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect_while_debounce, bool *cc_handled) { @@ -345,6 +378,12 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect if (ret < 0) return false; + if (cc_status & TCPC_CC_STATUS_TOGGLING) { + if (chip->contaminant_state == DETECTED) + return true; + return false; + } + if (chip->contaminant_state == NOT_DETECTED || chip->contaminant_state == SINK) { if (!disconnect_while_debounce) msleep(100); @@ -377,6 +416,12 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect max_contaminant_enable_dry_detection(chip); return true; } + + ret = max_contaminant_enable_toggling(chip); + if (ret) + dev_err(chip->dev, + "Failed to enable toggling, ret=%d", + ret); } } else if (chip->contaminant_state == DETECTED) { if (!(cc_status & TCPC_CC_STATUS_TOGGLING)) { @@ -384,6 +429,14 @@ bool max_contaminant_is_contaminant(struct max_tcpci_chip *chip, bool disconnect if (chip->contaminant_state == DETECTED) { max_contaminant_enable_dry_detection(chip); return true; + } else { + ret = max_contaminant_enable_toggling(chip); + if (ret) { + dev_err(chip->dev, + "Failed to enable toggling, ret=%d", + ret); + return true; + } } } } From ba6cc29351b1fa0cb9adce91b88b9f3c3cbe9c46 Mon Sep 17 00:00:00 2001 From: Charalampos Mitrodimas Date: Sat, 16 Aug 2025 14:14:37 +0000 Subject: [PATCH 0428/1307] debugfs: fix mount options not being applied Mount options (uid, gid, mode) are silently ignored when debugfs is mounted. This is a regression introduced during the conversion to the new mount API. When the mount API conversion was done, the parsed options were never applied to the superblock when it was reused. As a result, the mount options were ignored when debugfs was mounted. Fix this by following the same pattern as the tracefs fix in commit e4d32142d1de ("tracing: Fix tracefs mount options"). Call debugfs_reconfigure() in debugfs_get_tree() to apply the mount options to the superblock after it has been created or reused. As an example, with the bug the "mode" mount option is ignored: $ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test $ mount | grep debugfs_test debugfs on /tmp/debugfs_test type debugfs (rw,relatime) $ ls -ld /tmp/debugfs_test drwx------ 25 root root 0 Aug 4 14:16 /tmp/debugfs_test With the fix applied, it works as expected: $ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test $ mount | grep debugfs_test debugfs on /tmp/debugfs_test type debugfs (rw,relatime,mode=666) $ ls -ld /tmp/debugfs_test drw-rw-rw- 37 root root 0 Aug 2 17:28 /tmp/debugfs_test Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220406 Cc: stable Reviewed-by: Eric Sandeen Signed-off-by: Charalampos Mitrodimas Link: https://lore.kernel.org/r/20250816-debugfs-mount-opts-v3-1-d271dad57b5b@posteo.net Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a0357b0cf362..c12d649df6a5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -183,6 +183,9 @@ static int debugfs_reconfigure(struct fs_context *fc) struct debugfs_fs_info *sb_opts = sb->s_fs_info; struct debugfs_fs_info *new_opts = fc->s_fs_info; + if (!new_opts) + return 0; + sync_filesystem(sb); /* structure copy of new mount options to sb */ @@ -282,10 +285,16 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) static int debugfs_get_tree(struct fs_context *fc) { + int err; + if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return -EPERM; - return get_tree_single(fc, debugfs_fill_super); + err = get_tree_single(fc, debugfs_fill_super); + if (err) + return err; + + return debugfs_reconfigure(fc); } static void debugfs_free_fc(struct fs_context *fc) From d49172bbd7eb07e4ba5e52238eaa9caf692c1cea Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 14 Aug 2025 21:27:29 +0200 Subject: [PATCH 0429/1307] Documentation: clarify the expected collaboration with security bugs reporters Some bug reports sent to the security team sometimes lack any explanation, are only AI-generated without verification, or sometimes it can simply be difficult to have a conversation with an invisible reporter belonging to an opaque team. This fortunately remains rare but the trend has been steadily increasing over the last years and it seems important to clarify what developers expect from reporters to avoid frustration on any side and keep the process efficient. Signed-off-by: Willy Tarreau Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250814192730.19252-1-w@1wt.eu Signed-off-by: Greg Kroah-Hartman --- Documentation/process/security-bugs.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 56c560a00b37..7dcc034d3df8 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -19,6 +19,16 @@ that can speed up the process considerably. It is possible that the security team will bring in extra help from area maintainers to understand and fix the security vulnerability. +The security team and maintainers almost always require additional +information beyond what was initially provided in a report and rely on +active and efficient collaboration with the reporter to perform further +testing (e.g., verifying versions, configuration options, mitigations, or +patches). Before contacting the security team, the reporter must ensure +they are available to explain their findings, engage in discussions, and +run additional tests. Reports where the reporter does not respond promptly +or cannot effectively discuss their findings may be abandoned if the +communication does not quickly improve. + As it is with any bug, the more information provided the easier it will be to diagnose and fix. Please review the procedure outlined in 'Documentation/admin-guide/reporting-issues.rst' if you are unclear about what From 3a68841d1d9b6eb32b2652bbb83acd17d5eb9135 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 14 Aug 2025 21:27:30 +0200 Subject: [PATCH 0430/1307] Documentation: smooth the text flow in the security bug reporting process The text was presenting the team, the the e-mail address, then some of the expectations, then what form of e-mail is expected. By switching the e-mail paragraph two paragraphs later and dropping the "Contact" sub-section, we can have a more natural flow that presents the team, then its expectation, then how to best contribute, then where to send. And more importantly, it increases the chances that reporters have read the prerequisites before finding the e-mail address. Signed-off-by: Willy Tarreau Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250814192730.19252-2-w@1wt.eu Signed-off-by: Greg Kroah-Hartman --- Documentation/process/security-bugs.rst | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 7dcc034d3df8..84657e7d2e5b 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -8,17 +8,6 @@ like to know when a security bug is found so that it can be fixed and disclosed as quickly as possible. Please report security bugs to the Linux kernel security team. -Contact -------- - -The Linux kernel security team can be contacted by email at -. This is a private list of security officers -who will help verify the bug report and develop and release a fix. -If you already have a fix, please include it with your report, as -that can speed up the process considerably. It is possible that the -security team will bring in extra help from area maintainers to -understand and fix the security vulnerability. - The security team and maintainers almost always require additional information beyond what was initially provided in a report and rely on active and efficient collaboration with the reporter to perform further @@ -36,6 +25,14 @@ information is helpful. Any exploit code is very helpful and will not be released without consent from the reporter unless it has already been made public. +The Linux kernel security team can be contacted by email at +. This is a private list of security officers +who will help verify the bug report and develop and release a fix. +If you already have a fix, please include it with your report, as +that can speed up the process considerably. It is possible that the +security team will bring in extra help from area maintainers to +understand and fix the security vulnerability. + Please send plain text emails without attachments where possible. It is much harder to have a context-quoted discussion about a complex issue if all the details are hidden away in attachments. Think of it like a From cfd956dcb101aa3d25bac321fae923323a47c607 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Fri, 15 Aug 2025 13:33:28 +0200 Subject: [PATCH 0431/1307] tty: hvc_console: Call hvc_kick in hvc_write unconditionally After hvc_write completes, call hvc_kick also in the case the output buffer has been drained, to ensure tty_wakeup gets called. This fixes that functions which wait for a drained buffer got stuck occasionally. Cc: stable Closes: https://bugzilla.opensuse.org/show_bug.cgi?id=1230062 Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/2011735.PYKUYFuaPT@fvogt-thinkpad Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_console.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index cd1f657f782d..13c663a154c4 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -543,10 +543,10 @@ static ssize_t hvc_write(struct tty_struct *tty, const u8 *buf, size_t count) } /* - * Racy, but harmless, kick thread if there is still pending data. + * Kick thread to flush if there's still pending data + * or to wakeup the write queue. */ - if (hp->n_outbuf) - hvc_kick(); + hvc_kick(); return written; } From e5f48bfa2ae0806d5f51fb8061afc619a73599a7 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:28 +0800 Subject: [PATCH 0432/1307] drm/hisilicon/hibmc: fix the i2c device resource leak when vdac init failed Currently the driver missed to clean the i2c adapter when vdac init failed. It may cause resource leak. Fixes: a0d078d06e516 ("drm/hisilicon: Features to support reading resolutions from EDID") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-2-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 1 + drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 5 +++++ drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 11 ++++++++--- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 274feabe7df0..ca8502e2760c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -69,6 +69,7 @@ int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +void hibmc_ddc_del(struct hibmc_vdac *vdac); int hibmc_dp_init(struct hibmc_drm_private *priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 99b3b77b5445..44860011855e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -95,3 +95,8 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) return i2c_bit_add_bus(&vdac->adapter); } + +void hibmc_ddc_del(struct hibmc_vdac *vdac) +{ + i2c_del_adapter(&vdac->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index e8a527ede854..841e81f47b68 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -53,7 +53,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) { struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&vdac->adapter); + hibmc_ddc_del(vdac); drm_connector_cleanup(connector); } @@ -110,7 +110,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_DAC, NULL); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); - return ret; + goto err; } drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs); @@ -121,7 +121,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); - return ret; + goto err; } drm_connector_helper_add(connector, &hibmc_connector_helper_funcs); @@ -131,4 +131,9 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; + +err: + hibmc_ddc_del(vdac); + + return ret; } From 8bed4ec42a4e0dc8113172696ff076d1eb6d8bcb Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:30 +0800 Subject: [PATCH 0433/1307] drm/hisilicon/hibmc: fix irq_request()'s irq name variable is local The local variable is passed in request_irq (), and there will be use after free problem, which will make request_irq failed. Using the global irq name instead of it to fix. Fixes: b11bc1ae4658 ("drm/hisilicon/hibmc: Add MSI irq getting and requesting for HPD") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-4-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 768b97f9e74a..4cdcc34070ee 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -32,7 +32,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "hibmc-vblank", "hibmc-hpd" }; static irqreturn_t hibmc_interrupt(int irq, void *arg) { @@ -277,7 +277,6 @@ static void hibmc_unload(struct drm_device *dev) static int hibmc_msi_init(struct drm_device *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - char name[32] = {0}; int valid_irq_num; int irq; int ret; @@ -292,9 +291,6 @@ static int hibmc_msi_init(struct drm_device *dev) valid_irq_num = ret; for (int i = 0; i < valid_irq_num; i++) { - snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", - dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); - irq = pci_irq_vector(pdev, i); if (i) @@ -302,10 +298,10 @@ static int hibmc_msi_init(struct drm_device *dev) ret = devm_request_threaded_irq(&pdev->dev, irq, hibmc_dp_interrupt, hibmc_dp_hpd_isr, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); else ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); if (ret) { drm_err(dev, "install irq failed: %d\n", ret); return ret; From 93a08f856fcc5aaeeecad01f71bef3088588216a Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:31 +0800 Subject: [PATCH 0434/1307] drm/hisilicon/hibmc: fix the hibmc loaded failed bug When hibmc loaded failed, the driver use hibmc_unload to free the resource, but the mutexes in mode.config are not init, which will access an NULL pointer. Just change goto statement to return, because hibnc_hw_init() doesn't need to free anything. Fixes: b3df5e65cc03 ("drm/hibmc: Drop drm_vblank_cleanup") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-5-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 4cdcc34070ee..ac552c339671 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -319,13 +319,13 @@ static int hibmc_load(struct drm_device *dev) ret = hibmc_hw_init(priv); if (ret) - goto err; + return ret; ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (ret) { drm_err(dev, "Error initializing VRAM MM; %d\n", ret); - goto err; + return ret; } ret = hibmc_kms_init(priv); From 9f98b429ba67d430b873e06bcfb90afa22888978 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:32 +0800 Subject: [PATCH 0435/1307] drm/hisilicon/hibmc: fix rare monitors cannot display problem In some case, the dp link training success at 8.1Gbps, but the sink's maximum supported rate is less than 8.1G. So change the default 8.1Gbps link rate to the rate that reads from devices' capabilities. Fixes: 54063d86e036 ("drm/hisilicon/hibmc: add dp link moduel in hibmc drivers") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-6-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index 74f7832ea53e..0726cb5b736e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -325,6 +325,17 @@ static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) return hibmc_dp_link_reduce_rate(dp); } +static void hibmc_dp_update_caps(struct hibmc_dp_dev *dp) +{ + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + if (dp->link.cap.link_rate > DP_LINK_BW_8_1 || !dp->link.cap.link_rate) + dp->link.cap.link_rate = DP_LINK_BW_8_1; + + dp->link.cap.lanes = dp->dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; + if (dp->link.cap.lanes > HIBMC_DP_LANE_NUM_MAX) + dp->link.cap.lanes = HIBMC_DP_LANE_NUM_MAX; +} + int hibmc_dp_link_training(struct hibmc_dp_dev *dp) { struct hibmc_dp_link *link = &dp->link; @@ -334,8 +345,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; - dp->link.cap.lanes = 0x2; + hibmc_dp_update_caps(dp); ret = hibmc_dp_get_serdes_rate_cfg(dp); if (ret < 0) From 3271faf42d135bcf569c3ff6af55c21858eec212 Mon Sep 17 00:00:00 2001 From: Baihan Li Date: Wed, 13 Aug 2025 17:42:34 +0800 Subject: [PATCH 0436/1307] drm/hisilicon/hibmc: fix dp and vga cannot show together If VGA and DP connected together, there will be only one can get crtc. Add encoder possible_clones to support two connectors enable. Fixes: 3c7623fb5bb6 ("drm/hisilicon/hibmc: Enable this hot plug detect of irq feature") Signed-off-by: Baihan Li Signed-off-by: Yongbang Shi Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250813094238.3722345-8-shiyongbang@huawei.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index ac552c339671..289304500ab0 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -115,6 +115,8 @@ static const struct drm_mode_config_funcs hibmc_mode_funcs = { static int hibmc_kms_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; + struct drm_encoder *encoder; + u32 clone_mask = 0; int ret; ret = drmm_mode_config_init(dev); @@ -154,6 +156,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + drm_for_each_encoder(encoder, dev) + clone_mask |= drm_encoder_mask(encoder); + + drm_for_each_encoder(encoder, dev) + encoder->possible_clones = clone_mask; + return 0; } From c17b750b3ad9f45f2b6f7e6f7f4679844244f0b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Aug 2025 15:22:10 -0700 Subject: [PATCH 0437/1307] Linux 6.17-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6bfe776bf3c5..d1adb78c3596 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From bac7b996d42e458a94578f4227795a0d4deef6fa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 12 Aug 2025 18:45:46 +0200 Subject: [PATCH 0438/1307] smb: server: split ksmbd_rdma_stop_listening() out of ksmbd_rdma_destroy() We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()! Otherwise already existing connections try to use smb_direct_wq as a NULL pointer. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 3 ++- fs/smb/server/transport_rdma.c | 5 ++++- fs/smb/server/transport_rdma.h | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 3f04a2977ba8..67c4f73398df 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -504,7 +504,8 @@ void ksmbd_conn_transport_destroy(void) { mutex_lock(&init_lock); ksmbd_tcp_destroy(); - ksmbd_rdma_destroy(); + ksmbd_rdma_stop_listening(); stop_sessions(); + ksmbd_rdma_destroy(); mutex_unlock(&init_lock); } diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8d366db5f605..5466aa8c39b1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2194,7 +2194,7 @@ int ksmbd_rdma_init(void) return 0; } -void ksmbd_rdma_destroy(void) +void ksmbd_rdma_stop_listening(void) { if (!smb_direct_listener.cm_id) return; @@ -2203,7 +2203,10 @@ void ksmbd_rdma_destroy(void) rdma_destroy_id(smb_direct_listener.cm_id); smb_direct_listener.cm_id = NULL; +} +void ksmbd_rdma_destroy(void) +{ if (smb_direct_wq) { destroy_workqueue(smb_direct_wq); smb_direct_wq = NULL; diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index 77aee4e5c9dc..a2291b77488a 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -54,13 +54,15 @@ struct smb_direct_data_transfer { #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); +void ksmbd_rdma_stop_listening(void); void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); void init_smbd_max_io_size(unsigned int sz); unsigned int get_smbd_max_read_write_size(void); #else static inline int ksmbd_rdma_init(void) { return 0; } -static inline int ksmbd_rdma_destroy(void) { return 0; } +static inline void ksmbd_rdma_stop_listening(void) { } +static inline void ksmbd_rdma_destroy(void) { } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(void) { return 0; } From c0d41112f1a5828c194b59cca953114bc3776ef2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Aug 2025 09:48:40 +0900 Subject: [PATCH 0439/1307] ksmbd: extend the connection limiting mechanism to support IPv6 Update the connection tracking logic to handle both IPv4 and IPv6 address families. Cc: stable@vger.kernel.org Fixes: e6bb91939740 ("ksmbd: limit repeated connections from clients with the same IP") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.h | 7 ++++++- fs/smb/server/transport_tcp.c | 26 +++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 31dd1caac1e8..2aa8084bb593 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -46,7 +46,12 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; - __be32 inet_addr; + union { + __be32 inet_addr; +#if IS_ENABLED(CONFIG_IPV6) + u8 inet6_addr[16]; +#endif + }; char *request_buf; struct ksmbd_transport *transport; struct nls_table *local_nls; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index b1df02e321b0..4337df97987d 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -85,7 +85,14 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return NULL; } +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) + memcpy(&conn->inet6_addr, &client_sk->sk->sk_v6_daddr, 16); + else + conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#else conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#endif conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops; @@ -229,7 +236,6 @@ static int ksmbd_kthread_fn(void *p) { struct socket *client_sk = NULL; struct interface *iface = (struct interface *)p; - struct inet_sock *csk_inet; struct ksmbd_conn *conn; int ret; @@ -252,13 +258,27 @@ static int ksmbd_kthread_fn(void *p) /* * Limits repeated connections from clients with the same IP. */ - csk_inet = inet_sk(client_sk->sk); down_read(&conn_list_lock); list_for_each_entry(conn, &conn_list, conns_list) - if (csk_inet->inet_daddr == conn->inet_addr) { +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) { + if (memcmp(&client_sk->sk->sk_v6_daddr, + &conn->inet6_addr, 16) == 0) { + ret = -EAGAIN; + break; + } + } else if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { ret = -EAGAIN; break; } +#else + if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { + ret = -EAGAIN; + break; + } +#endif up_read(&conn_list_lock); if (ret == -EAGAIN) continue; From 89bb430f621124af39bb31763c4a8b504c9651e2 Mon Sep 17 00:00:00 2001 From: Ziyan Xu Date: Sat, 16 Aug 2025 10:20:05 +0900 Subject: [PATCH 0440/1307] ksmbd: fix refcount leak causing resource not released When ksmbd_conn_releasing(opinfo->conn) returns true,the refcount was not decremented properly, causing a refcount leak that prevents the count from reaching zero and the memory from being released. Cc: stable@vger.kernel.org Signed-off-by: Ziyan Xu Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index d7a8a580d013..a04d5702820d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1102,8 +1102,10 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); @@ -1139,8 +1141,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } @@ -1343,8 +1348,10 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, if (!atomic_inc_not_zero(&brk_op->refcount)) continue; - if (ksmbd_conn_releasing(brk_op->conn)) + if (ksmbd_conn_releasing(brk_op->conn)) { + opinfo_put(brk_op); continue; + } if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | From f52d6aa98379842fc255d93282655566f2114e0c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:48 +0300 Subject: [PATCH 0441/1307] drm/i915/lnl+/tc: Fix handling of an enabled/disconnected dp-alt sink The TypeC PHY HW readout during driver loading and system resume determines which TypeC mode the PHY is in (legacy/DP-alt/TBT-alt) and whether the PHY is connected, based on the PHY's Owned and Ready flags. For the PHY to be in DP-alt or legacy mode and for the PHY to be in the connected state in these modes, both the Owned (set by the BIOS/driver) and the Ready (set by the HW) flags should be set. On ICL-MTL the HW kept the PHY's Ready flag set after the driver connected the PHY by acquiring the PHY ownership (by setting the Owned flag), until the driver disconnected the PHY by releasing the PHY ownership (by clearing the Owned flag). On LNL+ this has changed, in that the HW clears the Ready flag as soon as the sink gets disconnected, even if the PHY ownership was acquired already and hence the PHY is being used by the display. When inheriting the HW state from BIOS for a PHY connected in DP-alt mode on which the sink got disconnected - i.e. in a case where the sink was connected while BIOS/GOP was running and so the sink got enabled connecting the PHY, but the user disconnected the sink by the time the driver loaded - the PHY Owned but not Ready state must be accounted for on LNL+ according to the above. Do that by assuming on LNL+ that the PHY is connected in DP-alt mode whenever the PHY Owned flag is set, regardless of the PHY Ready flag. This fixes a problem on LNL+, where the PHY TypeC mode / connected state was detected incorrectly for a DP-alt sink, which got connected and then disconnected by the user in the above way. v2: Rename tc_phy_in_legacy_or_dp_alt_mode() to tc_phy_owned_by_display(). (Luca, Jani) Cc: Jani Nikula Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Reviewed-by: Luca Coelho [Imre: Add one-liner function documentation for tc_phy_owned_by_display()] Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-2-imre.deak@intel.com (cherry picked from commit 89f4b196ee4b056e0e8c179b247b29d4a71a4e7e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3bc57579fe53..8208539bfe66 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1226,14 +1226,19 @@ static void tc_phy_get_hw_state(struct intel_tc_port *tc) tc->phy_ops->get_hw_state(tc); } -static bool tc_phy_is_ready_and_owned(struct intel_tc_port *tc, - bool phy_is_ready, bool phy_is_owned) +/* Is the PHY owned by display i.e. is it in legacy or DP-alt mode? */ +static bool tc_phy_owned_by_display(struct intel_tc_port *tc, + bool phy_is_ready, bool phy_is_owned) { struct intel_display *display = to_intel_display(tc->dig_port); - drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); + if (DISPLAY_VER(display) < 20) { + drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); - return phy_is_ready && phy_is_owned; + return phy_is_ready && phy_is_owned; + } else { + return phy_is_owned; + } } static bool tc_phy_is_connected(struct intel_tc_port *tc, @@ -1244,7 +1249,7 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, bool phy_is_owned = tc_phy_is_owned(tc); bool is_connected; - if (tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) + if (tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) is_connected = port_pll_type == ICL_PORT_DPLL_MG_PHY; else is_connected = port_pll_type == ICL_PORT_DPLL_DEFAULT; @@ -1352,7 +1357,7 @@ tc_phy_get_current_mode(struct intel_tc_port *tc) phy_is_ready = tc_phy_is_ready(tc); phy_is_owned = tc_phy_is_owned(tc); - if (!tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) { + if (!tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) { mode = get_tc_mode_in_phy_not_owned_state(tc, live_mode); } else { drm_WARN_ON(display->drm, live_mode == TC_PORT_TBT_ALT); From 5fd35236546abe780eaadb7561e09953719d4fc3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:49 +0300 Subject: [PATCH 0442/1307] drm/i915/icl+/tc: Cache the max lane count value The PHY's pin assignment value in the TCSS_DDI_STATUS register - as set by the HW/FW based on the connected DP-alt sink's TypeC/PD pin assignment negotiation - gets cleared by the HW/FW on LNL+ as soon as the sink gets disconnected, even if the PHY ownership got acquired already by the driver (and hence the PHY itself is still connected and used by the display). This is similar to how the PHY Ready flag gets cleared on LNL+ in the same register. To be able to query the max lane count value on LNL+ - which is based on the above pin assignment - at all times even after the sink gets disconnected, the max lane count must be determined and cached during the PHY's HW readout and connect sequences. Do that here, leaving the actual use of the cached value to a follow-up change. v2: Don't read out the pin configuration if the PHY is disconnected. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-3-imre.deak@intel.com (cherry picked from commit 3e32438fc406761f81b1928d210b3d2a5e7501a0) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 57 +++++++++++++++++++++---- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 8208539bfe66..34435c4fc280 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -66,6 +66,7 @@ struct intel_tc_port { enum tc_port_mode init_mode; enum phy_fia phy_fia; u8 phy_fia_idx; + u8 max_lane_count; }; static enum intel_display_power_domain @@ -365,12 +366,12 @@ static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) } } -int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) +static int get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); + struct intel_display *display = to_intel_display(tc->dig_port); + struct intel_digital_port *dig_port = tc->dig_port; - if (!intel_encoder_is_tc(&dig_port->base) || tc->mode != TC_PORT_DP_ALT) + if (tc->mode != TC_PORT_DP_ALT) return 4; assert_tc_cold_blocked(tc); @@ -384,6 +385,21 @@ int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) return intel_tc_port_get_max_lane_count(dig_port); } +static void read_pin_configuration(struct intel_tc_port *tc) +{ + tc->max_lane_count = get_max_lane_count(tc); +} + +int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) +{ + struct intel_tc_port *tc = to_tc_port(dig_port); + + if (!intel_encoder_is_tc(&dig_port->base)) + return 4; + + return get_max_lane_count(tc); +} + void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes) { @@ -596,9 +612,12 @@ static void icl_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + __tc_cold_unblock(tc, domain, tc_cold_wref); } @@ -656,8 +675,11 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if ((!tc_phy_is_ready(tc) || !icl_tc_phy_take_ownership(tc, true)) && @@ -668,6 +690,7 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, goto out_unblock_tc_cold; } + read_pin_configuration(tc); if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -858,9 +881,12 @@ static void adlp_tc_phy_get_hw_state(struct intel_tc_port *tc) port_wakeref = intel_display_power_get(display, port_power_domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + intel_display_power_put(display, port_power_domain, port_wakeref); } @@ -873,6 +899,9 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) if (tc->mode == TC_PORT_TBT_ALT) { tc->lock_wakeref = tc_cold_block(tc); + + read_pin_configuration(tc); + return true; } @@ -894,6 +923,8 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_unblock_tc_cold; @@ -1124,9 +1155,12 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + drm_WARN_ON(display->drm, (tc->mode == TC_PORT_DP_ALT || tc->mode == TC_PORT_LEGACY) && !xelpdp_tc_phy_tcss_power_is_enabled(tc)); @@ -1138,14 +1172,19 @@ static bool xelpdp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) { tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if (!xelpdp_tc_phy_enable_tcss_power(tc, true)) goto out_unblock_tccold; xelpdp_tc_phy_take_ownership(tc, true); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; From c87514a0bb0a64507412a2d98264060dc0c1562a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:50 +0300 Subject: [PATCH 0443/1307] drm/i915/lnl+/tc: Fix max lane count HW readout On LNL+ for a disconnected sink the pin assignment value gets cleared by the HW/FW as soon as the sink gets disconnected, even if the PHY ownership got acquired already by the BIOS/driver (and hence the PHY itself is still connected and used by the display). During HW readout this can result in detecting the PHY's max lane count as 0 - matching the above cleared aka NONE pin assignment HW state. For a connected PHY the driver in general (outside of intel_tc.c) expects the max lane count value to be valid for the video mode enabled on the corresponding output (1, 2 or 4). Ensure this by setting the max lane count to 4 in this case. Note, that it doesn't matter if this lane count happened to be more than the max lane count with which the PHY got connected and enabled, since the only thing the driver can do with such an output - where the DP-alt sink is disconnected - is to disable the output. v2: Rebased on change reading out the pin configuration only if the PHY is connected. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-4-imre.deak@intel.com (cherry picked from commit 33cf70bc0fe760224f892bc1854a33665f27d482) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 34435c4fc280..3f9842040bb0 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -23,6 +23,7 @@ #include "intel_modeset_lock.h" #include "intel_tc.h" +#define DP_PIN_ASSIGNMENT_NONE 0x0 #define DP_PIN_ASSIGNMENT_C 0x3 #define DP_PIN_ASSIGNMENT_D 0x4 #define DP_PIN_ASSIGNMENT_E 0x5 @@ -308,6 +309,8 @@ static int lnl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) REG_FIELD_GET(TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK, val); switch (pin_assignment) { + case DP_PIN_ASSIGNMENT_NONE: + return 0; default: MISSING_CASE(pin_assignment); fallthrough; @@ -1159,6 +1162,12 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc->lock_wakeref = tc_cold_block(tc); read_pin_configuration(tc); + /* + * Set a valid lane count value for a DP-alt sink which got + * disconnected. The driver can only disable the output on this PHY. + */ + if (tc->max_lane_count == 0) + tc->max_lane_count = 4; } drm_WARN_ON(display->drm, From c5c2b4b3841666be3a45346d0ffa96b4b143504e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:51 +0300 Subject: [PATCH 0444/1307] drm/i915/lnl+/tc: Use the cached max lane count value Use the cached max lane count value on LNL+, to account for scenarios where this value is queried after the HW cleared the corresponding pin assignment value in the TCSS_DDI_STATUS register after the sink got disconnected. For consistency, follow-up changes will use the cached max lane count value on other platforms as well and will also cache the pin assignment value in a similar way. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-5-imre.deak@intel.com (cherry picked from commit afc4e84388079f4d5ba05271632b7a4d8d85165c) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3f9842040bb0..6a2442a0649e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -395,12 +395,16 @@ static void read_pin_configuration(struct intel_tc_port *tc) int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_tc_port *tc = to_tc_port(dig_port); if (!intel_encoder_is_tc(&dig_port->base)) return 4; - return get_max_lane_count(tc); + if (DISPLAY_VER(display) < 20) + return get_max_lane_count(tc); + + return tc->max_lane_count; } void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, From d7fa5754e83cd36c4327eb2d806064e598a72ff6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 11 Aug 2025 11:01:52 +0300 Subject: [PATCH 0445/1307] drm/i915/icl+/tc: Convert AUX powered WARN to a debug message The BIOS can leave the AUX power well enabled on an output, even if this isn't required (on platforms where the AUX power is only needed for an AUX access). This was observed at least on PTL. To avoid the WARN which would be triggered by this during the HW readout, convert the WARN to a debug message. Cc: stable@vger.kernel.org # v6.8+ Reported-by: Charlton Lin Tested-by: Khaled Almahallawy Reviewed-by: Mika Kahola Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250811080152.906216-6-imre.deak@intel.com (cherry picked from commit 6cb52cba474b2bec1a3018d3dbf75292059a29a1) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 6a2442a0649e..668ef139391b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1498,11 +1498,11 @@ static void intel_tc_port_reset_mode(struct intel_tc_port *tc, intel_display_power_flush_work(display); if (!intel_tc_cold_requires_aux_pw(dig_port)) { enum intel_display_power_domain aux_domain; - bool aux_powered; aux_domain = intel_aux_power_domain(dig_port); - aux_powered = intel_display_power_is_enabled(display, aux_domain); - drm_WARN_ON(display->drm, aux_powered); + if (intel_display_power_is_enabled(display, aux_domain)) + drm_dbg_kms(display->drm, "Port %s: AUX unexpectedly powered\n", + tc->port_name); } tc_phy_disconnect(tc); From 6347dc7fb967521a77f9ff0774d25ef0cca4c6cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 11 Aug 2025 10:18:07 -0400 Subject: [PATCH 0446/1307] media: rkvdec: Fix a NULL vs IS_ERR() bug in probe() The iommu_paging_domain_alloc() function doesn't return NULL on error it returns error pointers. Update the check and then set ->empty_domain to NULL because the rest of the driver assumes it can be NULL. Fixes: ff8c5622f9f7 ("media: rkvdec: Restore iommu addresses on errors") Signed-off-by: Dan Carpenter Tested-by: Detlev Casanova Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/media/platform/rockchip/rkvdec/rkvdec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index d707088ec0dc..1b7f27e4d961 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -1162,8 +1162,10 @@ static int rkvdec_probe(struct platform_device *pdev) if (iommu_get_domain_for_dev(&pdev->dev)) { rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); - if (!rkvdec->empty_domain) + if (IS_ERR(rkvdec->empty_domain)) { + rkvdec->empty_domain = NULL; dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); + } } vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); From 0d58a72b66ec4c24128395e408348c2c84221605 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Jul 2025 20:24:44 +0200 Subject: [PATCH 0447/1307] media: rkvdec: Fix an error handling path in rkvdec_probe() If an error occurs after a successful iommu_paging_domain_alloc() call, it should be undone by a corresponding iommu_domain_free() call, as already done in the remove function. In order to fix the issue, move the corresponding call at the end of the function, because it is safe to allocate 'empty_domain' later. Fixes: ff8c5622f9f7 ("media: rkvdec: Restore iommu addresses on errors") Signed-off-by: Christophe JAILLET Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- .../media/platform/rockchip/rkvdec/rkvdec.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index 1b7f27e4d961..35265e321203 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -1159,15 +1159,6 @@ static int rkvdec_probe(struct platform_device *pdev) return ret; } - if (iommu_get_domain_for_dev(&pdev->dev)) { - rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); - - if (IS_ERR(rkvdec->empty_domain)) { - rkvdec->empty_domain = NULL; - dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); - } - } - vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); irq = platform_get_irq(pdev, 0); @@ -1190,6 +1181,15 @@ static int rkvdec_probe(struct platform_device *pdev) if (ret) goto err_disable_runtime_pm; + if (iommu_get_domain_for_dev(&pdev->dev)) { + rkvdec->empty_domain = iommu_paging_domain_alloc(rkvdec->dev); + + if (IS_ERR(rkvdec->empty_domain)) { + rkvdec->empty_domain = NULL; + dev_warn(rkvdec->dev, "cannot alloc new empty domain\n"); + } + } + return 0; err_disable_runtime_pm: From da3fa08a89dc1cb33ed57d097239b9c7cd9e7a60 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 11 Aug 2025 11:00:19 -0400 Subject: [PATCH 0448/1307] media: rkvdec: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Reviewed-by: Nicolas Dufresne Reviewed-by: Heiko Stuebner Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- drivers/media/platform/rockchip/rkvdec/rkvdec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkvdec/rkvdec.c b/drivers/media/platform/rockchip/rkvdec/rkvdec.c index 35265e321203..d3b31f461194 100644 --- a/drivers/media/platform/rockchip/rkvdec/rkvdec.c +++ b/drivers/media/platform/rockchip/rkvdec/rkvdec.c @@ -765,7 +765,6 @@ static void rkvdec_job_finish(struct rkvdec_ctx *ctx, { struct rkvdec_dev *rkvdec = ctx->dev; - pm_runtime_mark_last_busy(rkvdec->dev); pm_runtime_put_autosuspend(rkvdec->dev); rkvdec_job_finish_no_pm(ctx, result); } From a032fe30cf09b6723ab61a05aee057311b00f9e1 Mon Sep 17 00:00:00 2001 From: Dongcheng Yan Date: Fri, 25 Apr 2025 18:43:30 +0800 Subject: [PATCH 0449/1307] platform/x86: int3472: add hpd pin support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is being received. On the host side this is wired to a GPIO for polling or interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges lt6911uxe and lt6911uxc. The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it here as well. Signed-off-by: Dongcheng Yan Reviewed-by: Sakari Ailus Acked-by: Ilpo Järvinen Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Fixes: 20244cbafbd6 ("media: i2c: change lt6911uxe irq_gpio name to "hpd"") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/platform/x86/intel/int3472/discrete.c | 6 ++++++ include/linux/platform_data/x86/int3472.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 4c0aed6e626f..bdfb8a800c54 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -193,6 +193,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 *con_id = "privacy-led"; *gpio_flags = GPIO_ACTIVE_HIGH; break; + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: + *con_id = "hpd"; + *gpio_flags = GPIO_ACTIVE_HIGH; + break; case INT3472_GPIO_TYPE_POWER_ENABLE: *con_id = "avdd"; *gpio_flags = GPIO_ACTIVE_HIGH; @@ -223,6 +227,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 * 0x0b Power enable * 0x0c Clock enable * 0x0d Privacy LED + * 0x13 Hotplug detect * * There are some known platform specific quirks where that does not quite * hold up; for example where a pin with type 0x01 (Power down) is mapped to @@ -292,6 +297,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, switch (type) { case INT3472_GPIO_TYPE_RESET: case INT3472_GPIO_TYPE_POWERDOWN: + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, con_id, gpio_flags); if (ret) err_msg = "Failed to map GPIO pin to sensor\n"; diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 From 6f6fbd9a0c5a75eee0618c1499cf73cc770b3f52 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 9 Jul 2025 22:53:48 +0300 Subject: [PATCH 0450/1307] media: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-reduntant explicit call to pm_runtime_mark_last_busy(). Reviewed-by: Laurent Pinchart Acked-by: Thierry Reding (tegra-vde/h264.c) Acked-by: Tommaso Merciai (alvium-csi2.c) Reviewed-by: Dikshita Agarwal (iris_hfi_queue.c) Reviewed-by: Sean Young Acked-by: Dave Stevenson (imx219.c) Acked-by: Benjamin Mugnier Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/i2c/alvium-csi2.c | 1 - drivers/media/i2c/ccs/ccs-core.c | 7 +------ drivers/media/i2c/dw9768.c | 1 - drivers/media/i2c/gc0308.c | 3 --- drivers/media/i2c/gc2145.c | 3 --- drivers/media/i2c/imx219.c | 2 -- drivers/media/i2c/imx283.c | 3 --- drivers/media/i2c/imx290.c | 3 --- drivers/media/i2c/imx296.c | 1 - drivers/media/i2c/imx415.c | 1 - drivers/media/i2c/mt9m114.c | 6 ------ drivers/media/i2c/ov4689.c | 3 --- drivers/media/i2c/ov5640.c | 4 ---- drivers/media/i2c/ov5645.c | 3 --- drivers/media/i2c/ov64a40.c | 7 +------ drivers/media/i2c/ov8858.c | 2 -- drivers/media/i2c/st-mipid02.c | 2 -- drivers/media/i2c/tc358746.c | 5 ----- drivers/media/i2c/thp7312.c | 4 ---- drivers/media/i2c/vd55g1.c | 4 ---- drivers/media/i2c/vd56g3.c | 4 ---- drivers/media/i2c/video-i2c.c | 4 ---- drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c | 4 ---- drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c | 5 ----- drivers/media/platform/nvidia/tegra-vde/h264.c | 2 -- drivers/media/platform/qcom/iris/iris_hfi_queue.c | 1 - drivers/media/platform/raspberrypi/pisp_be/pisp_be.c | 2 -- drivers/media/platform/verisilicon/hantro_drv.c | 1 - drivers/media/rc/gpio-ir-recv.c | 4 +--- 29 files changed, 3 insertions(+), 89 deletions(-) diff --git a/drivers/media/i2c/alvium-csi2.c b/drivers/media/i2c/alvium-csi2.c index 05b708bd0a64..1f088acecf36 100644 --- a/drivers/media/i2c/alvium-csi2.c +++ b/drivers/media/i2c/alvium-csi2.c @@ -1841,7 +1841,6 @@ static int alvium_s_stream(struct v4l2_subdev *sd, int enable) } else { alvium_set_stream_mipi(alvium, enable); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index 487bcabb4a19..1c889c878abd 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -787,10 +787,8 @@ static int ccs_set_ctrl(struct v4l2_ctrl *ctrl) rval = -EINVAL; } - if (pm_status > 0) { - pm_runtime_mark_last_busy(&client->dev); + if (pm_status > 0) pm_runtime_put_autosuspend(&client->dev); - } return rval; } @@ -1914,7 +1912,6 @@ static int ccs_set_stream(struct v4l2_subdev *subdev, int enable) if (!enable) { ccs_stop_streaming(sensor); sensor->streaming = false; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; @@ -1929,7 +1926,6 @@ static int ccs_set_stream(struct v4l2_subdev *subdev, int enable) rval = ccs_start_streaming(sensor); if (rval < 0) { sensor->streaming = false; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } @@ -2677,7 +2673,6 @@ nvm_show(struct device *dev, struct device_attribute *attr, char *buf) return -ENODEV; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); /* diff --git a/drivers/media/i2c/dw9768.c b/drivers/media/i2c/dw9768.c index 3a4d100b9199..d434721ba8ed 100644 --- a/drivers/media/i2c/dw9768.c +++ b/drivers/media/i2c/dw9768.c @@ -374,7 +374,6 @@ static int dw9768_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) static int dw9768_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) { - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_autosuspend(sd->dev); return 0; diff --git a/drivers/media/i2c/gc0308.c b/drivers/media/i2c/gc0308.c index 069f42785b3c..cbcda0e18ff1 100644 --- a/drivers/media/i2c/gc0308.c +++ b/drivers/media/i2c/gc0308.c @@ -974,7 +974,6 @@ static int gc0308_s_ctrl(struct v4l2_ctrl *ctrl) if (ret) dev_err(gc0308->dev, "failed to set control: %d\n", ret); - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return ret; @@ -1157,14 +1156,12 @@ static int gc0308_start_stream(struct gc0308 *gc0308) return 0; disable_pm: - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return ret; } static int gc0308_stop_stream(struct gc0308 *gc0308) { - pm_runtime_mark_last_busy(gc0308->dev); pm_runtime_put_autosuspend(gc0308->dev); return 0; } diff --git a/drivers/media/i2c/gc2145.c b/drivers/media/i2c/gc2145.c index ba02161d46e7..559a851669aa 100644 --- a/drivers/media/i2c/gc2145.c +++ b/drivers/media/i2c/gc2145.c @@ -963,7 +963,6 @@ static int gc2145_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } @@ -985,7 +984,6 @@ static int gc2145_disable_streams(struct v4l2_subdev *sd, if (ret) dev_err(&client->dev, "%s failed to write regs\n", __func__); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; @@ -1193,7 +1191,6 @@ static int gc2145_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c index 3b4f68543342..3faf48f34af4 100644 --- a/drivers/media/i2c/imx219.c +++ b/drivers/media/i2c/imx219.c @@ -771,7 +771,6 @@ static int imx219_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } @@ -793,7 +792,6 @@ static int imx219_disable_streams(struct v4l2_subdev *sd, __v4l2_ctrl_grab(imx219->vflip, false); __v4l2_ctrl_grab(imx219->hflip, false); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; diff --git a/drivers/media/i2c/imx283.c b/drivers/media/i2c/imx283.c index da618c8cbadc..67e8bb432d10 100644 --- a/drivers/media/i2c/imx283.c +++ b/drivers/media/i2c/imx283.c @@ -1143,7 +1143,6 @@ static int imx283_enable_streams(struct v4l2_subdev *sd, return 0; err_rpm_put: - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return ret; @@ -1163,7 +1162,6 @@ static int imx283_disable_streams(struct v4l2_subdev *sd, if (ret) dev_err(imx283->dev, "Failed to stop stream\n"); - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return ret; @@ -1558,7 +1556,6 @@ static int imx283_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. */ - pm_runtime_mark_last_busy(imx283->dev); pm_runtime_put_autosuspend(imx283->dev); return 0; diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 4f3f386c5353..ec172556612e 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -869,7 +869,6 @@ static int imx290_set_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); return ret; @@ -1099,7 +1098,6 @@ static int imx290_set_stream(struct v4l2_subdev *sd, int enable) } } else { imx290_stop_streaming(imx290); - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); } @@ -1294,7 +1292,6 @@ static int imx290_subdev_init(struct imx290 *imx290) * will already be prevented even before the delay. */ v4l2_i2c_subdev_init(&imx290->sd, client, &imx290_subdev_ops); - pm_runtime_mark_last_busy(imx290->dev); pm_runtime_put_autosuspend(imx290->dev); imx290->sd.internal_ops = &imx290_internal_ops; diff --git a/drivers/media/i2c/imx296.c b/drivers/media/i2c/imx296.c index f3bec16b527c..61116f4e3f76 100644 --- a/drivers/media/i2c/imx296.c +++ b/drivers/media/i2c/imx296.c @@ -604,7 +604,6 @@ static int imx296_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { ret = imx296_stream_off(sensor); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); goto unlock; diff --git a/drivers/media/i2c/imx415.c b/drivers/media/i2c/imx415.c index 278e743646ea..276bf4d6f39d 100644 --- a/drivers/media/i2c/imx415.c +++ b/drivers/media/i2c/imx415.c @@ -952,7 +952,6 @@ static int imx415_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { ret = imx415_stream_off(sensor); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); goto unlock; diff --git a/drivers/media/i2c/mt9m114.c b/drivers/media/i2c/mt9m114.c index 3f540ca40f3c..aa3fd6c6c76c 100644 --- a/drivers/media/i2c/mt9m114.c +++ b/drivers/media/i2c/mt9m114.c @@ -974,7 +974,6 @@ static int mt9m114_start_streaming(struct mt9m114 *sensor, return 0; error: - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -988,7 +987,6 @@ static int mt9m114_stop_streaming(struct mt9m114 *sensor) ret = mt9m114_set_state(sensor, MT9M114_SYS_STATE_ENTER_SUSPEND); - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1046,7 +1044,6 @@ static int mt9m114_pa_g_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1113,7 +1110,6 @@ static int mt9m114_pa_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -1565,7 +1561,6 @@ static int mt9m114_ifp_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->client->dev); pm_runtime_put_autosuspend(&sensor->client->dev); return ret; @@ -2472,7 +2467,6 @@ static int mt9m114_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. */ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov4689.c b/drivers/media/i2c/ov4689.c index 1c3a449f9354..7d740ad3926f 100644 --- a/drivers/media/i2c/ov4689.c +++ b/drivers/media/i2c/ov4689.c @@ -497,7 +497,6 @@ static int ov4689_s_stream(struct v4l2_subdev *sd, int on) } else { cci_write(ov4689->regmap, OV4689_REG_CTRL_MODE, OV4689_MODE_SW_STANDBY, NULL); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } @@ -702,7 +701,6 @@ static int ov4689_set_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -999,7 +997,6 @@ static int ov4689_probe(struct i2c_client *client) goto err_clean_subdev_pm; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 0dae0438aa80..84198613381d 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -3341,7 +3341,6 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); return 0; @@ -3417,7 +3416,6 @@ static int ov5640_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); return ret; @@ -3754,7 +3752,6 @@ static int ov5640_s_stream(struct v4l2_subdev *sd, int enable) mutex_unlock(&sensor->lock); if (!enable || ret) { - pm_runtime_mark_last_busy(&sensor->i2c_client->dev); pm_runtime_put_autosuspend(&sensor->i2c_client->dev); } @@ -3965,7 +3962,6 @@ static int ov5640_probe(struct i2c_client *client) pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c index 004d0ee5c3f5..58c846a44376 100644 --- a/drivers/media/i2c/ov5645.c +++ b/drivers/media/i2c/ov5645.c @@ -808,7 +808,6 @@ static int ov5645_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(ov5645->dev); pm_runtime_put_autosuspend(ov5645->dev); return ret; @@ -979,7 +978,6 @@ static int ov5645_disable_streams(struct v4l2_subdev *sd, OV5645_SYSTEM_CTRL0_STOP); rpm_put: - pm_runtime_mark_last_busy(ov5645->dev); pm_runtime_put_autosuspend(ov5645->dev); return ret; @@ -1196,7 +1194,6 @@ static int ov5645_probe(struct i2c_client *client) pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/ov64a40.c b/drivers/media/i2c/ov64a40.c index a5da4fe47e0b..2031cbd05c26 100644 --- a/drivers/media/i2c/ov64a40.c +++ b/drivers/media/i2c/ov64a40.c @@ -2990,7 +2990,6 @@ static int ov64a40_start_streaming(struct ov64a40 *ov64a40, return 0; error_power_off: - pm_runtime_mark_last_busy(ov64a40->dev); pm_runtime_put_autosuspend(ov64a40->dev); return ret; @@ -3000,7 +2999,6 @@ static int ov64a40_stop_streaming(struct ov64a40 *ov64a40, struct v4l2_subdev_state *state) { cci_update_bits(ov64a40->cci, OV64A40_REG_SMIA, BIT(0), 0, NULL); - pm_runtime_mark_last_busy(ov64a40->dev); pm_runtime_put_autosuspend(ov64a40->dev); __v4l2_ctrl_grab(ov64a40->link_freq, false); @@ -3329,10 +3327,8 @@ static int ov64a40_set_ctrl(struct v4l2_ctrl *ctrl) break; } - if (pm_status > 0) { - pm_runtime_mark_last_busy(ov64a40->dev); + if (pm_status > 0) pm_runtime_put_autosuspend(ov64a40->dev); - } return ret; } @@ -3622,7 +3618,6 @@ static int ov64a40_probe(struct i2c_client *client) goto error_subdev_cleanup; } - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; diff --git a/drivers/media/i2c/ov8858.c b/drivers/media/i2c/ov8858.c index 95f9ae794846..6b7193eaea1f 100644 --- a/drivers/media/i2c/ov8858.c +++ b/drivers/media/i2c/ov8858.c @@ -1391,7 +1391,6 @@ static int ov8858_s_stream(struct v4l2_subdev *sd, int on) } } else { ov8858_stop_stream(ov8858); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); } @@ -1945,7 +1944,6 @@ static int ov8858_probe(struct i2c_client *client) goto err_power_off; } - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/media/i2c/st-mipid02.c b/drivers/media/i2c/st-mipid02.c index f4568e87f018..41ae25b0911f 100644 --- a/drivers/media/i2c/st-mipid02.c +++ b/drivers/media/i2c/st-mipid02.c @@ -465,7 +465,6 @@ static int mipid02_disable_streams(struct v4l2_subdev *sd, if (ret) goto error; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); error: @@ -542,7 +541,6 @@ static int mipid02_enable_streams(struct v4l2_subdev *sd, cci_write(bridge->regmap, MIPID02_DATA_LANE0_REG1, 0, &ret); cci_write(bridge->regmap, MIPID02_DATA_LANE1_REG1, 0, &ret); - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return ret; } diff --git a/drivers/media/i2c/tc358746.c b/drivers/media/i2c/tc358746.c index 143aa1359aba..bcfc274cf891 100644 --- a/drivers/media/i2c/tc358746.c +++ b/drivers/media/i2c/tc358746.c @@ -816,7 +816,6 @@ static int tc358746_s_stream(struct v4l2_subdev *sd, int enable) return 0; err_out: - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return err; @@ -838,7 +837,6 @@ static int tc358746_s_stream(struct v4l2_subdev *sd, int enable) if (err) return err; - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return v4l2_subdev_call(src, video, s_stream, 0); @@ -1016,7 +1014,6 @@ tc358746_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) err = tc358746_read(tc358746, reg->reg, &val); reg->val = val; - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return err; @@ -1032,7 +1029,6 @@ tc358746_s_register(struct v4l2_subdev *sd, const struct v4l2_dbg_register *reg) tc358746_write(tc358746, (u32)reg->reg, (u32)reg->val); - pm_runtime_mark_last_busy(sd->dev); pm_runtime_put_sync_autosuspend(sd->dev); return 0; @@ -1395,7 +1391,6 @@ static int tc358746_init_hw(struct tc358746 *tc358746) } err = tc358746_read(tc358746, CHIPID_REG, &val); - pm_runtime_mark_last_busy(dev); pm_runtime_put_sync_autosuspend(dev); if (err) return -ENODEV; diff --git a/drivers/media/i2c/thp7312.c b/drivers/media/i2c/thp7312.c index 8852c56431fe..775cfba188d8 100644 --- a/drivers/media/i2c/thp7312.c +++ b/drivers/media/i2c/thp7312.c @@ -808,7 +808,6 @@ static int thp7312_s_stream(struct v4l2_subdev *sd, int enable) if (!enable) { thp7312_stream_enable(thp7312, false); - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); v4l2_subdev_unlock_state(sd_state); @@ -839,7 +838,6 @@ static int thp7312_s_stream(struct v4l2_subdev *sd, int enable) goto finish_unlock; finish_pm: - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); finish_unlock: v4l2_subdev_unlock_state(sd_state); @@ -1147,7 +1145,6 @@ static int thp7312_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(thp7312->dev); pm_runtime_put_autosuspend(thp7312->dev); return ret; @@ -2183,7 +2180,6 @@ static int thp7312_probe(struct i2c_client *client) * Decrease the PM usage count. The device will get suspended after the * autosuspend delay, turning the power off. */ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); dev_info(dev, "THP7312 firmware version %02u.%02u\n", diff --git a/drivers/media/i2c/vd55g1.c b/drivers/media/i2c/vd55g1.c index c0754fd03b1d..7c39183dd44b 100644 --- a/drivers/media/i2c/vd55g1.c +++ b/drivers/media/i2c/vd55g1.c @@ -1104,7 +1104,6 @@ static int vd55g1_disable_streams(struct v4l2_subdev *sd, vd55g1_grab_ctrls(sensor, false); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1338,7 +1337,6 @@ static int vd55g1_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1433,7 +1431,6 @@ static int vd55g1_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1895,7 +1892,6 @@ static int vd55g1_probe(struct i2c_client *client) pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, 4000); pm_runtime_use_autosuspend(dev); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); ret = vd55g1_subdev_init(sensor); diff --git a/drivers/media/i2c/vd56g3.c b/drivers/media/i2c/vd56g3.c index 5d951ad0b478..d66e21ba4498 100644 --- a/drivers/media/i2c/vd56g3.c +++ b/drivers/media/i2c/vd56g3.c @@ -493,7 +493,6 @@ static int vd56g3_g_volatile_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -577,7 +576,6 @@ static int vd56g3_s_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1021,7 +1019,6 @@ static int vd56g3_disable_streams(struct v4l2_subdev *sd, __v4l2_ctrl_grab(sensor->vflip_ctrl, false); __v4l2_ctrl_grab(sensor->patgen_ctrl, false); - pm_runtime_mark_last_busy(sensor->dev); pm_runtime_put_autosuspend(sensor->dev); return ret; @@ -1527,7 +1524,6 @@ static int vd56g3_probe(struct i2c_client *client) } /* Sensor could now be powered off (after the autosuspend delay) */ - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); dev_dbg(dev, "Successfully probe %s sensor\n", diff --git a/drivers/media/i2c/video-i2c.c b/drivers/media/i2c/video-i2c.c index 0dd991d70d53..1eee2d4f5b40 100644 --- a/drivers/media/i2c/video-i2c.c +++ b/drivers/media/i2c/video-i2c.c @@ -288,7 +288,6 @@ static int amg88xx_read(struct device *dev, enum hwmon_sensor_types type, return tmp; tmp = regmap_bulk_read(data->regmap, AMG88XX_REG_TTHL, &buf, 2); - pm_runtime_mark_last_busy(regmap_get_device(data->regmap)); pm_runtime_put_autosuspend(regmap_get_device(data->regmap)); if (tmp) return tmp; @@ -527,7 +526,6 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count) return 0; error_rpm_put: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); error_del_list: video_i2c_del_list(vq, VB2_BUF_STATE_QUEUED); @@ -544,7 +542,6 @@ static void stop_streaming(struct vb2_queue *vq) kthread_stop(data->kthread_vid_cap); data->kthread_vid_cap = NULL; - pm_runtime_mark_last_busy(regmap_get_device(data->regmap)); pm_runtime_put_autosuspend(regmap_get_device(data->regmap)); video_i2c_del_list(vq, VB2_BUF_STATE_ERROR); @@ -853,7 +850,6 @@ static int video_i2c_probe(struct i2c_client *client) if (ret < 0) goto error_pm_disable; - pm_runtime_mark_last_busy(&client->dev); pm_runtime_put_autosuspend(&client->dev); return 0; diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c index fd71f0c43ac3..a9ce032cc5a2 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c @@ -451,7 +451,6 @@ static void wave5_vpu_dec_finish_decode(struct vpu_instance *inst) if (q_status.report_queue_count == 0 && (q_status.instance_queue_count == 0 || dec_info.sequence_changed)) { dev_dbg(inst->dev->dev, "%s: finishing job.\n", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } @@ -1364,7 +1363,6 @@ static int wave5_vpu_dec_start_streaming(struct vb2_queue *q, unsigned int count } } - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return ret; @@ -1498,7 +1496,6 @@ static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q) else streamoff_capture(q); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); } @@ -1662,7 +1659,6 @@ static void wave5_vpu_dec_device_run(void *priv) finish_job_and_return: dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c index 1e5fc5f8b856..35913a7de834 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c @@ -1391,12 +1391,10 @@ static int wave5_vpu_enc_start_streaming(struct vb2_queue *q, unsigned int count if (ret) goto return_buffers; - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return 0; return_buffers: wave5_return_bufs(q, VB2_BUF_STATE_QUEUED); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return ret; } @@ -1465,7 +1463,6 @@ static void wave5_vpu_enc_stop_streaming(struct vb2_queue *q) else streamoff_capture(inst, q); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); } @@ -1520,7 +1517,6 @@ static void wave5_vpu_enc_device_run(void *priv) break; } dev_dbg(inst->dev->dev, "%s: leave with active job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); return; default: @@ -1529,7 +1525,6 @@ static void wave5_vpu_enc_device_run(void *priv) break; } dev_dbg(inst->dev->dev, "%s: leave and finish job", __func__); - pm_runtime_mark_last_busy(inst->dev->dev); pm_runtime_put_autosuspend(inst->dev->dev); v4l2_m2m_job_finish(inst->v4l2_m2m_dev, m2m_ctx); } diff --git a/drivers/media/platform/nvidia/tegra-vde/h264.c b/drivers/media/platform/nvidia/tegra-vde/h264.c index 0e56a4331b0d..45f8f6904867 100644 --- a/drivers/media/platform/nvidia/tegra-vde/h264.c +++ b/drivers/media/platform/nvidia/tegra-vde/h264.c @@ -585,7 +585,6 @@ static int tegra_vde_decode_begin(struct tegra_vde *vde, return 0; put_runtime_pm: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); unlock: @@ -612,7 +611,6 @@ static void tegra_vde_decode_abort(struct tegra_vde *vde) if (err) dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); mutex_unlock(&vde->lock); diff --git a/drivers/media/platform/qcom/iris/iris_hfi_queue.c b/drivers/media/platform/qcom/iris/iris_hfi_queue.c index 221dcd09e1e1..b3ed06297953 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_queue.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_queue.c @@ -142,7 +142,6 @@ int iris_hfi_queue_cmd_write(struct iris_core *core, void *pkt, u32 pkt_size) } mutex_unlock(&core->lock); - pm_runtime_mark_last_busy(core->dev); pm_runtime_put_autosuspend(core->dev); return 0; diff --git a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c index b30891718d8d..d60d92d2ffa1 100644 --- a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c +++ b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c @@ -950,7 +950,6 @@ static void pispbe_node_stop_streaming(struct vb2_queue *q) kfree(job); } - pm_runtime_mark_last_busy(pispbe->dev); pm_runtime_put_autosuspend(pispbe->dev); dev_dbg(pispbe->dev, "Nodes streaming now 0x%x\n", @@ -1742,7 +1741,6 @@ static int pispbe_probe(struct platform_device *pdev) if (ret) goto disable_devs_err; - pm_runtime_mark_last_busy(pispbe->dev); pm_runtime_put_autosuspend(pispbe->dev); return 0; diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c index 8542238e0fb1..fa972effd4a2 100644 --- a/drivers/media/platform/verisilicon/hantro_drv.c +++ b/drivers/media/platform/verisilicon/hantro_drv.c @@ -89,7 +89,6 @@ static void hantro_job_finish(struct hantro_dev *vpu, struct hantro_ctx *ctx, enum vb2_buffer_state result) { - pm_runtime_mark_last_busy(vpu->dev); pm_runtime_put_autosuspend(vpu->dev); clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks); diff --git a/drivers/media/rc/gpio-ir-recv.c b/drivers/media/rc/gpio-ir-recv.c index bf6d8fa983bf..a6418ef782bc 100644 --- a/drivers/media/rc/gpio-ir-recv.c +++ b/drivers/media/rc/gpio-ir-recv.c @@ -48,10 +48,8 @@ static irqreturn_t gpio_ir_recv_irq(int irq, void *dev_id) if (val >= 0) ir_raw_event_store_edge(gpio_dev->rcdev, val == 1); - if (pmdev) { - pm_runtime_mark_last_busy(pmdev); + if (pmdev) pm_runtime_put_autosuspend(pmdev); - } return IRQ_HANDLED; } From bd7c2312128e31d056d30d34d60503de056e15f0 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 14 Aug 2025 15:07:13 +0200 Subject: [PATCH 0451/1307] pinctrl: meson: Fix typo in device table macro The typo when using the MODULE_DEVICE_TABLE macro was not noticeable because the macro was defined only if the module was built as a separate module. Cc: Xianwei Zhao Cc: Linus Walleij Cc: Neil Armstrong Cc: Kevin Hilman Cc: linux-amlogic@lists.infradead.org Cc: linux-gpio@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507220009.8HKbNP16-lkp@intel.com/ Signed-off-by: Alexey Gladkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/e548b7761302defec15aa2098172eabb1ce1ad4a.1755170493.git.legion@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index e34e984c2b38..6132710aff68 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -1093,7 +1093,7 @@ static const struct of_device_id aml_pctl_of_match[] = { { .compatible = "amlogic,pinctrl-s6", .data = &s6_priv_data, }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(of, aml_pctl_dt_match); +MODULE_DEVICE_TABLE(of, aml_pctl_of_match); static struct platform_driver aml_pctl_driver = { .driver = { From c0ed3c2edc7692c6b8af7578b41012694dc8c671 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sat, 16 Aug 2025 12:27:41 +0800 Subject: [PATCH 0452/1307] ALSA: hda/tas2781: Add name prefix tas2781 for tas2781's dvc_tlv and amp_vol_tlv With some new devices adding into the driver, dvc_tlv and amp_vol_tlv will cause confusion for customers on which devices they support. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250816042741.1659-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781-tlv.h | 6 +++--- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- sound/hda/codecs/side-codecs/tas2781_hda_spi.c | 6 ++++-- sound/soc/codecs/tas2781-i2c.c | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index ef9b9f19d212..273224df9282 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// Copyright (C) 2022 - 2025 Texas Instruments Incorporated // https://www.ti.com // // The TAS2781 driver implements a flexible and configurable @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); -static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_dvc_tlv, -10000, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_amp_tlv, 1100, 50, 0); #endif diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 06c7bc2b9e9d..b91fff3fde97 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -267,7 +267,7 @@ static const struct snd_kcontrol_new tas2770_snd_controls[] = { static const struct snd_kcontrol_new tas2781_snd_controls[] = { ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, - tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_putvol, tas2781_amp_tlv), ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0, tas2781_force_fwload_get, tas2781_force_fwload_put), }; diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c index 09a5d0f131b2..b9a55672bf15 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c @@ -494,9 +494,11 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol, static struct snd_kcontrol_new tas2781_snd_ctls[] = { ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_AMP_LEVEL, 1, 0, 20, 0, - tas2781_amp_getvol, tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_getvol, tas2781_amp_putvol, + tas2781_amp_tlv), ACARD_SINGLE_RANGE_EXT_TLV(NULL, TAS2781_DVC_LVL, 0, 0, 200, 1, - tas2781_digital_getvol, tas2781_digital_putvol, dvc_tlv), + tas2781_digital_getvol, tas2781_digital_putvol, + tas2781_dvc_tlv), ACARD_SINGLE_BOOL_EXT(NULL, 0, tas2781_force_fwload_get, tas2781_force_fwload_put), }; diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 676130f4cf3e..0e09d794516f 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -910,10 +910,10 @@ static const struct snd_kcontrol_new tasdevice_cali_controls[] = { static const struct snd_kcontrol_new tas2781_snd_controls[] = { SOC_SINGLE_RANGE_EXT_TLV("Speaker Analog Volume", TAS2781_AMP_LEVEL, 1, 0, 20, 0, tas2781_amp_getvol, - tas2781_amp_putvol, amp_vol_tlv), + tas2781_amp_putvol, tas2781_amp_tlv), SOC_SINGLE_RANGE_EXT_TLV("Speaker Digital Volume", TAS2781_DVC_LVL, 0, 0, 200, 1, tas2781_digital_getvol, - tas2781_digital_putvol, dvc_tlv), + tas2781_digital_putvol, tas2781_dvc_tlv), }; static const struct snd_kcontrol_new tas2781_cali_controls[] = { From dec8b38be4b35cae5f7fa086daf2631e2cfa09c1 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:50 +0800 Subject: [PATCH 0453/1307] mmc: sdhci-pci-gli: Add a new function to simplify the code In preparation to fix replay timer timeout, add sdhci_gli_mask_replay_timer_timeout() function to simplify some of the code, allowing it to be re-used. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-2-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 4c2ae71770f7..f678c91f8d3e 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -287,6 +287,20 @@ #define GLI_MAX_TUNING_LOOP 40 /* Genesys Logic chipset */ +static void sdhci_gli_mask_replay_timer_timeout(struct pci_dev *pdev) +{ + int aer; + u32 value; + + /* mask the replay timer timeout of AER */ + aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + if (aer) { + pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); + value |= PCI_ERR_COR_REP_TIMER; + pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); + } +} + static inline void gl9750_wt_on(struct sdhci_host *host) { u32 wt_value; @@ -607,7 +621,6 @@ static void gl9750_hw_setting(struct sdhci_host *host) { struct sdhci_pci_slot *slot = sdhci_priv(host); struct pci_dev *pdev; - int aer; u32 value; pdev = slot->chip->pdev; @@ -626,12 +639,7 @@ static void gl9750_hw_setting(struct sdhci_host *host) pci_set_power_state(pdev, PCI_D0); /* mask the replay timer timeout of AER */ - aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - if (aer) { - pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); - value |= PCI_ERR_COR_REP_TIMER; - pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); - } + sdhci_gli_mask_replay_timer_timeout(pdev); gl9750_wt_off(host); } @@ -806,7 +814,6 @@ static void sdhci_gl9755_set_clock(struct sdhci_host *host, unsigned int clock) static void gl9755_hw_setting(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; - int aer; u32 value; gl9755_wt_on(pdev); @@ -841,12 +848,7 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) pci_set_power_state(pdev, PCI_D0); /* mask the replay timer timeout of AER */ - aer = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - if (aer) { - pci_read_config_dword(pdev, aer + PCI_ERR_COR_MASK, &value); - value |= PCI_ERR_COR_REP_TIMER; - pci_write_config_dword(pdev, aer + PCI_ERR_COR_MASK, value); - } + sdhci_gli_mask_replay_timer_timeout(pdev); gl9755_wt_off(pdev); } From 293ed0f5f34e1e9df888456af4b0a021f57b5f54 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:51 +0800 Subject: [PATCH 0454/1307] mmc: sdhci-pci-gli: GL9763e: Rename the gli_set_gl9763e() for consistency In preparation to fix replay timer timeout, rename the gli_set_gl9763e() to gl9763e_hw_setting() for consistency. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-3-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index f678c91f8d3e..436f0460222f 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1753,7 +1753,7 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot) return ret; } -static void gli_set_gl9763e(struct sdhci_pci_slot *slot) +static void gl9763e_hw_setting(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; u32 value; @@ -1925,7 +1925,7 @@ static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot) gli_pcie_enable_msi(slot); host->mmc_host_ops.hs400_enhanced_strobe = gl9763e_hs400_enhanced_strobe; - gli_set_gl9763e(slot); + gl9763e_hw_setting(slot); sdhci_enable_v4_mode(host); return 0; From 340be332e420ed37d15d4169a1b4174e912ad6cb Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Thu, 31 Jul 2025 14:57:52 +0800 Subject: [PATCH 0455/1307] mmc: sdhci-pci-gli: GL9763e: Mask the replay timer timeout of AER Due to a flaw in the hardware design, the GL9763e replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9763e PCI config. Therefore, the replay timer timeout must be masked. Signed-off-by: Victor Shih Fixes: 1ae1d2d6e555 ("mmc: sdhci-pci-gli: Add Genesys Logic GL9763E support") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250731065752.450231-4-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 436f0460222f..3a1de477e9af 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1782,6 +1782,9 @@ static void gl9763e_hw_setting(struct sdhci_pci_slot *slot) value |= FIELD_PREP(GLI_9763E_HS400_RXDLY, GLI_9763E_HS400_RXDLY_5); pci_write_config_dword(pdev, PCIE_GLI_9763E_CLKRXDLY, value); + /* mask the replay timer timeout of AER */ + sdhci_gli_mask_replay_timer_timeout(pdev); + pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); value &= ~GLI_9763E_VHS_REV; value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R); From e251709aaddb3ee1e8ac1ed5e361a608a1cc92de Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Wed, 30 Jul 2025 11:35:43 +0530 Subject: [PATCH 0456/1307] mmc: sdhci-of-arasan: Ensure CD logic stabilization before power-up During SD suspend/resume without a full card rescan (when using non-removable SD cards for rootfs), the SD card initialization may fail after resume. This occurs because, after a host controller reset, the card detect logic may take time to stabilize due to debounce logic. Without waiting for stabilization, the host may attempt powering up the card prematurely, leading to command timeouts during resume flow. Add sdhci_arasan_set_power_and_bus_voltage() to wait for the card detect stable bit before power up the card. Since the stabilization time is not fixed, a maximum timeout of one second is used to ensure sufficient wait time for the card detect signal to stabilize. Signed-off-by: Sai Krishna Potthuri Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250730060543.1735971-1-sai.krishna.potthuri@amd.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 33 ++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 42878474e56e..60dbc815e501 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -99,6 +99,9 @@ #define HIWORD_UPDATE(val, mask, shift) \ ((val) << (shift) | (mask) << ((shift) + 16)) +#define CD_STABLE_TIMEOUT_US 1000000 +#define CD_STABLE_MAX_SLEEP_US 10 + /** * struct sdhci_arasan_soc_ctl_field - Field used in sdhci_arasan_soc_ctl_map * @@ -206,12 +209,15 @@ struct sdhci_arasan_data { * 19MHz instead */ #define SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN BIT(2) +/* Enable CD stable check before power-up */ +#define SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE BIT(3) }; struct sdhci_arasan_of_data { const struct sdhci_arasan_soc_ctl_map *soc_ctl_map; const struct sdhci_pltfm_data *pdata; const struct sdhci_arasan_clk_ops *clk_ops; + u32 quirks; }; static const struct sdhci_arasan_soc_ctl_map rk3399_soc_ctl_map = { @@ -514,6 +520,24 @@ static int sdhci_arasan_voltage_switch(struct mmc_host *mmc, return -EINVAL; } +static void sdhci_arasan_set_power_and_bus_voltage(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); + u32 reg; + + /* + * Ensure that the card detect logic has stabilized before powering up, this is + * necessary after a host controller reset. + */ + if (mode == MMC_POWER_UP && sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE) + read_poll_timeout(sdhci_readl, reg, reg & SDHCI_CD_STABLE, CD_STABLE_MAX_SLEEP_US, + CD_STABLE_TIMEOUT_US, false, host, SDHCI_PRESENT_STATE); + + sdhci_set_power_and_bus_voltage(host, mode, vdd); +} + static const struct sdhci_ops sdhci_arasan_ops = { .set_clock = sdhci_arasan_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, @@ -521,7 +545,7 @@ static const struct sdhci_ops sdhci_arasan_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_arasan_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, - .set_power = sdhci_set_power_and_bus_voltage, + .set_power = sdhci_arasan_set_power_and_bus_voltage, .hw_reset = sdhci_arasan_hw_reset, }; @@ -570,7 +594,7 @@ static const struct sdhci_ops sdhci_arasan_cqe_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_arasan_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, - .set_power = sdhci_set_power_and_bus_voltage, + .set_power = sdhci_arasan_set_power_and_bus_voltage, .irq = sdhci_arasan_cqhci_irq, }; @@ -1447,6 +1471,7 @@ static const struct sdhci_arasan_clk_ops zynqmp_clk_ops = { static struct sdhci_arasan_of_data sdhci_arasan_zynqmp_data = { .pdata = &sdhci_arasan_zynqmp_pdata, .clk_ops = &zynqmp_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static const struct sdhci_arasan_clk_ops versal_clk_ops = { @@ -1457,6 +1482,7 @@ static const struct sdhci_arasan_clk_ops versal_clk_ops = { static struct sdhci_arasan_of_data sdhci_arasan_versal_data = { .pdata = &sdhci_arasan_zynqmp_pdata, .clk_ops = &versal_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static const struct sdhci_arasan_clk_ops versal_net_clk_ops = { @@ -1467,6 +1493,7 @@ static const struct sdhci_arasan_clk_ops versal_net_clk_ops = { static struct sdhci_arasan_of_data sdhci_arasan_versal_net_data = { .pdata = &sdhci_arasan_versal_net_pdata, .clk_ops = &versal_net_clk_ops, + .quirks = SDHCI_ARASAN_QUIRK_ENSURE_CD_STABLE, }; static struct sdhci_arasan_of_data intel_keembay_emmc_data = { @@ -1937,6 +1964,8 @@ static int sdhci_arasan_probe(struct platform_device *pdev) if (of_device_is_compatible(np, "rockchip,rk3399-sdhci-5.1")) sdhci_arasan_update_clockmultiplier(host, 0x0); + sdhci_arasan->quirks |= data->quirks; + if (of_device_is_compatible(np, "intel,keembay-sdhci-5.1-emmc") || of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sd") || of_device_is_compatible(np, "intel,keembay-sdhci-5.1-sdio")) { From 99d7ab8db9d8230b243f5ed20ba0229e54cc0dfa Mon Sep 17 00:00:00 2001 From: Jiayi Li Date: Mon, 4 Aug 2025 09:36:04 +0800 Subject: [PATCH 0457/1307] memstick: Fix deadlock by moving removing flag earlier The existing memstick core patch: commit 62c59a8786e6 ("memstick: Skip allocating card when removing host") sets host->removing in memstick_remove_host(),but still exists a critical time window where memstick_check can run after host->eject is set but before removing is set. In the rtsx_usb_ms driver, the problematic sequence is: rtsx_usb_ms_drv_remove: memstick_check: host->eject = true cancel_work_sync(handle_req) if(!host->removing) ... memstick_alloc_card() memstick_set_rw_addr() memstick_new_req() rtsx_usb_ms_request() if(!host->eject) skip schedule_work wait_for_completion() memstick_remove_host: [blocks indefinitely] host->removing = true flush_workqueue() [block] 1. rtsx_usb_ms_drv_remove sets host->eject = true 2. cancel_work_sync(&host->handle_req) runs 3. memstick_check work may be executed here <-- danger window 4. memstick_remove_host sets removing = 1 During this window (step 3), memstick_check calls memstick_alloc_card, which may indefinitely waiting for mrq_complete completion that will never occur because rtsx_usb_ms_request sees eject=true and skips scheduling work, memstick_set_rw_addr waits forever for completion. This causes a deadlock when memstick_remove_host tries to flush_workqueue, waiting for memstick_check to complete, while memstick_check is blocked waiting for mrq_complete completion. Fix this by setting removing=true at the start of rtsx_usb_ms_drv_remove, before any work cancellation. This ensures memstick_check will see the removing flag immediately and exit early, avoiding the deadlock. Fixes: 62c59a8786e6 ("memstick: Skip allocating card when removing host") Signed-off-by: Jiayi Li Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250804013604.1311218-1-lijiayi@kylinos.cn Signed-off-by: Ulf Hansson --- drivers/memstick/core/memstick.c | 1 - drivers/memstick/host/rtsx_usb_ms.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 7f3f47db4c98..e4275f8ee5db 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(memstick_add_host); */ void memstick_remove_host(struct memstick_host *host) { - host->removing = 1; flush_workqueue(workqueue); mutex_lock(&host->lock); if (host->card) diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 3878136227e4..5b5e9354fb2e 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -812,6 +812,7 @@ static void rtsx_usb_ms_drv_remove(struct platform_device *pdev) int err; host->eject = true; + msh->removing = true; cancel_work_sync(&host->handle_req); cancel_delayed_work_sync(&host->poll_card); From d8df126349dad855cdfedd6bbf315bad2e901c2f Mon Sep 17 00:00:00 2001 From: Tianxiang Peng Date: Mon, 23 Jun 2025 17:31:53 +0800 Subject: [PATCH 0458/1307] x86/cpu/hygon: Add missing resctrl_cpu_detect() in bsp_init helper Since 923f3a2b48bd ("x86/resctrl: Query LLC monitoring properties once during boot") resctrl_cpu_detect() has been moved from common CPU initialization code to the vendor-specific BSP init helper, while Hygon didn't put that call in their code. This triggers a division by zero fault during early booting stage on our machines with X86_FEATURE_CQM* supported, where get_rdt_mon_resources() tries to calculate mon_l3_config with uninitialized boot_cpu_data.x86_cache_occ_scale. Add the missing resctrl_cpu_detect() in the Hygon BSP init helper. [ bp: Massage commit message. ] Fixes: 923f3a2b48bd ("x86/resctrl: Query LLC monitoring properties once during boot") Signed-off-by: Tianxiang Peng Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Hui Li Cc: Link: https://lore.kernel.org/20250623093153.3016937-1-txpeng@tencent.com --- arch/x86/kernel/cpu/hygon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 2154f12766fb..1fda6c3a2b65 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpu.h" @@ -117,6 +118,8 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; } } + + resctrl_cpu_detect(c); } static void early_init_hygon(struct cpuinfo_x86 *c) From 89f0addeee3cb2dc49837599330ed9c4612f05b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Aug 2025 12:59:45 +0300 Subject: [PATCH 0459/1307] ALSA: usb-audio: Fix size validation in convert_chmap_v3() The "p" pointer is void so sizeof(*p) is 1. The intent was to check sizeof(*cs_desc), which is 3, instead. Fixes: ecfd41166b72 ("ALSA: usb-audio: Validate UAC3 cluster segment descriptors") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aKL5kftC1qGt6lpv@stanley.mountain Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index acf3dc2d79e0..5c235a5ba7e1 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -349,7 +349,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor u16 cs_len; u8 cs_type; - if (len < sizeof(*p)) + if (len < sizeof(*cs_desc)) break; cs_len = le16_to_cpu(cs_desc->wLength); if (len < cs_len) From 5f1c8965e748c150d580a2ea8fbee1bd80d07a24 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 4 Aug 2025 22:11:28 +1000 Subject: [PATCH 0460/1307] ovl: use I_MUTEX_PARENT when locking parent in ovl_create_temp() ovl_create_temp() treats "workdir" as a parent in which it creates an object so it should use I_MUTEX_PARENT. Prior to the commit identified below the lock was taken by the caller which sometimes used I_MUTEX_PARENT and sometimes used I_MUTEX_NORMAL. The use of I_MUTEX_NORMAL was incorrect but unfortunately copied into ovl_create_temp(). Note to backporters: This patch only applies after the last Fixes given below (post v6.16). To fix the bug in v6.7 and later the inode_lock() call in ovl_copy_up_workdir() needs to nest using I_MUTEX_PARENT. Link: https://lore.kernel.org/all/67a72070.050a0220.3d72c.0022.GAE@google.com/ Cc: stable@vger.kernel.org Reported-by: syzbot+7836a68852a10ec3d790@syzkaller.appspotmail.com Tested-by: syzbot+7836a68852a10ec3d790@syzkaller.appspotmail.com Fixes: c63e56a4a652 ("ovl: do not open/llseek lower file with upper sb_writers held") Fixes: d2c995581c7c ("ovl: Call ovl_create_temp() without lock held.") Signed-off-by: NeilBrown Signed-off-by: Amir Goldstein --- fs/overlayfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 70b8687dc45e..dbd63a74df4b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -225,7 +225,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { struct dentry *ret; - inode_lock(workdir->d_inode); + inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT); ret = ovl_create_real(ofs, workdir, ovl_lookup_temp(ofs, workdir), attr); inode_unlock(workdir->d_inode); From e8bd877fb76bb9f35253e8f41ce0c772269934dd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 18 Aug 2025 11:23:55 +0200 Subject: [PATCH 0461/1307] ovl: fix possible double unlink commit 9d23967b18c6 ("ovl: simplify an error path in ovl_copy_up_workdir()") introduced the helper ovl_cleanup_unlocked(), which is later used in several following patches to re-acquire the parent inode lock and unlink a dentry that was earlier found using lookup. This helper was eventually renamed to ovl_cleanup(). The helper ovl_parent_lock() is used to re-acquire the parent inode lock. After acquiring the parent inode lock, the helper verifies that the dentry has not since been moved to another parent, but it failed to verify that the dentry wasn't unlinked from the parent. This means that now every call to ovl_cleanup() could potentially race with another thread, unlinking the dentry to be cleaned up underneath overlayfs and trigger a vfs assertion. Reported-by: syzbot+ec9fab8b7f0386b98a17@syzkaller.appspotmail.com Tested-by: syzbot+ec9fab8b7f0386b98a17@syzkaller.appspotmail.com Fixes: 9d23967b18c6 ("ovl: simplify an error path in ovl_copy_up_workdir()") Suggested-by: NeilBrown Signed-off-by: Amir Goldstein --- fs/overlayfs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index a33115e7384c..41033bac96cb 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1552,7 +1552,8 @@ void ovl_copyattr(struct inode *inode) int ovl_parent_lock(struct dentry *parent, struct dentry *child) { inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - if (!child || child->d_parent == parent) + if (!child || + (!d_unhashed(child) && child->d_parent == parent)) return 0; inode_unlock(parent->d_inode); From 0227af355b50c526bf83ca52d67aef5d102e9b07 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 17 Aug 2025 15:06:05 +0530 Subject: [PATCH 0462/1307] selftests: ublk: Use ARRAY_SIZE() macro to improve code Use ARRAY_SIZE() macro while calculating size of an array to improve code readability and reduce potential sizing errors. Implement this suggestion given by spatch tool by running coccinelle script - scripts/coccinelle/misc/array_size.cocci Follow ARRAY_SIZE() macro usage pattern in ublk.c introduced by, commit ec120093180b9 ("selftests: ublk: fix ublk_find_tgt()") wherever appropriate to maintain consistency. Signed-off-by: Akhilesh Patil Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/aKGihYui6/Pcijbk@bhairav-test.ee.iitb.ac.in Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..6512dfbdbce3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1400,7 +1400,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1477,7 +1477,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) From 447be50598c05499f7ccc2b1f6ddb3da30f8099a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 15 Aug 2025 12:52:09 +0800 Subject: [PATCH 0463/1307] regulator: pca9450: Use devm_register_sys_off_handler With module test, there is error dump: ------------[ cut here ]------------ notifier callback pca9450_i2c_restart_handler already registered WARNING: kernel/notifier.c:23 at notifier_chain_register+0x5c/0x88, CPU#0: kworker/u16:3/50 Call trace: notifier_chain_register+0x5c/0x88 (P) atomic_notifier_chain_register+0x30/0x58 register_restart_handler+0x1c/0x28 pca9450_i2c_probe+0x418/0x538 i2c_device_probe+0x220/0x3d0 really_probe+0x114/0x410 __driver_probe_device+0xa0/0x150 driver_probe_device+0x40/0x114 __device_attach_driver+0xd4/0x12c So use devm_register_sys_off_handler to let kernel handle the resource free to avoid kernel dump. Fixes: 6157e62b07d9 ("regulator: pca9450: Add restart handler") Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250815-pca9450-v1-1-7748e362dc97@nxp.com Signed-off-by: Mark Brown --- drivers/regulator/pca9450-regulator.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index feadb21a8f30..4be270f4d6c3 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -40,7 +40,6 @@ struct pca9450 { struct device *dev; struct regmap *regmap; struct gpio_desc *sd_vsel_gpio; - struct notifier_block restart_nb; enum pca9450_chip_type type; unsigned int rcnt; int irq; @@ -1100,10 +1099,9 @@ static irqreturn_t pca9450_irq_handler(int irq, void *data) return IRQ_HANDLED; } -static int pca9450_i2c_restart_handler(struct notifier_block *nb, - unsigned long action, void *data) +static int pca9450_i2c_restart_handler(struct sys_off_data *data) { - struct pca9450 *pca9450 = container_of(nb, struct pca9450, restart_nb); + struct pca9450 *pca9450 = data->cb_data; struct i2c_client *i2c = container_of(pca9450->dev, struct i2c_client, dev); dev_dbg(&i2c->dev, "Restarting device..\n"); @@ -1261,10 +1259,9 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) pca9450->sd_vsel_fixed_low = of_property_read_bool(ldo5->dev.of_node, "nxp,sd-vsel-fixed-low"); - pca9450->restart_nb.notifier_call = pca9450_i2c_restart_handler; - pca9450->restart_nb.priority = PCA9450_RESTART_HANDLER_PRIORITY; - - if (register_restart_handler(&pca9450->restart_nb)) + if (devm_register_sys_off_handler(&i2c->dev, SYS_OFF_MODE_RESTART, + PCA9450_RESTART_HANDLER_PRIORITY, + pca9450_i2c_restart_handler, pca9450)) dev_warn(&i2c->dev, "Failed to register restart handler\n"); dev_info(&i2c->dev, "%s probed.\n", From 8ea815399c3fcce1889bd951fec25b5b9a3979c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 14 Apr 2025 16:41:07 +0200 Subject: [PATCH 0464/1307] compiler: remove __ADDRESSABLE_ASM{_STR,}() again __ADDRESSABLE_ASM_STR() is where the necessary stringification happens. As long as "sym" doesn't contain any odd characters, no quoting is required for its use with .quad / .long. In fact the quotation gets in the way with gas 2.25; it's only from 2.26 onwards that quoted symbols are half-way properly supported. However, assembly being different from C anyway, drop __ADDRESSABLE_ASM_STR() and its helper macro altogether. A simple .global directive will suffice to get the symbol "declared", i.e. into the symbol table. While there also stop open-coding STATIC_CALL_TRAMP() and STATIC_CALL_KEY(). Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Signed-off-by: Jan Beulich Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <609d2c74-de13-4fae-ab1a-1ec44afb948d@suse.com> --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- include/linux/compiler.h | 8 -------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 59a62c3780a2..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); #ifdef MODULE #define __ADDRESSABLE_xen_hypercall #else -#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) #endif #define __HYPERCALL \ __ADDRESSABLE_xen_hypercall \ - "call __SCT__xen_hypercall" + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) #define __HYPERCALL_ENTRY(x) "a" (x) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. From 75dbd4304afe574fcfc4118a5b78776a9f48fdc4 Mon Sep 17 00:00:00 2001 From: Sungbae Yoo Date: Wed, 6 Aug 2025 12:47:35 +0000 Subject: [PATCH 0465/1307] tee: optee: ffa: fix a typo of "optee_ffa_api_is_compatible" Fixes optee_ffa_api_is_compatbile() to optee_ffa_api_is_compatible() because compatbile is a typo of compatible. Fixes: 4615e5a34b95 ("optee: add FF-A support") Signed-off-by: Sungbae Yoo Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/ffa_abi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index f9ef7d94cebd..a963eed70c1d 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -657,7 +657,7 @@ static int optee_ffa_do_call_with_arg(struct tee_context *ctx, * with a matching configuration. */ -static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev, +static bool optee_ffa_api_is_compatible(struct ffa_device *ffa_dev, const struct ffa_ops *ops) { const struct ffa_msg_ops *msg_ops = ops->msg_ops; @@ -908,7 +908,7 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) ffa_ops = ffa_dev->ops; notif_ops = ffa_ops->notifier_ops; - if (!optee_ffa_api_is_compatbile(ffa_dev, ffa_ops)) + if (!optee_ffa_api_is_compatible(ffa_dev, ffa_ops)) return -EINVAL; if (!optee_ffa_exchange_caps(ffa_dev, ffa_ops, &sec_caps, From 8fe8a092043f28d3c8e467cb2bbfe1e1ccf7f996 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 10 Aug 2025 19:04:01 +0530 Subject: [PATCH 0466/1307] iommufd: viommu: free memory allocated by kvcalloc() using kvfree() Use kvfree() instead of kfree() to free pages allocated by kvcalloc() in iommufs_hw_queue_alloc_phys() to fix potential memory corruption. Ensure the memory is properly freed, as kvcalloc may internally use vmalloc or kmalloc depending on available memory in the system. Fixes: 2238ddc2b056 ("iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl") Link: https://patch.msgid.link/r/aJifyVV2PL6WGEs6@bhairav-test.ee.iitb.ac.in Signed-off-by: Akhilesh Patil Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/viommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 2ca5809b238b..462b457ffd0c 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -339,7 +339,7 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, } *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; - kfree(pages); + kvfree(pages); return access; out_unpin: @@ -349,7 +349,7 @@ iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, out_destroy: iommufd_access_destroy_internal(viommu->ictx, access); out_free: - kfree(pages); + kvfree(pages); return ERR_PTR(rc); } From 447c6141e8ea68ef4e56c55144fd18f43e6c8dca Mon Sep 17 00:00:00 2001 From: Alessandro Ratti Date: Fri, 15 Aug 2025 17:37:26 +0200 Subject: [PATCH 0467/1307] iommufd: Fix spelling errors in iommufd.rst This patch corrects two minor spelling issues found in Documentation/userspace-api/iommufd.rst: - "primarly" -> "primarily" - "sharable" -> "shareable" Found using codespell(1). Link: https://patch.msgid.link/r/20250815153840.188213-2-alessandro@0x65c.net Signed-off-by: Alessandro Ratti Signed-off-by: Jason Gunthorpe --- Documentation/userspace-api/iommufd.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst index 03f7510384d2..f1c4d21e5c5e 100644 --- a/Documentation/userspace-api/iommufd.rst +++ b/Documentation/userspace-api/iommufd.rst @@ -43,7 +43,7 @@ Following IOMMUFD objects are exposed to userspace: - IOMMUFD_OBJ_HWPT_PAGING, representing an actual hardware I/O page table (i.e. a single struct iommu_domain) managed by the iommu driver. "PAGING" - primarly indicates this type of HWPT should be linked to an IOAS. It also + primarily indicates this type of HWPT should be linked to an IOAS. It also indicates that it is backed by an iommu_domain with __IOMMU_DOMAIN_PAGING feature flag. This can be either an UNMANAGED stage-1 domain for a device running in the user space, or a nesting parent stage-2 domain for mappings @@ -76,7 +76,7 @@ Following IOMMUFD objects are exposed to userspace: * Security namespace for guest owned ID, e.g. guest-controlled cache tags * Non-device-affiliated event reporting, e.g. invalidation queue errors - * Access to a sharable nesting parent pagetable across physical IOMMUs + * Access to a shareable nesting parent pagetable across physical IOMMUs * Virtualization of various platforms IDs, e.g. RIDs and others * Delivery of paravirtualized invalidation * Direct assigned invalidation queues From e9576e078220c50ace9e9087355423de23e25fa5 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 21 Jul 2025 18:11:54 +0000 Subject: [PATCH 0468/1307] x86/CPU/AMD: Ignore invalid reset reason value The reset reason value may be "all bits set", e.g. 0xFFFFFFFF. This is a commonly used error response from hardware. This may occur due to a real hardware issue or when running in a VM. The user will see all reset reasons reported in this case. Check for an error response value and return early to avoid decoding invalid data. Also, adjust the data variable type to match the hardware register size. Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset") Reported-by: Libing He Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250721181155.3536023-1-yazen.ghannam@amd.com --- arch/x86/kernel/cpu/amd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5ece6ebe8a7..a6f88ca1a6b4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1326,8 +1326,8 @@ static const char * const s5_reset_reason_txt[] = { static __init int print_s5_reset_status_mmio(void) { - unsigned long value; void __iomem *addr; + u32 value; int i; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) @@ -1340,12 +1340,16 @@ static __init int print_s5_reset_status_mmio(void) value = ioread32(addr); iounmap(addr); + /* Value with "all bits set" is an error response and should be ignored. */ + if (value == U32_MAX) + return 0; + for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) continue; if (s5_reset_reason_txt[i]) { - pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n", + pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n", value, s5_reset_reason_txt[i]); } } From 1ba9fbe40337e448b32e2831a7051191d61f0382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 09:44:42 +0200 Subject: [PATCH 0469/1307] drm/msm: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. Signed-off-by: Thomas Weißschuh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/667895/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c | 4 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 ++-- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index d4b545448d74..94912b4708fb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -596,7 +596,7 @@ static void _dpu_crtc_complete_flip(struct drm_crtc *crtc) spin_lock_irqsave(&dev->event_lock, flags); if (dpu_crtc->event) { - DRM_DEBUG_VBL("%s: send event: %pK\n", dpu_crtc->name, + DRM_DEBUG_VBL("%s: send event: %p\n", dpu_crtc->name, dpu_crtc->event); trace_dpu_crtc_complete_flip(DRMID(crtc)); drm_crtc_send_vblank_event(crtc, dpu_crtc->event); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 11fb1bc54fa9..54b20faa0b69 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -31,14 +31,14 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, u32 base; if (!ctx) { - DRM_ERROR("invalid ctx %pK\n", ctx); + DRM_ERROR("invalid ctx %p\n", ctx); return; } base = ctx->cap->sblk->pcc.base; if (!base) { - DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); + DRM_ERROR("invalid ctx %p pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 12dcb32b4724..a306077647c3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1345,7 +1345,7 @@ static int dpu_kms_mmap_mdp5(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap_mdss(mdss_dev, dpu_kms->pdev, @@ -1380,7 +1380,7 @@ static int dpu_kms_mmap_dpu(struct dpu_kms *dpu_kms) dpu_kms->mmio = NULL; return ret; } - DRM_DEBUG("mapped dpu address space @%pK\n", dpu_kms->mmio); + DRM_DEBUG("mapped dpu address space @%p\n", dpu_kms->mmio); dpu_kms->vbif[VBIF_RT] = msm_ioremap(pdev, "vbif"); if (IS_ERR(dpu_kms->vbif[VBIF_RT])) { diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 1f5fe7811e01..39885b333910 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -423,7 +423,7 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 if (IS_ERR(msm_mdss->mmio)) return ERR_CAST(msm_mdss->mmio); - dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio); + dev_dbg(&pdev->dev, "mapped mdss address space @%p\n", msm_mdss->mmio); ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); if (ret) From 4876b391654142dcf31ac6da619ace357b6b902d Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Wed, 13 Aug 2025 15:04:44 +0200 Subject: [PATCH 0470/1307] drm/msm: skip re-emitting IBs for unusable VMs When a VM is marked as an usuable we disallow new submissions from it, however submissions that where already scheduled on the ring would still be re-sent. Since this can lead to further hangs, avoid emitting the actual IBs. Fixes: 6a4d287a1ae6 ("drm/msm: Mark VM as unusable on GPU hangs") Signed-off-by: Antonino Maniscalco Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/668314/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 416d47185ef0..26c5ce897cbb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -558,8 +558,15 @@ static void recover_worker(struct kthread_work *work) unsigned long flags; spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) + list_for_each_entry(submit, &ring->submits, node) { + /* + * If the submit uses an unusable vm make sure + * we don't actually run it + */ + if (to_msm_vm(submit->vm)->unusable) + submit->nr_cmds = 0; gpu->funcs->submit(gpu, submit); + } spin_unlock_irqrestore(&ring->submit_lock, flags); } } From 6c705851499172c0ce863e816946fb5a564ff69f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 14 Aug 2025 09:17:06 -0700 Subject: [PATCH 0471/1307] ACPI: APEI: EINJ: Check if user asked for EINJV2 injection On an EINJV2 capable system, users may still use the old injection interface but einj_get_parameter_address() takes the EINJV2 path to map the parameter structure. This results in the address the user supplied being stored to the wrong location and the BIOS injecting based on an uninitialized field (0x0 in the reported case). Check the version of the request when mapping the EINJ parameter structure in BIOS reserved memory. Fixes: 691a0f0a557b ("ACPI: APEI: EINJ: Discover EINJv2 parameters") Reported-by: Lai, Yi1 Signed-off-by: Tony Luck Reviewed-by: Zaid Alali Reviewed-by: Hanjun Guo Link: https://patch.msgid.link/20250814161706.4489-1-tony.luck@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index bf8dc92a373a..99f1b841fba9 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -315,7 +315,7 @@ static void __iomem *einj_get_parameter_address(void) memcpy_fromio(&v5param, p, v5param_size); acpi5 = 1; check_vendor_extension(pa_v5, &v5param); - if (available_error_type & ACPI65_EINJV2_SUPP) { + if (is_v2 && available_error_type & ACPI65_EINJV2_SUPP) { len = v5param.einjv2_struct.length; offset = offsetof(struct einjv2_extension_struct, component_arr); max_nr_components = (len - offset) / From 7459e87ae1d78ba27b728172fa2aa912a5b8640d Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 15 Aug 2025 10:42:06 +0800 Subject: [PATCH 0472/1307] ACPI: APEI: EINJ: fix potential NULL dereference in __einj_error_inject() The __einj_error_inject() function allocates memory via kmalloc() without checking for allocation failure, which could lead to a NULL pointer dereference. Return -ENOMEM in case allocation fails. Fixes: b47610296d17 ("ACPI: APEI: EINJ: Enable EINJv2 error injections") Signed-off-by: Charles Han Reviewed-by: Tony Luck Reviewed-by: Hanjun Guo Link: https://patch.msgid.link/20250815024207.3038-1-hanchunchao@inspur.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index 99f1b841fba9..b489ae684a1b 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -540,6 +540,9 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, struct set_error_type_with_address *v5param; v5param = kmalloc(v5param_size, GFP_KERNEL); + if (!v5param) + return -ENOMEM; + memcpy_fromio(v5param, einj_param, v5param_size); v5param->type = type; if (type & ACPI5_VENDOR_BIT) { From 61ca3b891b4b9667334c1356a73f28954c92d43a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 06:54:50 +0200 Subject: [PATCH 0473/1307] block: handle pi_tuple_size in queue_limits_stack_integrity queue_limits_stack_integrity needs to handle the new pi_tuple_size field, otherwise stacking PI-capable devices will always fail. Fixes: 76e45252a4ce ("block: introduce pi_tuple_size field in blk_integrity") Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250818045456.1482889-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 07874e9b609f..491c0c48d52b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -972,6 +972,8 @@ bool queue_limits_stack_integrity(struct queue_limits *t, goto incompatible; if (ti->csum_type != bi->csum_type) goto incompatible; + if (ti->pi_tuple_size != bi->pi_tuple_size) + goto incompatible; if ((ti->flags & BLK_INTEGRITY_REF_TAG) != (bi->flags & BLK_INTEGRITY_REF_TAG)) goto incompatible; @@ -980,6 +982,7 @@ bool queue_limits_stack_integrity(struct queue_limits *t, ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | (bi->flags & BLK_INTEGRITY_REF_TAG); ti->csum_type = bi->csum_type; + ti->pi_tuple_size = bi->pi_tuple_size; ti->metadata_size = bi->metadata_size; ti->pi_offset = bi->pi_offset; ti->interval_exp = bi->interval_exp; From f4ae1744033d54b63c31a3664a4fdf5cebec7f27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 06:54:51 +0200 Subject: [PATCH 0474/1307] block: remove newlines from the warnings in blk_validate_integrity_limits Otherwise they are very hard to read in the kernel log. Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250818045456.1482889-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 491c0c48d52b..d6438e6c276d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -157,16 +157,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_NONE: if (bi->pi_tuple_size) { - pr_warn("pi_tuple_size must be 0 when checksum type \ - is none\n"); + pr_warn("pi_tuple_size must be 0 when checksum type is none\n"); return -EINVAL; } break; case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: if (bi->pi_tuple_size != sizeof(struct t10_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for T10 PI: expected \ - %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for T10 PI: expected %zu, got %u\n", sizeof(struct t10_pi_tuple), bi->pi_tuple_size); return -EINVAL; @@ -174,8 +172,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) break; case BLK_INTEGRITY_CSUM_CRC64: if (bi->pi_tuple_size != sizeof(struct crc64_pi_tuple)) { - pr_warn("pi_tuple_size mismatch for CRC64 PI: \ - expected %zu, got %u\n", + pr_warn("pi_tuple_size mismatch for CRC64 PI: expected %zu, got %u\n", sizeof(struct crc64_pi_tuple), bi->pi_tuple_size); return -EINVAL; From b21d1fbb97c814c76ffa392cd603f8cd3ecc0355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 14 Aug 2025 07:11:57 +0200 Subject: [PATCH 0475/1307] ACPI: APEI: EINJ: Fix resource leak by remove callback in .exit.text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback is also used during error handling in faux_probe(). As einj_remove() was marked with __exit it's not linked into the kernel if the driver is built-in, potentially resulting in resource leaks. Also remove the comment justifying the __exit annotation which doesn't apply any more since the driver was converted to the faux device interface. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Uwe Kleine-König Cc: 6.16+ # 6.16+ Link: https://patch.msgid.link/20250814051157.35867-2-u.kleine-koenig@baylibre.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index b489ae684a1b..2561b045acc7 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -1094,7 +1094,7 @@ static int __init einj_probe(struct faux_device *fdev) return rc; } -static void __exit einj_remove(struct faux_device *fdev) +static void einj_remove(struct faux_device *fdev) { struct apei_exec_context ctx; @@ -1117,15 +1117,9 @@ static void __exit einj_remove(struct faux_device *fdev) } static struct faux_device *einj_dev; -/* - * einj_remove() lives in .exit.text. For drivers registered via - * platform_driver_probe() this is ok because they cannot get unbound at - * runtime. So mark the driver struct with __refdata to prevent modpost - * triggering a section mismatch warning. - */ -static struct faux_device_ops einj_device_ops __refdata = { +static struct faux_device_ops einj_device_ops = { .probe = einj_probe, - .remove = __exit_p(einj_remove), + .remove = einj_remove, }; static int __init einj_init(void) From 779b1a1cb13ae17028aeddb2fbbdba97357a1e15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Aug 2025 12:25:58 +0200 Subject: [PATCH 0476/1307] cpuidle: governors: menu: Avoid selecting states with too much latency Occasionally, the exit latency of the idle state selected by the menu governor may exceed the PM QoS CPU wakeup latency limit. Namely, if the scheduler tick has been stopped already and predicted_ns is greater than the tick period length, the governor may return an idle state whose exit latency exceeds latency_req because that decision is made before checking the current idle state's exit latency. For instance, say that there are 3 idle states, 0, 1, and 2. For idle states 0 and 1, the exit latency is equal to the target residency and the values are 0 and 5 us, respectively. State 2 is deeper and has the exit latency and target residency of 200 us and 2 ms (which is greater than the tick period length), respectively. Say that predicted_ns is equal to TICK_NSEC and the PM QoS latency limit is 20 us. After the first two iterations of the main loop in menu_select(), idx becomes 1 and in the third iteration of it the target residency of the current state (state 2) is greater than predicted_ns. State 2 is not a polling one and predicted_ns is not less than TICK_NSEC, so the check on whether or not the tick has been stopped is done. Say that the tick has been stopped already and there are no imminent timers (that is, delta_tick is greater than the target residency of state 2). In that case, idx becomes 2 and it is returned immediately, but the exit latency of state 2 exceeds the latency limit. Address this issue by modifying the code to compare the exit latency of the current idle state (idle state i) with the latency limit before comparing its target residency with predicted_ns, which allows one more exit_latency_ns check that becomes redundant to be dropped. However, after the above change, latency_req cannot take the predicted_ns value any more, which takes place after commit 38f83090f515 ("cpuidle: menu: Remove iowait influence"), because it may cause a polling state to be returned prematurely. In the context of the previous example say that predicted_ns is 3000 and the PM QoS latency limit is still 20 us. Additionally, say that idle state 0 is a polling one. Moving the exit_latency_ns check before the target_residency_ns one causes the loop to terminate in the second iteration, before the target_residency_ns check, so idle state 0 will be returned even though previously state 1 would be returned if there were no imminent timers. For this reason, remove the assignment of the predicted_ns value to latency_req from the code. Fixes: 5ef499cd571c ("cpuidle: menu: Handle stopped tick more aggressively") Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/5043159.31r3eYUQgx@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 81306612a5c6..b2e3d0b0a116 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -287,20 +287,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return 0; } - if (tick_nohz_tick_stopped()) { - /* - * If the tick is already stopped, the cost of possible short - * idle duration misprediction is much higher, because the CPU - * may be stuck in a shallow idle state for a long time as a - * result of it. In that case say we might mispredict and use - * the known time till the closest timer event for the idle - * state selection. - */ - if (predicted_ns < TICK_NSEC) - predicted_ns = data->next_timer_ns; - } else if (latency_req > predicted_ns) { - latency_req = predicted_ns; - } + /* + * If the tick is already stopped, the cost of possible short idle + * duration misprediction is much higher, because the CPU may be stuck + * in a shallow idle state for a long time as a result of it. In that + * case, say we might mispredict and use the known time till the closest + * timer event for the idle state selection. + */ + if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) + predicted_ns = data->next_timer_ns; /* * Find the idle state with the lowest power while satisfying @@ -316,13 +311,15 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx == -1) idx = i; /* first enabled state */ + if (s->exit_latency_ns > latency_req) + break; + if (s->target_residency_ns > predicted_ns) { /* * Use a physical idle state, not busy polling, unless * a timer is going to trigger soon enough. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->exit_latency_ns <= latency_req && s->target_residency_ns <= data->next_timer_ns) { predicted_ns = s->target_residency_ns; idx = i; @@ -354,8 +351,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, return idx; } - if (s->exit_latency_ns > latency_req) - break; idx = i; } From af24c20c4633a667ac5b5e20cf9d96f6176a0ca3 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 15 Aug 2025 10:47:29 +0800 Subject: [PATCH 0477/1307] ASoC: codecs: ES9389: Modify the standby configuration Modify the standby configuration Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20250815024729.3051-1-zhangyi@everest-semi.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8389.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8389.c b/sound/soc/codecs/es8389.c index ba1763f36f17..6e4c75d288ef 100644 --- a/sound/soc/codecs/es8389.c +++ b/sound/soc/codecs/es8389.c @@ -636,7 +636,7 @@ static int es8389_set_bias_level(struct snd_soc_component *component, regmap_write(es8389->regmap, ES8389_ANA_CTL1, 0x59); regmap_write(es8389->regmap, ES8389_ADC_EN, 0x00); regmap_write(es8389->regmap, ES8389_CLK_OFF1, 0x00); - regmap_write(es8389->regmap, ES8389_RESET, 0x7E); + regmap_write(es8389->regmap, ES8389_RESET, 0x3E); regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80, 0x80); usleep_range(8000, 8500); regmap_update_bits(es8389->regmap, ES8389_DAC_INV, 0x80, 0x00); From 43c0f6456f801181a80b73d95def0e0fd134e1cc Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Mon, 18 Aug 2025 10:27:30 +0100 Subject: [PATCH 0478/1307] iio: pressure: bmp280: Use IS_ERR() in bmp280_common_probe() `devm_gpiod_get_optional()` may return non-NULL error pointer on failure. Check its return value using `IS_ERR()` and propagate the error if necessary. Fixes: df6e71256c84 ("iio: pressure: bmp280: Explicitly mark GPIO optional") Signed-off-by: Salah Triki Reviewed-by: David Lechner Link: https://patch.msgid.link/20250818092740.545379-2-salah.triki@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 74505c9ec1a0..6cdc8ed53520 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -3213,11 +3213,12 @@ int bmp280_common_probe(struct device *dev, /* Bring chip out of reset if there is an assigned GPIO line */ gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(gpiod)) + return dev_err_probe(dev, PTR_ERR(gpiod), "failed to get reset GPIO\n"); + /* Deassert the signal */ - if (gpiod) { - dev_info(dev, "release reset\n"); - gpiod_set_value(gpiod, 0); - } + dev_info(dev, "release reset\n"); + gpiod_set_value(gpiod, 0); data->regmap = regmap; From 22ec0faa0eda30acdd6dcb3c29c872629da677bb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Aug 2025 09:41:22 -0700 Subject: [PATCH 0479/1307] perf test: Fix a build error in x86 topdown test There's an environment that caused the following build error. Include "debug.h" (under util directory) to fix it. arch/x86/tests/topdown.c: In function 'event_cb': arch/x86/tests/topdown.c:53:25: error: implicit declaration of function 'pr_debug' [-Werror=implicit-function-declaration] 53 | pr_debug("Broken topdown information for '%s'\n", evsel__name(evsel)); | ^~~~~~~~ cc1: all warnings being treated as errors Link: https://lore.kernel.org/r/20250815164122.289651-1-namhyung@kernel.org Fixes: 5b546de9cc177936 ("perf topdown: Use attribute to see an event is a topdown metic or slots") Reported-by: Naresh Kamboju Signed-off-by: Namhyung Kim --- tools/perf/arch/x86/tests/topdown.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c index 8d0ea7a4bbc1..1eba3b4594ef 100644 --- a/tools/perf/arch/x86/tests/topdown.c +++ b/tools/perf/arch/x86/tests/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "arch-tests.h" #include "../util/topdown.h" +#include "debug.h" #include "evlist.h" #include "parse-events.h" #include "pmu.h" From bd842ff41543af424c2473dc16c678ac8ba2b43f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0480/1307] tools headers: Sync KVM headers with the kernel source To pick up the changes in this cset: f55ce5a6cd33211c KVM: arm64: Expose new KVM cap for cacheable PFNMAP 28224ef02b56fcee KVM: TDX: Report supported optional TDVMCALLs in TDX capabilities 4580dbef5ce0f95a KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt 25e8b1dd4883e6c2 KVM: TDX: Exit to userspace for GetTdVmCallInfo cf207eac06f661fb KVM: TDX: Handle TDG.VP.VMCALL This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/uapi/asm/kvm.h | 8 +++++++- tools/include/uapi/linux/kvm.h | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7415a3863891..f0f0d49d2544 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -935,6 +961,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; From 8aa5a3b68ad144da49a3d17f165e6561255e3529 Mon Sep 17 00:00:00 2001 From: Rajeev Mishra Date: Mon, 18 Aug 2025 18:48:20 +0000 Subject: [PATCH 0481/1307] loop: Consolidate size calculation logic into lo_calculate_size() Renamed get_size to lo_calculate_size and merged the logic from get_size and get_loop_size into a single function. Update all callers to use lo_calculate_size. This is done in preparation for improving the size detection logic. Signed-off-by: Rajeev Mishra Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250818184821.115033-2-rajeevm@hpe.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1b6ee91f8eb9..0e1b9eb9db10 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -137,20 +137,18 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) +static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { loff_t loopsize; - /* Compute loopsize in bytes */ loopsize = i_size_read(file->f_mapping->host); - if (offset > 0) - loopsize -= offset; + if (lo->lo_offset > 0) + loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; - - if (sizelimit > 0 && sizelimit < loopsize) - loopsize = sizelimit; + if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) + loopsize = lo->lo_sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. @@ -158,11 +156,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) return loopsize >> 9; } -static loff_t get_loop_size(struct loop_device *lo, struct file *file) -{ - return get_size(lo->lo_offset, lo->lo_sizelimit, file); -} - /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of @@ -569,7 +562,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = -EINVAL; /* size of the new backing store needs to be the same */ - if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) + if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file)) goto out_err; /* @@ -1063,7 +1056,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, loop_update_dio(lo); loop_sysfs_init(lo); - size = get_loop_size(lo, file); + size = lo_calculate_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). */ @@ -1255,8 +1248,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); if (!err && size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, new_size); } out_unlock: @@ -1399,7 +1391,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - size = get_loop_size(lo, lo->lo_backing_file); + size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; From 47b71abd58461a67cae71d2f2a9d44379e4e2fcf Mon Sep 17 00:00:00 2001 From: Rajeev Mishra Date: Mon, 18 Aug 2025 18:48:21 +0000 Subject: [PATCH 0482/1307] loop: use vfs_getattr_nosec for accurate file size Use vfs_getattr_nosec() in lo_calculate_size() for getting the file size, rather than just read the cached inode size via i_size_read(). This provides better results than cached inode data, particularly for network filesystems where metadata may be stale. Signed-off-by: Rajeev Mishra Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250818184821.115033-3-rajeevm@hpe.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 0e1b9eb9db10..57263c273f0f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -139,9 +139,20 @@ static int part_shift; static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { + struct kstat stat; loff_t loopsize; - /* Compute loopsize in bytes */ - loopsize = i_size_read(file->f_mapping->host); + int ret; + + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; if (lo->lo_offset > 0) loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ From d0a2b527d8c32e46ccb8a34053468d4ff0c27e5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 12:11:02 +0200 Subject: [PATCH 0483/1307] block: tone down bio_check_eod bdev_nr_sectors() == 0 is a pattern used for block devices that have been hot removed, don't spam the log about them. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250818101102.1604551-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index fdac48aec5ef..4201504158a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -557,7 +557,7 @@ static inline int bio_check_eod(struct bio *bio) sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); unsigned int nr_sectors = bio_sectors(bio); - if (nr_sectors && + if (nr_sectors && maxsector && (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" From 6cb8607934d937f4ad24ec9ad26aeb669e266937 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0484/1307] tools headers: Sync linux/bits.h with the kernel source To pick up the changes in this cset: 104ea1c84b91c9f4 bits: unify the non-asm GENMASK*() 6d4471252ccc1722 bits: split the definition of the asm and non-asm GENMASK*() This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Cc: Yury Norov Signed-off-by: Namhyung Kim --- tools/include/linux/bits.h | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7ad056219115..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include #include #include -#include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -50,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -79,28 +81,9 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. - */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ From aa34642f6fc36a436de5ae5b30d414578b3622f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0485/1307] tools headers: Sync linux/cfi_types.h with the kernel source To pick up the changes in this cset: 5ccaeedb489b41ce cfi: add C CFI type macro This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/cfi_types.h include/linux/cfi_types.h Please see tools/include/uapi/README for further details. Cc: Mark Rutland Signed-off-by: Namhyung Kim --- tools/include/linux/cfi_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_ */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ From 619f55c859014e2235f83ba6cde8c59edc492f39 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0486/1307] tools headers: Sync x86 headers with the kernel source To pick up the changes in this cset: 7b306dfa326f7011 x86/sev: Evict cache lines during SNP memory validation 65f55a30176662ee x86/CPU/AMD: Add CPUID faulting support d8010d4ba43e9f79 x86/bugs: Add a Transient Scheduler Attacks mitigation a3c4f3396b82849a x86/msr-index: Add AMD workload classification MSRs 17ec2f965344ee3f KVM: VMX: Allow guest to set DEBUGCTL.RTM_DEBUG if RTM is supported This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details. Cc: x86@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 10 +++++++++- tools/arch/x86/include/asm/msr-index.h | 7 +++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ee176236c2be..06fc0479a23f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ @@ -456,10 +457,14 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ + #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ + #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ @@ -487,6 +492,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +550,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 5cfb5d74dd5f..b65c3ba5fa14 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -419,6 +419,7 @@ #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) +#define DEBUGCTLMSR_RTM_DEBUG BIT(15) #define MSR_PEBS_FRONTEND 0x000003f7 @@ -733,6 +734,11 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +/* AMD Hardware Feedback Support MSRs */ +#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501 +#define MSR_AMD_WORKLOAD_HRST 0xc0000502 + /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e @@ -831,6 +837,7 @@ #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) #define MSR_K7_HWCR_IRPERF_EN_BIT 30 #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 #define MSR_K7_HWCR_CPB_DIS_BIT 25 From 14ec8ce45611c767656e4fa575f17b05344aa80a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0487/1307] tools headers: Sync arm64 headers with the kernel source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick up the changes in this cset: efe676a1a7554219 arm64: proton-pack: Add new CPUs 'k' values for branch mitigation e18c09b204e81702 arm64: Add support for HIP09 Spectre-BHB mitigation a9b5bd81b294d30a arm64: cputype: Add MIDR_CORTEX_A76AE 53a52a0ec7680287 arm64: cputype: Add comments about Qualcomm Kryo 5XX and 6XX cores 401c3333bb2396aa arm64: cputype: Add QCOM_CPU_PART_KRYO_3XX_GOLD 86edf6bdcf0571c0 smccc/kvm_guest: Enable errata based on implementation CPUs 0bc9a9e85fcf4ffb KVM: arm64: Work around x1e's CNTVOFF_EL2 bogosity This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h But the following two changes cannot be applied since they introduced new build errors in util/arm-spe.c. So it still has the warning after this change. c8c2647e69bedf80 arm64: Make  _midr_in_range_list() an exported function e3121298c7fcaf48 arm64: Modify _midr_range() functions to read MIDR/REVIDR internally Please see tools/include/uapi/README for further details. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim perf build: [WIP] Fix arm-spe build errors Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/asm/cputype.h | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9a5d85cfd1fb..139d5e87dc95 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,11 +75,13 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -119,9 +121,11 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 +#define QCOM_CPU_PART_ORYON_X1 0x001 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -129,6 +133,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 @@ -159,11 +164,13 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) @@ -196,13 +203,26 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) + +/* + * NOTES: + * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77 + * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER + * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1 + * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78 + * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55 + */ + #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) @@ -291,6 +311,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void) return read_cpuid(MIDR_EL1); } +struct target_impl_cpu { + u64 midr; + u64 revidr; + u64 aidr; +}; + +bool cpu_errata_set_target_impl(u64 num, void *impl_cpus); + static inline u64 __attribute_const__ read_cpuid_mpidr(void) { return read_cpuid(MPIDR_EL1); From c85538c4e3c7111958057d15ea8ee444116891c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0488/1307] tools headers: Sync powerpc headers with the kernel source To pick up the changes in this cset: 69bf2053608423cb powerpc: Drop GPL boilerplate text with obsolete FSF address This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/powerpc/include/uapi/asm/kvm.h arch/powerpc/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Madhavan Srinivasan Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Namhyung Kim --- tools/arch/powerpc/include/uapi/asm/kvm.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index eaeda001784e..077c5437f521 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -1,18 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * Copyright IBM Corp. 2007 * * Authors: Hollis Blanchard From 52174e0eb13876654f56701c26a672890aa5e7e3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0489/1307] tools headers: Sync syscall tables with the kernel source To pick up the changes in this cset: be7efb2d20d67f33 fs: introduce file_getattr and file_setattr syscalls This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl Please see tools/include/uapi/README for further details. Cc: Arnd Bergmann CC: linux-api@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/unistd.h | 8 +++++++- tools/perf/arch/arm/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 ++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sh/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sparc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_32.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ tools/perf/arch/xtensa/entry/syscalls/syscall.tbl | 2 ++ tools/scripts/syscall.tbl | 2 ++ 11 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2892a45023af..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 468 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 27c1d5ebcd91..b07e699aaa3c 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -482,3 +482,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1e8c44c7b614..7a7049c2c307 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -382,3 +382,5 @@ 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat 467 n64 open_tree_attr sys_open_tree_attr +468 n64 file_getattr sys_file_getattr +469 n64 file_setattr sys_file_setattr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 9a084bdb8926..b453e80dfc00 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -558,3 +558,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a4569b96ef06..8a6744d658db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -470,3 +470,5 @@ 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr sys_file_setattr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 52a7652fcff6..5e9c9eff5539 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -471,3 +471,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 83e45eb6c095..ebb7d06d1044 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -513,3 +513,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index ac007ea00979..4877e16da69a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -473,3 +473,5 @@ 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat 467 i386 open_tree_attr sys_open_tree_attr +468 i386 file_getattr sys_file_getattr +469 i386 file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index cfb5ca41e30d..92cf0fe2291e 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -391,6 +391,8 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index f657a77314f8..374e4cb788d8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index 580b4e246aec..d1ae5e92c615 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -408,3 +408,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr From b18aabe283a10774977d698c075d2296a2336aef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0490/1307] tools headers: Sync uapi/linux/fcntl.h with the kernel source To pick up the changes in this cset: 3941e37f62fe2c3c uapi/fcntl: add FD_PIDFS_ROOT cd5d2006327b6d84 uapi/fcntl: add FD_INVALID 67fcec2919e4ed31 fcntl/pidfd: redefine PIDFD_SELF_THREAD_GROUP a4c746f06853f91d uapi/fcntl: mark range as reserved This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Please see tools/include/uapi/README for further details. Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- .../trace/beauty/include/uapi/linux/fcntl.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index a15ac2fa4b20..f291ab4f94eb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -90,10 +90,28 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. */ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ + +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ From 4a4083af03a7a75a86c392fd60cb37ce23ed87b6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0491/1307] tools headers: Sync uapi/linux/fs.h with the kernel source To pick up the changes in this cset: 76fdb7eb4e1c9108 uapi: export PROCFS_ROOT_INO ca115d7e754691c0 tree-wide: s/struct fileattr/struct file_kattr/g be7efb2d20d67f33 fs: introduce file_getattr and file_setattr syscalls 9eb22f7fedfc9eb1 fs: add ioctl to query metadata and protection info capabilities This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h Please see tools/include/uapi/README for further details. Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- .../perf/trace/beauty/include/uapi/linux/fs.h | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 0098b0ce8ccb..0bd678a4a10e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -60,6 +60,17 @@ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +/* + * The root inode of procfs is guaranteed to always have the same inode number. + * For programs that make heavy use of procfs, verifying that the root is a + * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) + * will allow you to make sure you are never tricked into operating on the + * wrong procfs file. + */ +enum procfs_ino { + PROCFS_ROOT_INO = 1, +}; + struct file_clone_range { __s64 src_fd; __u64 src_offset; @@ -91,6 +102,63 @@ struct fs_sysfs_path { __u8 name[128]; }; +/* Protection info capability flags */ +#define LBMD_PI_CAP_INTEGRITY (1 << 0) +#define LBMD_PI_CAP_REFTAG (1 << 1) + +/* Checksum types for Protection Information */ +#define LBMD_PI_CSUM_NONE 0 +#define LBMD_PI_CSUM_IP 1 +#define LBMD_PI_CSUM_CRC16_T10DIF 2 +#define LBMD_PI_CSUM_CRC64_NVME 4 + +/* sizeof first published struct */ +#define LBMD_SIZE_VER0 16 + +/* + * Logical block metadata capability descriptor + * If the device does not support metadata, all the fields will be zero. + * Applications must check lbmd_flags to determine whether metadata is + * supported or not. + */ +struct logical_block_metadata_cap { + /* Bitmask of logical block metadata capability flags */ + __u32 lbmd_flags; + /* + * The amount of data described by each unit of logical block + * metadata + */ + __u16 lbmd_interval; + /* + * Size in bytes of the logical block metadata associated with each + * interval + */ + __u8 lbmd_size; + /* + * Size in bytes of the opaque block tag associated with each + * interval + */ + __u8 lbmd_opaque_size; + /* + * Offset in bytes of the opaque block tag within the logical block + * metadata + */ + __u8 lbmd_opaque_offset; + /* Size in bytes of the T10 PI tuple associated with each interval */ + __u8 lbmd_pi_size; + /* Offset in bytes of T10 PI tuple within the logical block metadata */ + __u8 lbmd_pi_offset; + /* T10 PI guard tag type */ + __u8 lbmd_guard_tag_type; + /* Size in bytes of the T10 PI application tag */ + __u8 lbmd_app_tag_size; + /* Size in bytes of the T10 PI reference tag */ + __u8 lbmd_ref_tag_size; + /* Size in bytes of the T10 PI storage tag */ + __u8 lbmd_storage_tag_size; + __u8 pad; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -148,6 +216,24 @@ struct fsxattr { unsigned char fsx_pad[8]; }; +/* + * Variable size structure for file_[sg]et_attr(). + * + * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'. + * As this structure is passed to/from userspace with its size, this can + * be versioned based on the size. + */ +struct file_attr { + __u64 fa_xflags; /* xflags field value (get/set) */ + __u32 fa_extsize; /* extsize field value (get/set)*/ + __u32 fa_nextents; /* nextents field value (get) */ + __u32 fa_projid; /* project identifier (get/set) */ + __u32 fa_cowextsize; /* CoW extsize field value (get/set) */ +}; + +#define FILE_ATTR_SIZE_VER0 24 +#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0 + /* * Flags for the fsx_xflags field */ @@ -247,6 +333,8 @@ struct fsxattr { * also /sys/kernel/debug/ for filesystems with debugfs exports */ #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) +/* Get logical block metadata capability details */ +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) From e7e79e99726190a5a83d158576cd448896d68102 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0492/1307] tools headers: Sync uapi/linux/prctl.h with the kernel source To pick up the changes in this cset: b1fabef37bd504f3 prctl: Introduce PR_MTE_STORE_ONLY a2fc422ed75748ee syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 3b93fb906e3c..ed3aed264aeb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -244,6 +244,8 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* MTE tag check store only */ +# define PR_MTE_STORE_ONLY (1UL << 19) /* RISC-V pointer masking tag length */ # define PR_PMLEN_SHIFT 24 # define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) @@ -255,7 +257,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 From f79a62f4b3c750759e60a402e8fe5180fc5771f0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: [PATCH 0493/1307] tools headers: Sync uapi/linux/vhost.h with the kernel source To pick up the changes in this cset: 7d9896e9f6d02d8a vhost: Reintroduce kthread API and add mode selection 333c515d189657c9 vhost-net: allow configuring extended features This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Please see tools/include/uapi/README for further details. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux.dev Signed-off-by: Namhyung Kim --- .../trace/beauty/include/uapi/linux/vhost.h | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h index d4b3e2ae1314..c57674a6aa0d 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/vhost.h +++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h @@ -235,4 +235,39 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* Extended features manipulation */ +#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) +#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) + +/* fork_owner values for vhost */ +#define VHOST_FORK_OWNER_KTHREAD 0 +#define VHOST_FORK_OWNER_TASK 1 + +/** + * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, + * This ioctl must called before VHOST_SET_OWNER. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @param fork_owner: An 8-bit value that determines the vhost thread mode + * + * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): + * - Vhost will create vhost worker as tasks forked from the owner, + * inheriting all of the owner's attributes. + * + * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: + * - Vhost will create vhost workers as kernel threads. + */ +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) + +/** + * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @return: An 8-bit value indicating the current thread mode. + */ +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) + #endif From 923fcb3dbc0246fc5207093c0049af4c56f20e41 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 11 Aug 2025 16:30:39 -0400 Subject: [PATCH 0494/1307] KVM: SEV: don't check have_run_cpus in sev_writeback_caches() Drop KVM's check on an empty cpumask when flushing caches when memory is being reclaimed from an SEV VM, as smp_call_function_many_cond() naturally (and correctly) handles an empty cpumask. This avoids an extra O(n) lookup in the common case where at least one pCPU has enterred the guest, which could be noticeable in some setups, e.g. if a small VM is pinned to the last few pCPUs in the system. Fixes: 6f38f8c57464 ("KVM: SVM: Flush cache only on CPUs running SEV guest") Signed-off-by: Yury Norov (NVIDIA) [sean: rewrite changelog to capture performance angle] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2fbdebf79fbb..0635bd71c10e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -718,13 +718,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) static void sev_writeback_caches(struct kvm *kvm) { - /* - * Note, the caller is responsible for ensuring correctness if the mask - * can be modified, e.g. if a CPU could be doing VMRUN. - */ - if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus)) - return; - /* * Ensure that all dirty guest tagged cache entries are written back * before releasing the pages back to the system for use. CLFLUSH will @@ -739,6 +732,9 @@ static void sev_writeback_caches(struct kvm *kvm) * serializing multiple calls and having responding CPUs (to the IPI) * mark themselves as still running if they are running (or about to * run) a vCPU for the VM. + * + * Note, the caller is responsible for ensuring correctness if the mask + * can be modified, e.g. if a CPU could be doing VMRUN. */ wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus); } From 0aa86640ebd98d77fb64acef5684e42fba517d2d Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Wed, 9 Jul 2025 14:12:22 -0400 Subject: [PATCH 0495/1307] drm/amd/display: Revert Add HPO encoder support to Replay This reverts commits: commit 1f26214d268b ("drm/amd/display: Add HPO encoder support to Replay") commit 3bfce48b109f ("drm/amd/display: Add support for Panel Replay on DP1 eDP (panel_inst=1)") due to visual confirm issue. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Gabe Teeger Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 92f68f6a1b297633159a3f3759e4dfc7e5b58abb) --- .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 43 ++----------------- .../gpu/drm/amd/display/dc/dce/dmub_replay.h | 2 +- .../link/protocols/link_edp_panel_control.c | 2 +- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 20 --------- 4 files changed, 5 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index e7a318e26d38..fcd3d86ad517 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -4,7 +4,6 @@ #include "dc.h" #include "dc_dmub_srv.h" -#include "dc_dp_types.h" #include "dmub/dmub_srv.h" #include "core_types.h" #include "dmub_replay.h" @@ -44,45 +43,21 @@ static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *s /* * Enable/Disable Replay. */ -static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst, - struct dc_link *link) +static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; uint32_t retry_count; enum replay_state state = REPLAY_STATE_0; - struct pipe_ctx *pipe_ctx = NULL; - struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; - uint8_t i; memset(&cmd, 0, sizeof(cmd)); cmd.replay_enable.header.type = DMUB_CMD__REPLAY; cmd.replay_enable.data.panel_inst = panel_inst; cmd.replay_enable.header.sub_type = DMUB_CMD__REPLAY_ENABLE; - if (enable) { + if (enable) cmd.replay_enable.data.enable = REPLAY_ENABLE; - // hpo stream/link encoder assignments are not static, need to update everytime we try to enable replay - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - for (i = 0; i < MAX_PIPES; i++) { - if (res_ctx && - res_ctx->pipe_ctx[i].stream && - res_ctx->pipe_ctx[i].stream->link && - res_ctx->pipe_ctx[i].stream->link == link && - res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { - pipe_ctx = &res_ctx->pipe_ctx[i]; - //TODO: refactor for multi edp support - break; - } - } - - if (!pipe_ctx) - return; - - cmd.replay_enable.data.hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - cmd.replay_enable.data.hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - } - } else + else cmd.replay_enable.data.enable = REPLAY_DISABLE; cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data); @@ -174,17 +149,6 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->digbe_inst = replay_context->digbe_inst; copy_settings_data->digfe_inst = replay_context->digfe_inst; - if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { - if (pipe_ctx->stream_res.hpo_dp_stream_enc) - copy_settings_data->hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - else - copy_settings_data->hpo_stream_enc_inst = 0; - if (pipe_ctx->link_res.hpo_dp_link_enc) - copy_settings_data->hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; - else - copy_settings_data->hpo_link_enc_inst = 0; - } - if (pipe_ctx->plane_res.dpp) copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; else @@ -247,7 +211,6 @@ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL; pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data); - pCmd->replay_set_coasting_vtotal_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index ccbe385e132c..e6346c0ffc0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -19,7 +19,7 @@ struct dmub_replay_funcs { void (*replay_get_state)(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst); void (*replay_enable)(struct dmub_replay *dmub, bool enable, bool wait, - uint8_t panel_inst, struct dc_link *link); + uint8_t panel_inst); bool (*replay_copy_settings)(struct dmub_replay *dmub, struct dc_link *link, struct replay_context *replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e7927b8f5ba3..98ec9b5a559c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -944,7 +944,7 @@ bool edp_set_replay_allow_active(struct dc_link *link, const bool *allow_active, // TODO: Handle mux change case if force_static is set // If force_static is set, just change the replay_allow_active state directly if (replay != NULL && link->replay_settings.replay_feature_enabled) - replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst, link); + replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst); link->replay_settings.replay_allow_active = *allow_active; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c587b3441e07..6a69a788abe8 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4047,14 +4047,6 @@ struct dmub_cmd_replay_copy_settings_data { * DIG BE HW instance. */ uint8_t digbe_inst; - /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; /** * AUX HW instance. */ @@ -4159,18 +4151,6 @@ struct dmub_rb_cmd_replay_enable_data { * This does not support HDMI/DP2 for now. */ uint8_t phy_rate; - /** - * @hpo_stream_enc_inst: HPO stream encoder instance - */ - uint8_t hpo_stream_enc_inst; - /** - * @hpo_link_enc_inst: HPO link encoder instance - */ - uint8_t hpo_link_enc_inst; - /** - * @pad: Align structure to 4 byte boundary. - */ - uint8_t pad[2]; }; /** From 79e25cd06e85105c75701ef1773c6c64bb304091 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Aug 2025 13:12:07 -0400 Subject: [PATCH 0496/1307] drm/amdgpu/swm14: Update power limit logic Take into account the limits from the vbios. Ported from the SMU13 code. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4352 Reviewed-by: Jesse Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit 203cc7f1dd86f2c8de5c3c6182f19adac7c9c206) Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 3aea32baea3d..f32474af90b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1697,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; CustomSkuTable_t *skutable = &pptable->CustomSkuTable; - uint32_t power_limit; + uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0; uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v14_0_get_current_power_limit(smu, &power_limit)) @@ -1712,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) - *max_power_limit = msg_limit; + if (powerplay_table) { + if (smu->od_enabled && + smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = 0; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } + } - if (min_power_limit) - *min_power_limit = 0; + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = msg_limit * (100 + od_percent_upper); + *max_power_limit /= 100; + } + + if (min_power_limit) { + *min_power_limit = power_limit * (100 + od_percent_lower); + *min_power_limit /= 100; + } return 0; } From 07b93a5704b0b72002f0c4bd1076214af67dc661 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 24 Jul 2025 15:00:43 -0500 Subject: [PATCH 0497/1307] drm/amd/display: Avoid a NULL pointer dereference [WHY] Although unlikely drm_atomic_get_new_connector_state() or drm_atomic_get_old_connector_state() can return NULL. [HOW] Check returns before dereference. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1e5e8d672fec9f2ab352be121be971877bff2af9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cd0e2976e268..a0ca3b2c6bd8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7792,6 +7792,9 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); int ret; + if (WARN_ON(unlikely(!old_con_state || !new_con_state))) + return -EINVAL; + trace_amdgpu_dm_connector_atomic_check(new_con_state); if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { From 66af73a1c319336694a8610fe4c2943f7b33066c Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 18 Jul 2025 18:25:08 +0800 Subject: [PATCH 0498/1307] drm/amd/display: Fix Xorg desktop unresponsive on Replay panel [WHY & HOW] IPS & self-fresh feature can cause vblank counter resets between vblank disable and enable. It may cause system stuck due to wait the vblank counter. Call the drm_crtc_vblank_restore() during vblank enable to estimate missed vblanks by using timestamps and update the vblank counter in DRM. It can make the vblank counter increase smoothly and resolve this issue. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng (Leo) Li Signed-off-by: Tom Chung Signed-off-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 34d66bc7ff10e146a4cec76cf286979740a10954) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 010172f930ae..45feb404b097 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -299,6 +299,25 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); if (enable) { + struct dc *dc = adev->dm.dc; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct psr_settings *psr = &acrtc_state->stream->link->psr_settings; + struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; + bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) || + pr->config.replay_supported; + + /* + * IPS & self-refresh feature can cause vblank counter resets between + * vblank disable and enable. + * It may cause system stuck due to waiting for the vblank counter. + * Call this function to estimate missed vblanks by using timestamps and + * update the vblank counter in DRM. + */ + if (dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL && + sr_supported && vblank->config.disable_immediate) + drm_crtc_vblank_restore(crtc); + /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); From 7a2ca2ea64b1b63c8baa94a8f5deb70b2248d119 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Wed, 23 Jul 2025 21:36:41 -0500 Subject: [PATCH 0499/1307] drm/amd/display: Add null pointer check in mod_hdcp_hdcp1_create_session() The function mod_hdcp_hdcp1_create_session() calls the function get_first_active_display(), but does not check its return value. The return value is a null pointer if the display list is empty. This will lead to a null pointer dereference. Add a null pointer check for get_first_active_display() and return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND if the function return null. This is similar to the commit c3e9826a2202 ("drm/amd/display: Add null pointer check for get_first_active_display()"). Fixes: 2deade5ede56 ("drm/amd/display: Remove hdcp display state with mst fix") Signed-off-by: Chenyuan Yang Reviewed-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 5e43eb3cd731649c4f8b9134f857be62a416c893) --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e58e7b93810b..6b7db8ec9a53 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -260,6 +260,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_FAILURE; } + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; mutex_lock(&psp->hdcp_context.mutex); From cb7b7ae53b557d168b4af5cd8549f3eff920bfb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:46 +0200 Subject: [PATCH 0500/1307] drm/amd/display: Don't overclock DCE 6 by 15% MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extra 15% clock was added as a workaround for a Polaris issue which uses DCE 11, and should not have been used on DCE 6 which is already hardcoded to the highest possible display clock. Unfortunately, the extra 15% was mistakenly copied and kept even on code paths which don't affect Polaris. This commit fixes that and also adds a check to make sure not to exceed the maximum DCE 6 display clock. Fixes: 8cd61c313d8b ("drm/amd/display: Raise dispclk value for Polaris") Fixes: dc88b4a684d2 ("drm/amd/display: make clk mgr soc specific") Fixes: 3ecb3b794e2c ("drm/amd/display: dc/clk_mgr: add support for SI parts (v2)") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 427980c1cbd22bb256b9385f5ce73c0937562408) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 0267644717b2..cfd7309f2c6a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -123,11 +123,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ From 1fc931be2f47fde23ca5aff6f19421375c312fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:47 +0200 Subject: [PATCH 0501/1307] drm/amd/display: Adjust DCE 8-10 clock, don't overclock by 15% MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the nominal (and performance) clocks for DCE 8-10, and set them to 625 MHz, which is the value used by the legacy display code in amdgpu_atombios_get_clock_info. This was tested with Hawaii, Tonga and Fiji. These GPUs can output 4K 60Hz (10-bit depth) at 625 MHz. The extra 15% clock was added as a workaround for a Polaris issue which uses DCE 11, and should not have been used on DCE 8-10 which are already hardcoded to the highest possible display clock. Unfortunately, the extra 15% was mistakenly copied and kept even on code paths which don't affect Polaris. This commit fixes that and also adds a check to make sure not to exceed the maximum DCE 8-10 display clock. Fixes: 8cd61c313d8b ("drm/amd/display: Raise dispclk value for Polaris") Fixes: dc88b4a684d2 ("drm/amd/display: make clk mgr soc specific") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 1ae45b5d4f371af8ae51a3827d0ec9fe27eeb867) --- .../drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index f5ad0a177038..a324dbd4543c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = { /* ClocksStateLow */ { .display_clk_khz = 352000, .pixel_clk_khz = 330000}, /* ClocksStateNominal */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 }, /* ClocksStatePerformance */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } }; int dentist_get_divider_from_did(int did) { @@ -405,11 +405,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ From 669f73a26f6112eedbadac53a2f2707ac6d0b9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:48 +0200 Subject: [PATCH 0502/1307] drm/amd/display: Find first CRTC and its line time in dce110_fill_display_configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dce110_fill_display_configs is shared between DCE 6-11, and finding the first CRTC and its line time is relevant to DCE 6 too. Move the code to find it from DCE 11 specific code. Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 4ab09785f8d5d03df052827af073d5c508ff5f63) Cc: stable@vger.kernel.org --- .../dc/clk_mgr/dce110/dce110_clk_mgr.c | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index f8409453434c..baeac8f1c04f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -120,9 +120,12 @@ void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg) { + struct dc *dc = context->clk_mgr->ctx->dc; int j; int num_cfgs = 0; + pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; + for (j = 0; j < context->stream_count; j++) { int k; @@ -164,6 +167,23 @@ void dce110_fill_display_configs( cfg->v_refresh /= stream->timing.h_total; cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) / stream->timing.v_total; + + /* Find first CRTC index and calculate its line time. + * This is necessary for DPM on SI GPUs. + */ + if (cfg->pipe_idx < pp_display_cfg->crtc_index) { + const struct dc_crtc_timing *timing = + &context->streams[0]->timing; + + pp_display_cfg->crtc_index = cfg->pipe_idx; + pp_display_cfg->line_time_in_us = + timing->h_total * 10000 / timing->pix_clk_100hz; + } + } + + if (!num_cfgs) { + pp_display_cfg->crtc_index = 0; + pp_display_cfg->line_time_in_us = 0; } pp_display_cfg->display_count = num_cfgs; @@ -232,16 +252,6 @@ void dce11_pplib_apply_display_requirements( dce110_fill_display_configs(context, pp_display_cfg); - /* TODO: is this still applicable?*/ - if (pp_display_cfg->display_count == 1) { - const struct dc_crtc_timing *timing = - &context->streams[0]->timing; - - pp_display_cfg->crtc_index = - pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz; - } - if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); } From 7d07140d37f792f01cfdb8ca9a6a792ab1d29126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:49 +0200 Subject: [PATCH 0503/1307] drm/amd/display: Fill display clock and vblank time in dce110_fill_display_configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also needed by DCE 6. This way the code that gathers this info can be shared between different DCE versions and doesn't have to be repeated. Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 8107432dff37db26fcb641b6cebeae8981cd73a0) Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c | 2 -- .../drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 10 +++------- .../drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 2 -- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index a324dbd4543c..dbd6ef1b60a0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -391,8 +391,6 @@ static void dce_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index baeac8f1c04f..13cf415e38e5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -124,6 +124,9 @@ void dce110_fill_display_configs( int j; int num_cfgs = 0; + pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); + pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; + pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; for (j = 0; j < context->stream_count; j++) { @@ -243,13 +246,6 @@ void dce11_pplib_apply_display_requirements( pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw_ctx.bw.dce.sclk_deep_sleep_khz; - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /* TODO: dce11.2*/ - pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; - - pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index cfd7309f2c6a..7044b437fe9d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -109,8 +109,6 @@ static void dce60_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) From 8246147f1fbaed522b8bcc02ca34e4260747dcfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:50 +0200 Subject: [PATCH 0504/1307] drm/amd/display: Don't warn when missing DCE encoder caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some GPUs the VBIOS just doesn't have encoder caps, or maybe not for every encoder. This isn't really a problem and it's handled well, so let's not litter the logs with it. Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 33e0227ee96e62d034781e91f215e32fd0b1d512) --- drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 4a9d07c31bc5..0c50fe266c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -896,13 +896,13 @@ void dce110_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); @@ -1798,13 +1798,13 @@ void dce60_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); From f14ee2e7a86c5e57295b48b8e198cae7189b3b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:51 +0200 Subject: [PATCH 0505/1307] drm/amd/display: Don't print errors for nonexistent connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When getting the number of connectors, the VBIOS reports the number of valid indices, but it doesn't say which indices are valid, and not every valid index has an actual connector. If we don't find a connector on an index, that is not an error. Considering these are not actual errors, don't litter the logs. Fixes: 60df5628144b ("drm/amd/display: handle invalid connector indices") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 249d4bc5f1935f04bb45b3b63c0f8922565124f7) --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 5 +---- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 67f08495b7e6..154fd2c18e88 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -174,11 +174,8 @@ static struct graphics_object_id bios_parser_get_connector_id( return object_id; } - if (tbl->ucNumberOfObjects <= i) { - dm_error("Can't find connector id %d in connector table of size %d.\n", - i, tbl->ucNumberOfObjects); + if (tbl->ucNumberOfObjects <= i) return object_id; - } id = le16_to_cpu(tbl->asObjects[i].usObjectID); object_id = object_id_from_bios_object_id(id); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9ab0ee20ca6f..dcc48b5238e5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -217,11 +217,24 @@ static bool create_links( connectors_num, num_virtual_links); - // condition loop on link_count to allow skipping invalid indices + /* When getting the number of connectors, the VBIOS reports the number of valid indices, + * but it doesn't say which indices are valid, and not every index has an actual connector. + * So, if we don't find a connector on an index, that is not an error. + * + * - There is no guarantee that the first N indices will be valid + * - VBIOS may report a higher amount of valid indices than there are actual connectors + * - Some VBIOS have valid configurations for more connectors than there actually are + * on the card. This may be because the manufacturer used the same VBIOS for different + * variants of the same card. + */ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) { + struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i); struct link_init_data link_init_params = {0}; struct dc_link *link; + if (connector_id.id == CONNECTOR_ID_UNKNOWN) + continue; + DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count); link_init_params.ctx = dc->ctx; From 10507478468f165ea681605d133991ed05cdff62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 31 Jul 2025 11:43:52 +0200 Subject: [PATCH 0506/1307] drm/amd/display: Fix fractional fb divider in set_pixel_clock_v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For later VBIOS versions, the fractional feedback divider is calculated as the remainder of dividing the feedback divider by a factor, which is set to 1000000. For reference, see: - calculate_fb_and_fractional_fb_divider - calc_pll_max_vco_construct However, in case of old VBIOS versions that have set_pixel_clock_v3, they only have 1 byte available for the fractional feedback divider, and it's expected to be set to the remainder from dividing the feedback divider by 10. For reference see the legacy display code: - amdgpu_pll_compute - amdgpu_atombios_crtc_program_pll This commit fixes set_pixel_clock_v3 by dividing the fractional feedback divider passed to the function by 100000. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Timur Kristóf Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit 027e7acc7e17802ebf28e1edb88a404836ad50d6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/bios/command_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 2bcae0643e61..58e88778da7f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -993,7 +993,7 @@ static enum bp_result set_pixel_clock_v3( allocation.sPCLKInput.usFbDiv = cpu_to_le16((uint16_t)bp_params->feedback_divider); allocation.sPCLKInput.ucFracFbDiv = - (uint8_t)bp_params->fractional_feedback_divider; + (uint8_t)(bp_params->fractional_feedback_divider / 100000); allocation.sPCLKInput.ucPostDiv = (uint8_t)bp_params->pixel_clock_post_divider; From 297a4833a68aac3316eb808b4123eb016ef242d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sat, 2 Aug 2025 17:51:53 +0200 Subject: [PATCH 0507/1307] drm/amd/display: Fix DP audio DTO1 clock source on DCE 6. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DCE 6, DP audio was not working. However, it worked when an HDMI monitor was also plugged in. Looking at dce_aud_wall_dto_setup it seems that the main difference is that we use DTO1 when only DP is plugged in. When programming DTO1, it uses audio_dto_source_clock_in_khz which is set from get_dp_ref_freq_khz The dce60_get_dp_ref_freq_khz implementation looks incorrect, because DENTIST_DISPCLK_CNTL seems to be always zero on DCE 6, so it isn't usable. I compared dce60_get_dp_ref_freq_khz to the legacy display code, specifically dce_v6_0_audio_set_dto, and it turns out that in case of DCE 6, it needs to use the display clock. With that, DP audio started working on Pitcairn, Oland and Cape Verde. However, it still didn't work on Tahiti. Despite having the same DCE version, Tahiti seems to have a different audio device. After some trial and error I realized that it works with the default display clock as reported by the VBIOS, not the current display clock. The patch was tested on all four SI GPUs: * Pitcairn (DCE 6.0) * Oland (DCE 6.4) * Cape Verde (DCE 6.0) * Tahiti (DCE 6.0 but different) The testing was done on Samsung Odyssey G7 LS28BG700EPXEN on each of the above GPUs, at the following settings: * 4K 60 Hz * 1080p 60 Hz * 1080p 144 Hz Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 645cc7863da5de700547d236697dffd6760cf051) Cc: stable@vger.kernel.org --- .../display/dc/clk_mgr/dce60/dce60_clk_mgr.c | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 7044b437fe9d..a39641a0ff09 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = { static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - int dprefclk_wdivider; - int dp_ref_clk_khz; - int target_div; + struct dc_context *ctx = clk_mgr_base->ctx; + int dp_ref_clk_khz = 0; - /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */ - - /* Read the mmDENTIST_DISPCLK_CNTL to get the currently - * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ - REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); - - /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ - target_div = dentist_get_divider_from_did(dprefclk_wdivider); - - /* Calculate the current DFS clock, in kHz.*/ - dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev)) + dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency; + else + dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz; return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz); } From 864e3396976ef41de6cc7bc366276bf4e084fff2 Mon Sep 17 00:00:00 2001 From: Jakub Ramaseuski Date: Thu, 14 Aug 2025 12:51:19 +0200 Subject: [PATCH 0508/1307] net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM When performing Generic Segmentation Offload (GSO) on an IPv6 packet that contains extension headers, the kernel incorrectly requests checksum offload if the egress device only advertises NETIF_F_IPV6_CSUM feature, which has a strict contract: it supports checksum offload only for plain TCP or UDP over IPv6 and explicitly does not support packets with extension headers. The current GSO logic violates this contract by failing to disable the feature for packets with extension headers, such as those used in GREoIPv6 tunnels. This violation results in the device being asked to perform an operation it cannot support, leading to a `skb_warn_bad_offload` warning and a collapse of network throughput. While device TSO/USO is correctly bypassed in favor of software GSO for these packets, the GSO stack must be explicitly told not to request checksum offload. Mask NETIF_F_IPV6_CSUM, NETIF_F_TSO6 and NETIF_F_GSO_UDP_L4 in gso_features_check if the IPv6 header contains extension headers to compute checksum in software. The exception is a BIG TCP extension, which, as stated in commit 68e068cabd2c6c53 ("net: reenable NETIF_F_IPV6_CSUM offload for BIG TCP packets"): "The feature is only enabled on devices that support BIG TCP TSO. The header is only present for PF_PACKET taps like tcpdump, and not transmitted by physical devices." kernel log output (truncated): WARNING: CPU: 1 PID: 5273 at net/core/dev.c:3535 skb_warn_bad_offload+0x81/0x140 ... Call Trace: skb_checksum_help+0x12a/0x1f0 validate_xmit_skb+0x1a3/0x2d0 validate_xmit_skb_list+0x4f/0x80 sch_direct_xmit+0x1a2/0x380 __dev_xmit_skb+0x242/0x670 __dev_queue_xmit+0x3fc/0x7f0 ip6_finish_output2+0x25e/0x5d0 ip6_finish_output+0x1fc/0x3f0 ip6_tnl_xmit+0x608/0xc00 [ip6_tunnel] ip6gre_tunnel_xmit+0x1c0/0x390 [ip6_gre] dev_hard_start_xmit+0x63/0x1c0 __dev_queue_xmit+0x6d0/0x7f0 ip6_finish_output2+0x214/0x5d0 ip6_finish_output+0x1fc/0x3f0 ip6_xmit+0x2ca/0x6f0 ip6_finish_output+0x1fc/0x3f0 ip6_xmit+0x2ca/0x6f0 inet6_csk_xmit+0xeb/0x150 __tcp_transmit_skb+0x555/0xa80 tcp_write_xmit+0x32a/0xe90 tcp_sendmsg_locked+0x437/0x1110 tcp_sendmsg+0x2f/0x50 ... skb linear: 00000000: e4 3d 1a 7d ec 30 e4 3d 1a 7e 5d 90 86 dd 60 0e skb linear: 00000010: 00 0a 1b 34 3c 40 20 11 00 00 00 00 00 00 00 00 skb linear: 00000020: 00 00 00 00 00 12 20 11 00 00 00 00 00 00 00 00 skb linear: 00000030: 00 00 00 00 00 11 2f 00 04 01 04 01 01 00 00 00 skb linear: 00000040: 86 dd 60 0e 00 0a 1b 00 06 40 20 23 00 00 00 00 skb linear: 00000050: 00 00 00 00 00 00 00 00 00 12 20 23 00 00 00 00 skb linear: 00000060: 00 00 00 00 00 00 00 00 00 11 bf 96 14 51 13 f9 skb linear: 00000070: ae 27 a0 a8 2b e3 80 18 00 40 5b 6f 00 00 01 01 skb linear: 00000080: 08 0a 42 d4 50 d5 4b 70 f8 1a Fixes: 04c20a9356f283da ("net: skip offload for NETIF_F_IPV6_CSUM if ipv6 header contains extension") Reported-by: Tianhao Zhao Suggested-by: Michal Schmidt Suggested-by: Willem de Bruijn Signed-off-by: Jakub Ramaseuski Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250814105119.1525687-1-jramaseu@redhat.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 5a3c0f40a93f..93a25d87b86b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3779,6 +3779,18 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, features &= ~NETIF_F_TSO_MANGLEID; } + /* NETIF_F_IPV6_CSUM does not support IPv6 extension headers, + * so neither does TSO that depends on it. + */ + if (features & NETIF_F_IPV6_CSUM && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) != sizeof(struct ipv6hdr) && + !ipv6_has_hopopt_jumbo(skb)) + features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); + return features; } From 84967deee9d9870b15bc4c3acb50f1d401807902 Mon Sep 17 00:00:00 2001 From: Minhong He Date: Fri, 15 Aug 2025 14:38:45 +0800 Subject: [PATCH 0509/1307] ipv6: sr: validate HMAC algorithm ID in seg6_hmac_info_add The seg6_genl_sethmac() directly uses the algorithm ID provided by the userspace without verifying whether it is an HMAC algorithm supported by the system. If an unsupported HMAC algorithm ID is configured, packets using SRv6 HMAC will be dropped during encapsulation or decapsulation. Fixes: 4f4853dc1c9c ("ipv6: sr: implement API to control SR HMAC structure") Signed-off-by: Minhong He Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250815063845.85426-1-heminhong@kylinos.cn Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_hmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index f78ecb6ad838..d77b52523b6a 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -304,6 +304,9 @@ int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) struct seg6_pernet_data *sdata = seg6_pernet(net); int err; + if (!__hmac_get_algo(hinfo->alg_id)) + return -EINVAL; + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, rht_params); From ccab044697980c6c01ab51f43f48f13b8a3e5c33 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Fri, 15 Aug 2025 19:28:19 +0200 Subject: [PATCH 0510/1307] mptcp: drop skb if MPTCP skb extension allocation fails When skb_ext_add(skb, SKB_EXT_MPTCP) fails in mptcp_incoming_options(), we used to return true, letting the segment proceed through the TCP receive path without a DSS mapping. Such segments can leave inconsistent mapping state and trigger a mid-stream fallback to TCP, which in testing collapsed (by artificially forcing failures in skb_ext_add) throughput to zero. Return false instead so the TCP input path drops the skb (see tcp_data_queue() and step-7 processing). This is the safer choice under memory pressure: it preserves MPTCP correctness and provides backpressure to the sender. Control packets remain unaffected: ACK updates and DATA_FIN handling happen before attempting the extension allocation, and tcp_reset() continues to ignore the return value. With this change, MPTCP continues to work at high throughput if we artificially inject failures into skb_ext_add. Fixes: 6787b7e350d3 ("mptcp: avoid processing packet if a subflow reset") Cc: stable@vger.kernel.org Signed-off-by: Christoph Paasch Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-1-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 70c0ab0ecf90..2a8ea28442b2 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1118,7 +1118,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -/* Return false if a subflow has been reset, else return true */ +/* Return false in case of error (or subflow has been reset), + * else return true. + */ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1222,7 +1224,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return true; + return false; memset(mpext, 0, sizeof(*mpext)); From 68fc0f4b0d25692940cdc85c68e366cae63e1757 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:20 +0200 Subject: [PATCH 0511/1307] mptcp: pm: kernel: flush: do not reset ADD_ADDR limit A flush of the MPTCP endpoints should not affect the MPTCP limits. In other words, 'ip mptcp endpoint flush' should not change 'ip mptcp limits'. But it was the case: the MPTCP_PM_ATTR_RCV_ADD_ADDRS (add_addr_accepted) limit was reset by accident. Removing the reset of this counter during a flush fixes this issue. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reported-by: Thomas Dreibholz Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/579 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-2-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index d39e7c178460..667803d72b64 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -1085,7 +1085,6 @@ static void __flush_addrs(struct list_head *list) static void __reset_counters(struct pm_nl_pernet *pernet) { WRITE_ONCE(pernet->add_addr_signal_max, 0); - WRITE_ONCE(pernet->add_addr_accept_max, 0); WRITE_ONCE(pernet->local_addr_max, 0); pernet->addrs = 0; } From 452690be7de2f91cc0de68cb9e95252875b33503 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:21 +0200 Subject: [PATCH 0512/1307] selftests: mptcp: pm: check flush doesn't reset limits This modification is linked to the parent commit where the received ADD_ADDR limit was accidentally reset when the endpoints were flushed. To validate that, the test is now flushing endpoints after having set new limits, and before checking them. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-3-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..ac7ec6f94023 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint From 5d13349472ac8abcbcb94407969aa0fdc2e1f1be Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:22 +0200 Subject: [PATCH 0513/1307] mptcp: remove duplicate sk_reset_timer call sk_reset_timer() was called twice in mptcp_pm_alloc_anno_list. Simplify the code by using a 'goto' statement to eliminate the duplication. Note that this is not a fix, but it will help backporting the following patch. The same "Fixes" tag has been added for this reason. Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-4-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 420d416e2603..c5f6a53ce5f1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -353,9 +353,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); - return true; + goto reset_timer; } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); @@ -369,6 +367,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry->retrans_times = 0; timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); +reset_timer: sk_reset_timer(sk, &add_entry->add_timer, jiffies + mptcp_get_add_addr_timeout(net)); From f5ce0714623cffd00bf2a83e890d09c609b7f50a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:23 +0200 Subject: [PATCH 0514/1307] mptcp: disable add_addr retransmission when timeout is 0 When add_addr_timeout was set to 0, this caused the ADD_ADDR to be retransmitted immediately, which looks like a buggy behaviour. Instead, interpret 0 as "no retransmissions needed". The documentation is updated to explicitly state that setting the timeout to 0 disables retransmission. Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") Cc: stable@vger.kernel.org Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-5-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp-sysctl.rst | 2 ++ net/mptcp/pm.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 5bfab01eff5a..1683c139821e 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -12,6 +12,8 @@ add_addr_timeout - INTEGER (seconds) resent to an MPTCP peer that has not acknowledged a previous ADD_ADDR message. + Do not retransmit if set to 0. + The default value matches TCP_RTO_MAX. This is a per-namespace sysctl. diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index c5f6a53ce5f1..136a380602ca 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -274,6 +274,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; + unsigned int timeout; pr_debug("msk=%p\n", msk); @@ -291,6 +292,10 @@ static void mptcp_pm_add_timer(struct timer_list *timer) goto out; } + timeout = mptcp_get_add_addr_timeout(sock_net(sk)); + if (!timeout) + goto out; + spin_lock_bh(&msk->pm.lock); if (!mptcp_pm_should_add_signal_addr(msk)) { @@ -302,7 +307,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) sk_reset_timer(sk, timer, - jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); + jiffies + timeout); spin_unlock_bh(&msk->pm.lock); @@ -344,6 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + unsigned int timeout; lockdep_assert_held(&msk->pm.lock); @@ -368,8 +374,9 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: - sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); + timeout = mptcp_get_add_addr_timeout(net); + if (timeout) + sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); return true; } From f92199f551e617fae028c5c5905ddd63e3616e18 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:24 +0200 Subject: [PATCH 0515/1307] selftests: mptcp: disable add_addr retrans in endpoint_tests To prevent test instability in the "delete re-add signal" test caused by ADD_ADDR retransmissions, disable retransmissions for this test by setting net.mptcp.add_addr_timeout to 0. Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-6-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8af65373b3a..82cae37d9c20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3842,6 +3842,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal From 2eefbed30d46d5e68593baf6b52923e00e7678af Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:25 +0200 Subject: [PATCH 0516/1307] selftests: mptcp: connect: fix C23 extension warning GCC was complaining about the new label: mptcp_connect.c:187:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 187 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: a862771d1aa4 ("selftests: mptcp: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-7-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ac1349c4b9e5..4f07ac9fa207 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; From 3259889fd3c0cc165b7e9ee375c789875dd32326 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:26 +0200 Subject: [PATCH 0517/1307] selftests: mptcp: sockopt: fix C23 extension warning GCC was complaining about the new label: mptcp_inq.c:79:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 79 | int err = getaddrinfo(node, service, hints, res); | ^ mptcp_sockopt.c:166:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 166 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: dd367e81b79a ("selftests: mptcp: sockopt: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; From 89eb9a62aed77b409663ba1eac152e8f758815b7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 15 Aug 2025 22:18:09 +0200 Subject: [PATCH 0518/1307] net: dsa: b53: fix reserved register access in b53_fdb_dump() When BCM5325 support was added in c45655386e53 ("net: dsa: b53: add support for FDB operations on 5325/5365"), the register used for ARL access was made conditional on the chip. But in b53_fdb_dump(), instead of the register argument the page argument was replaced, causing it to write to a reserved page 0x50 on !BCM5325*. Writing to this page seems to completely lock the switch up: [ 89.680000] b53-switch spi0.1 lan2: Link is Down [ 89.680000] WARNING: CPU: 1 PID: 26 at drivers/net/phy/phy.c:1350 _phy_state_machine+0x1bc/0x454 [ 89.720000] phy_check_link_status+0x0/0x114: returned: -5 [ 89.730000] Modules linked in: nft_fib_inet nf_flow_table_inet nft_reject_ipv6 nft_reject_ipv4 nft_reject_inet nft_reject nft_redir nft_quota nft_numgen nft_nat nft_masq nft_log nft_limit nft_hash nft_flow_offload nft_fib_ipv6 nft_fib_ipv4 nft_fib nft_ct nft_chain_nat nf_tables nf_nat nf_flow_table nf_conntrack nfnetlink nf_reject_ipv6 nf_reject_ipv4 nf_log_syslog nf_defrag_ipv6 nf_defrag_ipv4 cls_flower sch_tbf sch_ingress sch_htb sch_hfsc em_u32 cls_u32 cls_route cls_matchall cls_fw cls_flow cls_basic act_skbedit act_mirred act_gact vrf md5 crc32c_cryptoapi [ 89.780000] CPU: 1 UID: 0 PID: 26 Comm: kworker/u10:0 Tainted: G W 6.16.0-rc1+ #0 NONE [ 89.780000] Tainted: [W]=WARN [ 89.780000] Hardware name: Netgear DGND3700 v1 [ 89.780000] Workqueue: events_power_efficient phy_state_machine [ 89.780000] Stack : 809c762c 8006b050 00000001 820a9ce3 0000114c 000affff 805d22d0 8200ba00 [ 89.780000] 82005000 6576656e 74735f70 6f776572 5f656666 10008b00 820a9cb8 82088700 [ 89.780000] 00000000 00000000 809c762c 820a9a98 00000000 00000000 ffffefff 80a7a76c [ 89.780000] 80a70000 820a9af8 80a70000 80a70000 80a70000 00000000 809c762c 820a9dd4 [ 89.780000] 00000000 805d1494 80a029e4 80a70000 00000003 00000000 00000004 81a60004 [ 89.780000] ... [ 89.780000] Call Trace: [ 89.780000] [<800228b8>] show_stack+0x38/0x118 [ 89.780000] [<8001afc4>] dump_stack_lvl+0x6c/0xac [ 89.780000] [<80046b90>] __warn+0x9c/0x114 [ 89.780000] [<80046da8>] warn_slowpath_fmt+0x1a0/0x1b0 [ 89.780000] [<805d1494>] _phy_state_machine+0x1bc/0x454 [ 89.780000] [<805d22fc>] phy_state_machine+0x2c/0x70 [ 89.780000] [<80066b08>] process_one_work+0x1e8/0x3e0 [ 89.780000] [<80067a1c>] worker_thread+0x354/0x4e4 [ 89.780000] [<800706cc>] kthread+0x130/0x274 [ 89.780000] [<8001d808>] ret_from_kernel_thread+0x14/0x1c And any further accesses fail: [ 120.790000] b53-switch spi0.1: timeout waiting for ARL to finish: 0x81 [ 120.800000] b53-switch spi0.1: port 2 failed to add 2c:b0:5d:27:9a:bd vid 3 to fdb: -145 [ 121.010000] b53-switch spi0.1: timeout waiting for ARL to finish: 0xbf [ 121.020000] b53-switch spi0.1: port 3 failed to add 2c:b0:5d:27:9a:bd vid 3 to fdb: -145 Restore the correct page B53_ARLIO_PAGE again, and move the offset argument to the correct place. *On BCM5325, this became a write to the MIB page of Port 1. Still a reserved offset, but likely less brokenness from that write. Fixes: c45655386e53 ("net: dsa: b53: add support for FDB operations on 5325/5365") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250815201809.549195-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9942fb6f7f4b..829b1f087e9e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2078,7 +2078,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, /* Start search operation */ reg = ARL_SRCH_STDN; - b53_write8(priv, offset, B53_ARL_SRCH_CTL, reg); + b53_write8(priv, B53_ARLIO_PAGE, offset, reg); do { ret = b53_arl_search_wait(priv); From 4611d88a37cfc18cbabc6978aaf7325d1ae3f53a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 16 Aug 2025 11:38:50 -0700 Subject: [PATCH 0519/1307] bnxt_en: Fix lockdep warning during rmmod The commit under the Fixes tag added a netdev_assert_locked() in bnxt_free_ntp_fltrs(). The lock should be held during normal run-time but the assert will be triggered (see below) during bnxt_remove_one() which should not need the lock. The netdev is already unregistered by then. Fix it by calling netdev_assert_locked_or_invisible() which will not assert if the netdev is unregistered. WARNING: CPU: 5 PID: 2241 at ./include/net/netdev_lock.h:17 bnxt_free_ntp_fltrs+0xf8/0x100 [bnxt_en] Modules linked in: rpcrdma rdma_cm iw_cm ib_cm configfs ib_core bnxt_en(-) bridge stp llc x86_pkg_temp_thermal xfs tg3 [last unloaded: bnxt_re] CPU: 5 UID: 0 PID: 2241 Comm: rmmod Tainted: G S W 6.16.0 #2 PREEMPT(voluntary) Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017 RIP: 0010:bnxt_free_ntp_fltrs+0xf8/0x100 [bnxt_en] Code: 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 8b 47 60 be ff ff ff ff 48 8d b8 28 0c 00 00 e8 d0 cf 41 c3 85 c0 0f 85 2e ff ff ff <0f> 0b e9 27 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffa92082387da0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9e5b593d8000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: ffffffff83dc9a70 RDI: ffffffff83e1a1cf RBP: ffff9e5b593d8c80 R08: 0000000000000000 R09: ffffffff8373a2b3 R10: 000000008100009f R11: 0000000000000001 R12: 0000000000000001 R13: ffffffffc01c4478 R14: dead000000000122 R15: dead000000000100 FS: 00007f3a8a52c740(0000) GS:ffff9e631ad1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb289419c8 CR3: 000000011274e001 CR4: 00000000003706f0 Call Trace: bnxt_remove_one+0x57/0x180 [bnxt_en] pci_device_remove+0x39/0xc0 device_release_driver_internal+0xa5/0x130 driver_detach+0x42/0x90 bus_remove_driver+0x61/0xc0 pci_unregister_driver+0x38/0x90 bnxt_exit+0xc/0x7d0 [bnxt_en] Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250816183850.4125033-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2800a90fba1f..207a8bb36ae5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5332,7 +5332,7 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all) { int i; - netdev_assert_locked(bp->dev); + netdev_assert_locked_or_invisible(bp->dev); /* Under netdev instance lock and all our NAPIs have been disabled. * It's safe to delete the hash table. From eabcac808ca3ee9878223d4b49b750979029016b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:23 -0700 Subject: [PATCH 0520/1307] scsi: ufs: core: Fix IRQ lock inversion for the SCSI host lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") introduced an IRQ lock inversion issue. Fix this lock inversion by changing the spin_lock_irq() calls into spin_lock_irqsave() calls in code that can be called either from interrupt context or from thread context. This patch fixes the following lockdep complaint: WARNING: possible irq lock inversion dependency detected 6.12.30-android16-5-maybe-dirty-4k #1 Tainted: G W OE -------------------------------------------------------- kworker/u28:0/12 just changed the state of lock: ffffff881e29dd60 (&hba->clk_gating.lock){-...}-{2:2}, at: ufshcd_release_scsi_cmd+0x60/0x110 but this lock took another, HARDIRQ-unsafe lock in the past: (shost->host_lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(shost->host_lock); local_irq_disable(); lock(&hba->clk_gating.lock); lock(shost->host_lock); lock(&hba->clk_gating.lock); *** DEADLOCK *** 4 locks held by kworker/u28:0/12: #0: ffffff8800ac6158 ((wq_completion)async){+.+.}-{0:0}, at: process_one_work+0x1bc/0x65c #1: ffffffc085c93d70 ((work_completion)(&entry->work)){+.+.}-{0:0}, at: process_one_work+0x1e4/0x65c #2: ffffff881e29c0e0 (&shost->scan_mutex){+.+.}-{3:3}, at: __scsi_add_device+0x74/0x120 #3: ffffff881960ea00 (&hwq->cq_lock){-...}-{2:2}, at: ufshcd_mcq_poll_cqe_lock+0x28/0x104 the shortest dependencies between 2nd lock and 1st lock: -> (shost->host_lock){+.+.}-{2:2} { HARDIRQ-ON-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 SOFTIRQ-ON-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 INITIAL USE at: lock_acquire+0x134/0x2b4 _raw_spin_lock+0x48/0x64 ufshcd_sl_intr+0x4c/0xa08 ufshcd_threaded_intr+0x70/0x12c irq_thread_fn+0x48/0xa8 irq_thread+0x130/0x1ec kthread+0x110/0x134 ret_from_fork+0x10/0x20 } ... key at: [] scsi_host_alloc.__key+0x0/0x10 ... acquired at: _raw_spin_lock_irqsave+0x5c/0x80 __ufshcd_release+0x78/0x118 ufshcd_send_uic_cmd+0xe4/0x118 ufshcd_dme_set_attr+0x88/0x1c8 ufs_google_phy_initialization+0x68/0x418 [ufs] ufs_google_link_startup_notify+0x78/0x27c [ufs] ufshcd_link_startup+0x84/0x720 ufshcd_init+0xf3c/0x1330 ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 [ufs] platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __driver_attach_async_helper+0x60/0xd4 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 -> (&hba->clk_gating.lock){-...}-{2:2} { IN-HARDIRQ-W at: lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 INITIAL USE at: lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_hold+0x34/0x14c ufshcd_send_uic_cmd+0x28/0x118 ufshcd_dme_set_attr+0x88/0x1c8 ufs_google_phy_initialization+0x68/0x418 [ufs] ufs_google_link_startup_notify+0x78/0x27c [ufs] ufshcd_link_startup+0x84/0x720 ufshcd_init+0xf3c/0x1330 ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 [ufs] platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __driver_attach_async_helper+0x60/0xd4 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 } ... key at: [] ufshcd_init.__key+0x0/0x10 ... acquired at: mark_lock+0x1c4/0x224 __lock_acquire+0x438/0x2e1c lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 stack backtrace: CPU: 6 UID: 0 PID: 12 Comm: kworker/u28:0 Tainted: G W OE 6.12.30-android16-5-maybe-dirty-4k #1 ccd4020fe444bdf629efc3b86df6be920b8df7d0 Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Spacecraft board based on MALIBU (DT) Workqueue: async async_run_entry_fn Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0xa0 dump_stack+0x18/0x24 print_irq_inversion_bug+0x2fc/0x304 mark_lock_irq+0x388/0x4fc mark_lock+0x1c4/0x224 __lock_acquire+0x438/0x2e1c lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 ufshcd_release_scsi_cmd+0x60/0x110 ufshcd_compl_one_cqe+0x2c0/0x3f4 ufshcd_mcq_poll_cqe_lock+0xb0/0x104 ufs_google_mcq_intr+0x80/0xa0 [ufs dd6f385554e109da094ab91d5f7be18625a2222a] __handle_irq_event_percpu+0x104/0x32c handle_irq_event+0x40/0x9c handle_fasteoi_irq+0x170/0x2e8 generic_handle_domain_irq+0x58/0x80 gic_handle_irq+0x48/0x104 call_on_irq_stack+0x3c/0x50 do_interrupt_handler+0x7c/0xd8 el1_interrupt+0x34/0x58 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x68/0x6c _raw_spin_unlock_irqrestore+0x3c/0x6c debug_object_assert_init+0x16c/0x21c __mod_timer+0x4c/0x48c schedule_timeout+0xd4/0x16c io_schedule_timeout+0x48/0x70 do_wait_for_common+0x100/0x194 wait_for_completion_io_timeout+0x48/0x6c blk_execute_rq+0x124/0x17c scsi_execute_cmd+0x18c/0x3f8 scsi_probe_and_add_lun+0x204/0xd74 __scsi_add_device+0xbc/0x120 ufshcd_async_scan+0x80/0x3c0 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 Cc: Neil Armstrong Cc: André Draszik Reviewed-by: Peter Wang Fixes: 3c7ac40d7322 ("scsi: ufs: core: Delegate the interrupt service routine to a threaded IRQ handler") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index efd7a811a002..b047325a3669 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5559,7 +5559,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) irqreturn_t retval = IRQ_NONE; struct uic_command *cmd; - spin_lock(hba->host->host_lock); + guard(spinlock_irqsave)(hba->host->host_lock); cmd = hba->active_uic_cmd; if (WARN_ON_ONCE(!cmd)) goto unlock; @@ -5586,8 +5586,6 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) ufshcd_add_uic_command_trace(hba, cmd, UFS_CMD_COMP); unlock: - spin_unlock(hba->host->host_lock); - return retval; } @@ -6920,7 +6918,7 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) bool queue_eh_work = false; irqreturn_t retval = IRQ_NONE; - spin_lock(hba->host->host_lock); + guard(spinlock_irqsave)(hba->host->host_lock); hba->errors |= UFSHCD_ERROR_MASK & intr_status; if (hba->errors & INT_FATAL_ERRORS) { @@ -6979,7 +6977,7 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) */ hba->errors = 0; hba->uic_error = 0; - spin_unlock(hba->host->host_lock); + return retval; } From e5203d89d59bfcbe1f348aa0d2dc4449a8ba644c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:24 -0700 Subject: [PATCH 0521/1307] scsi: ufs: core: Remove WARN_ON_ONCE() call from ufshcd_uic_cmd_compl() The UIC completion interrupt may be disabled while an UIC command is being processed. When the UIC completion interrupt is reenabled, an UIC interrupt is triggered and the WARN_ON_ONCE(!cmd) statement is hit. Hence this patch that removes this kernel warning. Fixes: fcd8b0450a9a ("scsi: ufs: core: Make ufshcd_uic_cmd_compl() easier to analyze") Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b047325a3669..2097efe3a990 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5561,7 +5561,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) guard(spinlock_irqsave)(hba->host->host_lock); cmd = hba->active_uic_cmd; - if (WARN_ON_ONCE(!cmd)) + if (!cmd) goto unlock; if (ufshcd_is_auto_hibern8_error(hba, intr_status)) From 9ee35fd43f94bf19dbd27cffc213a31314b623d2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:25 -0700 Subject: [PATCH 0522/1307] scsi: ufs: core: Fix the return value documentation ufshcd_wait_for_dev_cmd() and all its callers can return an OCS error. OCS errors are represented by positive integers. Remove the WARN_ONCE() statements that complain about positive error codes and update the documentation. Keep the behavior of ufshcd_wait_for_dev_cmd() because this return value may end be passed as the second argument of bsg_job_done() and bsg_job_done() handles positive and negative error codes differently. Cc: Peter Wang Fixes: cc59f3b68542 ("scsi: ufs: core: Improve return value documentation") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-4-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 62 ++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2097efe3a990..f471288a5c70 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3199,7 +3199,8 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) } /* - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, int max_timeout) @@ -3275,7 +3276,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, } } - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3294,7 +3294,8 @@ static void ufshcd_dev_man_unlock(struct ufs_hba *hba) } /* - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, const u32 tag, int timeout) @@ -3317,7 +3318,8 @@ static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, * @cmd_type: specifies the type (NOP, Query...) * @timeout: timeout in milliseconds * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. * * NOTE: Since there is only one available tag for device management commands, * it is expected you hold the hba->dev_cmd.lock mutex. @@ -3363,6 +3365,10 @@ static inline void ufshcd_init_query(struct ufs_hba *hba, (*request)->upiu_req.selector = selector; } +/* + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ static int ufshcd_query_flag_retry(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res) { @@ -3383,7 +3389,6 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba, dev_err(hba->dev, "%s: query flag, opcode %d, idn %d, failed with error %d after %d retries\n", __func__, opcode, idn, ret, retries); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } @@ -3395,7 +3400,8 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba, * @index: flag index to access * @flag_res: the flag value after the query request completes * - * Return: 0 for success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res) @@ -3451,7 +3457,6 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, out_unlock: ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3464,8 +3469,9 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, * @selector: selector field * @attr_val: the attribute value after the query request completes * - * Return: 0 upon success; < 0 upon failure. -*/ + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val) { @@ -3513,7 +3519,6 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, out_unlock: ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3528,8 +3533,9 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, * @attr_val: the attribute value after the query request * completes * - * Return: 0 for success; < 0 upon failure. -*/ + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_query_attr_retry(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val) @@ -3551,12 +3557,12 @@ int ufshcd_query_attr_retry(struct ufs_hba *hba, dev_err(hba->dev, "%s: query attribute, idn %d, failed with error %d after %d retries\n", __func__, idn, ret, QUERY_REQ_RETRIES); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } /* - * Return: 0 if successful; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int __ufshcd_query_descriptor(struct ufs_hba *hba, enum query_opcode opcode, enum desc_idn idn, u8 index, @@ -3615,7 +3621,6 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, out_unlock: hba->dev_cmd.query.descriptor = NULL; ufshcd_dev_man_unlock(hba); - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3632,7 +3637,8 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, * The buf_len parameter will contain, on return, the length parameter * received on the response. * - * Return: 0 for success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, enum query_opcode opcode, @@ -3650,7 +3656,6 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, break; } - WARN_ONCE(err > 0, "Incorrect return value %d > 0\n", err); return err; } @@ -3663,7 +3668,8 @@ int ufshcd_query_descriptor_retry(struct ufs_hba *hba, * @param_read_buf: pointer to buffer where parameter would be read * @param_size: sizeof(param_read_buf) * - * Return: 0 in case of success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_read_desc_param(struct ufs_hba *hba, enum desc_idn desc_id, @@ -3730,7 +3736,6 @@ int ufshcd_read_desc_param(struct ufs_hba *hba, out: if (is_kmalloc) kfree(desc_buf); - WARN_ONCE(ret > 0, "Incorrect return value %d > 0\n", ret); return ret; } @@ -4781,7 +4786,8 @@ EXPORT_SYMBOL_GPL(ufshcd_config_pwr_mode); * * Set fDeviceInit flag and poll until device toggles it. * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_complete_dev_init(struct ufs_hba *hba) { @@ -5135,7 +5141,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba) * not respond with NOP IN UPIU within timeout of %NOP_OUT_TIMEOUT * and we retry sending NOP OUT for %NOP_OUT_RETRIES iterations. * - * Return: 0 upon success; < 0 upon failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_verify_dev_init(struct ufs_hba *hba) { @@ -5867,7 +5874,8 @@ static inline int ufshcd_enable_ee(struct ufs_hba *hba, u16 mask) * as the device is allowed to manage its own way of handling background * operations. * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) { @@ -5906,7 +5914,8 @@ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) * host is idle so that BKOPS are managed effectively without any negative * impacts. * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) { @@ -6056,6 +6065,10 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } +/* + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. + */ int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id) { struct utp_upiu_query_v4_0 *upiu_resp; @@ -7452,7 +7465,8 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, * @sg_list: Pointer to SG list when DATA IN/OUT UPIU is required in ARPMB operation * @dir: DMA direction * - * Return: zero on success, non-zero on failure. + * Return: 0 upon success; > 0 in case the UFS device reported an OCS error; + * < 0 if another error occurred. */ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *req_upiu, struct utp_upiu_req *rsp_upiu, struct ufs_ehs *req_ehs, From 09d57d68ba9a36117eadb75d3ecf817a3c091acc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Aug 2025 08:58:26 -0700 Subject: [PATCH 0523/1307] scsi: ufs: core: Rename ufshcd_wait_for_doorbell_clr() The name ufshcd_wait_for_doorbell_clr() refers to legacy mode. Commit 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code") added support for MCQ mode in this function. Since then the name of this function is misleading. Hence change the name of this function into something that is appropriate for both legacy and MCQ mode. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250815155842.472867-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f471288a5c70..9a43102b2b21 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1303,7 +1303,7 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba) * * Return: 0 upon success; -EBUSY upon timeout. */ -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, +static int ufshcd_wait_for_pending_cmds(struct ufs_hba *hba, u64 wait_timeout_us) { int ret = 0; @@ -1431,7 +1431,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) down_write(&hba->clk_scaling_lock); if (!hba->clk_scaling.is_allowed || - ufshcd_wait_for_doorbell_clr(hba, timeout_us)) { + ufshcd_wait_for_pending_cmds(hba, timeout_us)) { ret = -EBUSY; up_write(&hba->clk_scaling_lock); mutex_unlock(&hba->wb_mutex); From 6300d5c5438724c0876828da2f6e2c1a661871fc Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 11 Aug 2025 13:03:30 +0530 Subject: [PATCH 0524/1307] scsi: ufs: ufs-qcom: Fix ESI null pointer dereference ESI/MSI is a performance optimization feature that provides dedicated interrupts per MCQ hardware queue. This is optional feature and UFS MCQ should work with and without ESI feature. Commit e46a28cea29a ("scsi: ufs: qcom: Remove the MSI descriptor abuse") brings a regression in ESI (Enhanced System Interrupt) configuration that causes a null pointer dereference when Platform MSI allocation fails. The issue occurs in when platform_device_msi_init_and_alloc_irqs() in ufs_qcom_config_esi() fails (returns -EINVAL) but the current code uses __free() macro for automatic cleanup free MSI resources that were never successfully allocated. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: mutex_lock+0xc/0x54 (P) platform_device_msi_free_irqs_all+0x1c/0x40 ufs_qcom_config_esi+0x1d0/0x220 [ufs_qcom] ufshcd_config_mcq+0x28/0x104 ufshcd_init+0xa3c/0xf40 ufshcd_pltfrm_init+0x504/0x7d4 ufs_qcom_probe+0x20/0x58 [ufs_qcom] Fix by restructuring the ESI configuration to try MSI allocation first, before any other resource allocation and instead use explicit cleanup instead of __free() macro to avoid cleanup of unallocated resources. Tested on SM8750 platform with MCQ enabled, both with and without Platform ESI support. Fixes: e46a28cea29a ("scsi: ufs: qcom: Remove the MSI descriptor abuse") Cc: Manivannan Sadhasivam Cc: Thomas Gleixner Cc: James Bottomley Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250811073330.20230-1-quic_nitirawa@quicinc.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 39 ++++++++++++++----------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 76fc70503a62..9574fdc2bb0f 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -2070,17 +2070,6 @@ static irqreturn_t ufs_qcom_mcq_esi_handler(int irq, void *data) return IRQ_HANDLED; } -static void ufs_qcom_irq_free(struct ufs_qcom_irq *uqi) -{ - for (struct ufs_qcom_irq *q = uqi; q->irq; q++) - devm_free_irq(q->hba->dev, q->irq, q->hba); - - platform_device_msi_free_irqs_all(uqi->hba->dev); - devm_kfree(uqi->hba->dev, uqi); -} - -DEFINE_FREE(ufs_qcom_irq, struct ufs_qcom_irq *, if (_T) ufs_qcom_irq_free(_T)) - static int ufs_qcom_config_esi(struct ufs_hba *hba) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); @@ -2095,18 +2084,18 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) */ nr_irqs = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; - struct ufs_qcom_irq *qi __free(ufs_qcom_irq) = - devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL); - if (!qi) - return -ENOMEM; - /* Preset so __free() has a pointer to hba in all error paths */ - qi[0].hba = hba; - ret = platform_device_msi_init_and_alloc_irqs(hba->dev, nr_irqs, ufs_qcom_write_msi_msg); if (ret) { - dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret); - return ret; + dev_warn(hba->dev, "Platform MSI not supported or failed, continuing without ESI\n"); + return ret; /* Continue without ESI */ + } + + struct ufs_qcom_irq *qi = devm_kcalloc(hba->dev, nr_irqs, sizeof(*qi), GFP_KERNEL); + + if (!qi) { + platform_device_msi_free_irqs_all(hba->dev); + return -ENOMEM; } for (int idx = 0; idx < nr_irqs; idx++) { @@ -2117,15 +2106,17 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) ret = devm_request_irq(hba->dev, qi[idx].irq, ufs_qcom_mcq_esi_handler, IRQF_SHARED, "qcom-mcq-esi", qi + idx); if (ret) { - dev_err(hba->dev, "%s: Fail to request IRQ for %d, err = %d\n", + dev_err(hba->dev, "%s: Failed to request IRQ for %d, err = %d\n", __func__, qi[idx].irq, ret); - qi[idx].irq = 0; + /* Free previously allocated IRQs */ + for (int j = 0; j < idx; j++) + devm_free_irq(hba->dev, qi[j].irq, qi + j); + platform_device_msi_free_irqs_all(hba->dev); + devm_kfree(hba->dev, qi); return ret; } } - retain_and_null_ptr(qi); - if (host->hw_ver.major >= 6) { ufshcd_rmwl(hba, ESI_VEC_MASK, FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1), REG_UFS_CFG3); From 018f659753fd38bb6fdba7fa8c751121b495e1f4 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Mon, 18 Aug 2025 23:42:43 +0300 Subject: [PATCH 0525/1307] ALSA: hda/realtek: Fix headset mic on ASUS Zenbook 14 Add a PCI quirk to enable microphone input on the headphone jack on the ASUS Zenbook 14 UM3406HA laptop. This model uses an ALC294 codec with CS35L41 amplifiers over I2C, and the existing fixup for it did not enable the headset microphone. A new fix is introduced to get the mic working while keeping the amplifier settings correct. Fixes: 61cbc08fdb04 ("ALSA: hda/realtek: Add quirks for ASUS 2024 Zenbooks") Signed-off-by: Vasiliy Kovalev Link: https://patch.msgid.link/20250818204243.247297-1-kovalev@altlinux.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index db8e6352b942..6c78a286172c 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3579,6 +3579,7 @@ enum { ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, ALC294_FIXUP_ASUS_MIC, ALC294_FIXUP_ASUS_HEADSET_MIC, + ALC294_FIXUP_ASUS_I2C_HEADSET_MIC, ALC294_FIXUP_ASUS_SPK, ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, @@ -4889,6 +4890,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MIC }, + [ALC294_FIXUP_ASUS_I2C_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a19020 }, /* use as headset mic */ + { } + }, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2 + }, [ALC294_FIXUP_ASUS_SPK] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -6730,7 +6740,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC294_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), From f4b3cef55f5f96fdb4e7f9ca90b7d6213689faeb Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 19 Aug 2025 14:03:44 +0800 Subject: [PATCH 0526/1307] ALSA: hda/realtek: Audio disappears on HP 15-fc000 after warm boot again There was a similar bug in the past (Bug 217440), which was fixed for this laptop. The same issue is occurring again as of kernel v.6.12.2. The symptoms are very similar - initially audio works but after a warm reboot, the audio completely disappears until the computer is powered off (there is no audio output at all). The issue is also related by caused by a different change now. By bisecting different kernel versions, I found that reverting cc3d0b5dd989 in patch_realtek.c[*] restores the sound and it works fine after the reboot. [*] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/sound/pci/hda/patch_realtek.c?h=v6.12.2&id=4ed7f16070a8475c088ff423b2eb11ba15eb89b6 [ patch description reformatted by tiwai ] Fixes: cc3d0b5dd989 ("ALSA: hda/realtek: Update ALC256 depop procedure") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220109 Signed-off-by: Kailang Yang Link: https://lore.kernel.org/5317ca723c82447a938414fcca85cbf5@realtek.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 6c78a286172c..0323606b3d6d 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -510,6 +510,15 @@ static void alc256_shutup(struct hda_codec *codec) hp_pin = 0x21; alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ + + /* 3k pull low control for Headset jack. */ + /* NOTE: call this before clearing the pin, otherwise codec stalls */ + /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly + * when booting with headset plugged. So skip setting it for the codec alc257 + */ + if (spec->en_3kpull_low) + alc_update_coef_idx(codec, 0x46, 0, 3 << 12); + hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); if (hp_pin_sense) { @@ -520,14 +529,6 @@ static void alc256_shutup(struct hda_codec *codec) msleep(75); - /* 3k pull low control for Headset jack. */ - /* NOTE: call this before clearing the pin, otherwise codec stalls */ - /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly - * when booting with headset plugged. So skip setting it for the codec alc257 - */ - if (spec->en_3kpull_low) - alc_update_coef_idx(codec, 0x46, 0, 3 << 12); - if (!spec->no_shutup_pins) snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); From ff646d033783068cc5b38924873cab4a536b17c1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Aug 2025 14:56:56 +0300 Subject: [PATCH 0527/1307] drm/i915: silence rpm wakeref asserts on GEN11_GU_MISC_IIR access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8d9908e8fe9c ("drm/i915/display: remove small micro-optimizations in irq handling") not only removed the optimizations, it also enabled wakeref asserts for the GEN11_GU_MISC_IIR access. Silence the asserts by wrapping the access inside intel_display_rpm_assert_{block,unblock}(). Reported-by: "Jason A. Donenfeld" Closes: https://lore.kernel.org/r/aG0tWkfmxWtxl_xc@zx2c4.com Fixes: 8d9908e8fe9c ("drm/i915/display: remove small micro-optimizations in irq handling") Cc: stable@vger.kernel.org # v6.13+ Suggested-by: Ville Syrjälä Reviewed-by: Jouni Högander Link: https://lore.kernel.org/r/20250805115656.832235-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit cbd3baeffbc08052ce7dc53f11bf5524b4411056) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_irq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index fb25ec8adae3..68157f177b6a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1506,10 +1506,14 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl) if (!(master_ctl & GEN11_GU_MISC_IRQ)) return 0; + intel_display_rpm_assert_block(display); + iir = intel_de_read(display, GEN11_GU_MISC_IIR); if (likely(iir)) intel_de_write(display, GEN11_GU_MISC_IIR, iir); + intel_display_rpm_assert_unblock(display); + return iir; } From 8236820fd767f400d1baefb71bc7e36e37730a1e Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Mon, 11 Aug 2025 09:12:31 +0000 Subject: [PATCH 0528/1307] drm/i915/gt: Relocate compression repacking WA for JSL/EHL CACHE_MODE_0 registers should be saved and restored as part of the context, not during engine reset. Move the related workaround (Disable Repacking for Compression) from rcs_engine_wa_init() to icl_ctx_workarounds_init() for Jasper Lake and Elkhart Lake platforms. This ensures the WA is applied during context initialisation. BSPEC: 11322 Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Closes: Fixes: 0ddae025ab6c ("drm/i915: Disable compression tricks on JSL") Signed-off-by: Sebastian Brzezinka Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Andi Shyti Reviewed-by: Krzysztof Karas Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/4feaa24094e019e000ceb6011d8cd419b0361b3f.1754902406.git.sebastian.brzezinka@intel.com (cherry picked from commit c9932f0d604e4c8f2c6018e598a322acb43c68a2) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b37e400f74e5..5a95f06900b5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -634,6 +634,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); @@ -669,6 +671,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1406306137:icl,ehl */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display. + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } } /* @@ -2306,15 +2317,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { - /* - * "Disable Repacking for Compression (masked R/W access) - * before rendering compressed surfaces for display." - */ - wa_masked_en(wal, CACHE_MODE_0_GEN7, - DISABLE_REPACKING_FOR_COMPRESSION); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, From d4932a1b148bb6121121e56bad312c4339042d70 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Aug 2025 10:33:56 +0800 Subject: [PATCH 0529/1307] x86/bugs: Fix GDS mitigation selecting when mitigation is off The current GDS mitigation logic incorrectly returns early when the attack vector mitigation is turned off, which leads to two problems: 1. CPUs without ARCH_CAP_GDS_CTRL support are incorrectly marked with GDS_MITIGATION_OFF when they should be marked as GDS_MITIGATION_UCODE_NEEDED. 2. The mitigation state checks and locking verification that follow are skipped, which means: - fail to detect if the mitigation was locked - miss the warning when trying to disable a locked mitigation Remove the early return to ensure proper mitigation state handling. This allows: - Proper mitigation classification for non-ARCH_CAP_GDS_CTRL CPUs - Complete mitigation state verification This also addresses the failed MSR 0x123 write attempt at boot on non-ARCH_CAP_GDS_CTRL CPUs: unchecked MSR access error: WRMSR to 0x123 (tried to write 0x0000000000000010) at rIP: ... (update_gds_msr) Call Trace: identify_secondary_cpu start_secondary common_startup_64 WARNING: CPU: 1 PID: 0 at arch/x86/kernel/cpu/bugs.c:1053 update_gds_msr [ bp: Massage, zap superfluous braces. ] Fixes: 8c7261abcb7ad ("x86/bugs: Add attack vector controls for GDS") Suggested-by: Pawan Gupta Signed-off-by: Li RongQing Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta Link: https://lore.kernel.org/20250819023356.2012-1-lirongqing@baidu.com --- arch/x86/kernel/cpu/bugs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2186a771b9fc..49ef1b832c1a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1068,10 +1068,8 @@ static void __init gds_select_mitigation(void) if (gds_mitigation == GDS_MITIGATION_AUTO) { if (should_mitigate_vuln(X86_BUG_GDS)) gds_mitigation = GDS_MITIGATION_FULL; - else { + else gds_mitigation = GDS_MITIGATION_OFF; - return; - } } /* No microcode */ From 62c30c544359aa18b8fb2734166467a07d435c2d Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 14 Aug 2025 09:25:57 +0800 Subject: [PATCH 0530/1307] net: ethernet: mtk_ppe: add RCU lock around dev_fill_forward_path Ensure ndo_fill_forward_path() is called with RCU lock held. Fixes: 2830e314778d ("net: ethernet: mtk-ppe: fix traffic offload with bridged wlan") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20250814012559.3705-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index c855fb799ce1..e9bd32741983 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -101,7 +101,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) return -1; + rcu_read_lock(); err = dev_fill_forward_path(dev, addr, &stack); + rcu_read_unlock(); if (err) return err; From 0417adf367a0af11adf7ace849af4638cfb573f7 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 14 Aug 2025 09:25:58 +0800 Subject: [PATCH 0531/1307] ppp: fix race conditions in ppp_fill_forward_path ppp_fill_forward_path() has two race conditions: 1. The ppp->channels list can change between list_empty() and list_first_entry(), as ppp_lock() is not held. If the only channel is deleted in ppp_disconnect_channel(), list_first_entry() may access an empty head or a freed entry, and trigger a panic. 2. pch->chan can be NULL. When ppp_unregister_channel() is called, pch->chan is set to NULL before pch is removed from ppp->channels. Fix these by using a lockless RCU approach: - Use list_first_or_null_rcu() to safely test and access the first list entry. - Convert list modifications on ppp->channels to their RCU variants and add synchronize_net() after removal. - Check for a NULL pch->chan before dereferencing it. Fixes: f6efc675c9dd ("net: ppp: resolve forwarding path for bridge pppoe devices") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20250814012559.3705-2-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 8c98cbd4b06d..824c8dc4120b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1598,11 +1599,14 @@ static int ppp_fill_forward_path(struct net_device_path_ctx *ctx, if (ppp->flags & SC_MULTILINK) return -EOPNOTSUPP; - if (list_empty(&ppp->channels)) + pch = list_first_or_null_rcu(&ppp->channels, struct channel, clist); + if (!pch) + return -ENODEV; + + chan = READ_ONCE(pch->chan); + if (!chan) return -ENODEV; - pch = list_first_entry(&ppp->channels, struct channel, clist); - chan = pch->chan; if (!chan->ops->fill_forward_path) return -EOPNOTSUPP; @@ -2994,7 +2998,7 @@ ppp_unregister_channel(struct ppp_channel *chan) */ down_write(&pch->chan_sem); spin_lock_bh(&pch->downl); - pch->chan = NULL; + WRITE_ONCE(pch->chan, NULL); spin_unlock_bh(&pch->downl); up_write(&pch->chan_sem); ppp_disconnect_channel(pch); @@ -3515,7 +3519,7 @@ ppp_connect_channel(struct channel *pch, int unit) hdrlen = pch->file.hdrlen + 2; /* for protocol bytes */ if (hdrlen > ppp->dev->hard_header_len) ppp->dev->hard_header_len = hdrlen; - list_add_tail(&pch->clist, &ppp->channels); + list_add_tail_rcu(&pch->clist, &ppp->channels); ++ppp->n_channels; pch->ppp = ppp; refcount_inc(&ppp->file.refcnt); @@ -3545,10 +3549,11 @@ ppp_disconnect_channel(struct channel *pch) if (ppp) { /* remove it from the ppp unit's list */ ppp_lock(ppp); - list_del(&pch->clist); + list_del_rcu(&pch->clist); if (--ppp->n_channels == 0) wake_up_interruptible(&ppp->file.rwait); ppp_unlock(ppp); + synchronize_net(); if (refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); err = 0; From 01792bc3e5bdafa171dd83c7073f00e7de93a653 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 14 Aug 2025 16:21:06 +0530 Subject: [PATCH 0532/1307] net: ti: icssg-prueth: Fix HSR and switch offload Enablement during firwmare reload. To enable HSR / Switch offload, certain configurations are needed. Currently they are done inside icssg_change_mode(). This function only gets called if we move from one mode to another without bringing the links up / down. Once in HSR / Switch mode, if we bring the links down and bring it back up again. The callback sequence is, - emac_ndo_stop() Firmwares are stopped - emac_ndo_open() Firmwares are loaded In this path icssg_change_mode() doesn't get called and as a result the configurations needed for HSR / Switch is not done. To fix this, put all these configurations in a separate function icssg_enable_fw_offload() and call this from both icssg_change_mode() and emac_ndo_open() Fixes: 56375086d093 ("net: ti: icssg-prueth: Enable HSR Tx duplication, Tx Tag and Rx Tag offload") Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20250814105106.1491871-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 72 +++++++++++--------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 6c7d776ae4ee..dadce6009791 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -203,6 +203,44 @@ static void prueth_emac_stop(struct prueth *prueth) } } +static void icssg_enable_fw_offload(struct prueth *prueth) +{ + struct prueth_emac *emac; + int mac; + + for (mac = PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { + emac = prueth->emac[mac]; + if (prueth->is_hsr_offload_mode) { + if (emac->ndev->features & NETIF_F_HW_HSR_TAG_RM) + icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE); + else + icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE); + } + + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) { + if (netif_running(emac->ndev)) { + icssg_fdb_add_del(emac, eth_stp_addr, prueth->default_vlan, + ICSSG_FDB_ENTRY_P0_MEMBERSHIP | + ICSSG_FDB_ENTRY_P1_MEMBERSHIP | + ICSSG_FDB_ENTRY_P2_MEMBERSHIP | + ICSSG_FDB_ENTRY_BLOCK, + true); + icssg_vtbl_modify(emac, emac->port_vlan | DEFAULT_VID, + BIT(emac->port_id) | DEFAULT_PORT_MASK, + BIT(emac->port_id) | DEFAULT_UNTAG_MASK, + true); + if (prueth->is_hsr_offload_mode) + icssg_vtbl_modify(emac, DEFAULT_VID, + DEFAULT_PORT_MASK, + DEFAULT_UNTAG_MASK, true); + icssg_set_pvid(prueth, emac->port_vlan, emac->port_id); + if (prueth->is_switch_mode) + icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); + } + } + } +} + static int prueth_emac_common_start(struct prueth *prueth) { struct prueth_emac *emac; @@ -753,6 +791,7 @@ static int emac_ndo_open(struct net_device *ndev) ret = prueth_emac_common_start(prueth); if (ret) goto free_rx_irq; + icssg_enable_fw_offload(prueth); } flow_cfg = emac->dram.va + ICSSG_CONFIG_OFFSET + PSI_L_REGULAR_FLOW_ID_BASE_OFFSET; @@ -1360,8 +1399,7 @@ static int prueth_emac_restart(struct prueth *prueth) static void icssg_change_mode(struct prueth *prueth) { - struct prueth_emac *emac; - int mac, ret; + int ret; ret = prueth_emac_restart(prueth); if (ret) { @@ -1369,35 +1407,7 @@ static void icssg_change_mode(struct prueth *prueth) return; } - for (mac = PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { - emac = prueth->emac[mac]; - if (prueth->is_hsr_offload_mode) { - if (emac->ndev->features & NETIF_F_HW_HSR_TAG_RM) - icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_ENABLE); - else - icssg_set_port_state(emac, ICSSG_EMAC_HSR_RX_OFFLOAD_DISABLE); - } - - if (netif_running(emac->ndev)) { - icssg_fdb_add_del(emac, eth_stp_addr, prueth->default_vlan, - ICSSG_FDB_ENTRY_P0_MEMBERSHIP | - ICSSG_FDB_ENTRY_P1_MEMBERSHIP | - ICSSG_FDB_ENTRY_P2_MEMBERSHIP | - ICSSG_FDB_ENTRY_BLOCK, - true); - icssg_vtbl_modify(emac, emac->port_vlan | DEFAULT_VID, - BIT(emac->port_id) | DEFAULT_PORT_MASK, - BIT(emac->port_id) | DEFAULT_UNTAG_MASK, - true); - if (prueth->is_hsr_offload_mode) - icssg_vtbl_modify(emac, DEFAULT_VID, - DEFAULT_PORT_MASK, - DEFAULT_UNTAG_MASK, true); - icssg_set_pvid(prueth, emac->port_vlan, emac->port_id); - if (prueth->is_switch_mode) - icssg_set_port_state(emac, ICSSG_EMAC_PORT_VLAN_AWARE_ENABLE); - } - } + icssg_enable_fw_offload(prueth); } static int prueth_netdevice_port_link(struct net_device *ndev, From 70fb252a84a47430240d924528a40e84c2b027e4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 18 Aug 2025 10:59:07 -0400 Subject: [PATCH 0533/1307] USB: core: Update kerneldoc for usb_hcd_giveback_urb() The kerneldoc added for usb_hcd_giveback_urb() by commit 41631d3616c3 ("usb: core: Replace in_interrupt() in comments") is unclear and incorrect. Update the text for greater clarity and to say that URBs for a root hub will always use a BH context for their completion. Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/41eaae05-116a-4568-940c-eeb94ab6baa0@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 6270fbb5c699..9dd79769cad1 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1717,10 +1717,10 @@ static void usb_giveback_urb_bh(struct work_struct *work) * @urb: urb being returned to the USB device driver. * @status: completion status code for the URB. * - * Context: atomic. The completion callback is invoked in caller's context. - * For HCDs with HCD_BH flag set, the completion callback is invoked in BH - * context (except for URBs submitted to the root hub which always complete in - * caller's context). + * Context: atomic. The completion callback is invoked either in a work queue + * (BH) context or in the caller's context, depending on whether the HCD_BH + * flag is set in the @hcd structure, except that URBs submitted to the + * root hub always complete in BH context. * * This hands the URB from HCD to its USB device driver, using its * completion function. The HCD has freed all per-urb resources From 309b6341d5570fb2b41b923de2fc9bb147106b80 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 18 Aug 2025 18:50:19 +0200 Subject: [PATCH 0534/1307] usb: typec: fusb302: Revert incorrect threaded irq fix The fusb302 irq handler has been carefully optimized by Hans de Goede in commit 207338ec5a27 ("usb: typec: fusb302: Improve suspend/resume handling"). A recent 'fix' undid most of that work to avoid a virtio-gpio driver bug. This reverts the incorrect fix, since it is of very low quality. It reverts the quirks from Hans change (and thus reintroduces the problems fixed by Hans) while keeping the overhead from the original change. The proper fix to support using fusb302 with an interrupt line provided by virtio-gpio must be implemented in the virtio driver instead, which should support disabling the IRQ from the fusb302 interrupt routine. Cc: Hans de Goede Cc: Yongbo Zhang Fixes: 1c2d81bded19 ("usb: typec: fusb302: fix scheduling while atomic when using virtio-gpio") Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250818-fusb302-unthreaded-irq-v1-1-3a9a11a9f56f@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/fusb302.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index a4ff2403ddd6..870a71f953f6 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -1485,6 +1485,9 @@ static irqreturn_t fusb302_irq_intn(int irq, void *dev_id) struct fusb302_chip *chip = dev_id; unsigned long flags; + /* Disable our level triggered IRQ until our irq_work has cleared it */ + disable_irq_nosync(chip->gpio_int_n_irq); + spin_lock_irqsave(&chip->irq_lock, flags); if (chip->irq_suspended) chip->irq_while_suspended = true; @@ -1627,6 +1630,7 @@ static void fusb302_irq_work(struct work_struct *work) } done: mutex_unlock(&chip->lock); + enable_irq(chip->gpio_int_n_irq); } static int init_gpio(struct fusb302_chip *chip) @@ -1751,10 +1755,9 @@ static int fusb302_probe(struct i2c_client *client) goto destroy_workqueue; } - ret = devm_request_threaded_irq(dev, chip->gpio_int_n_irq, - NULL, fusb302_irq_intn, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "fsc_interrupt_int_n", chip); + ret = request_irq(chip->gpio_int_n_irq, fusb302_irq_intn, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "fsc_interrupt_int_n", chip); if (ret < 0) { dev_err(dev, "cannot request IRQ for GPIO Int_N, ret=%d", ret); goto tcpm_unregister_port; @@ -1779,6 +1782,7 @@ static void fusb302_remove(struct i2c_client *client) struct fusb302_chip *chip = i2c_get_clientdata(client); disable_irq_wake(chip->gpio_int_n_irq); + free_irq(chip->gpio_int_n_irq, chip); cancel_work_sync(&chip->irq_work); cancel_delayed_work_sync(&chip->bc_lvl_handler); tcpm_unregister_port(chip->tcpm_port); From 1ca61060de92a4320d73adfe5dc8d335653907ac Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 6 Aug 2025 15:06:05 +0800 Subject: [PATCH 0535/1307] fpga: zynq_fpga: Fix the wrong usage of dma_map_sgtable() dma_map_sgtable() returns only 0 or the error code. Read sgt->nents to get the number of mapped segments. Fixes: 37e00703228a ("zynq_fpga: use sgtable-based scatterlist wrappers") Reported-by: Pavel Pisa Closes: https://lore.kernel.org/linux-fpga/202508041548.22955.pisa@fel.cvut.cz/ Reviewed-by: Jason Gunthorpe Reviewed-by: Marek Szyprowski Signed-off-by: Xu Yilun Tested-by: Pavel Pisa Link: https://lore.kernel.org/r/20250806070605.1920909-2-yilun.xu@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/zynq-fpga.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index 0be0d569589d..b7629a0e4813 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -405,12 +405,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) } } - priv->dma_nelms = - dma_map_sgtable(mgr->dev.parent, sgt, DMA_TO_DEVICE, 0); - if (priv->dma_nelms == 0) { + err = dma_map_sgtable(mgr->dev.parent, sgt, DMA_TO_DEVICE, 0); + if (err) { dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); - return -ENOMEM; + return err; } + priv->dma_nelms = sgt->nents; /* enable clock */ err = clk_enable(priv->clk); From 300a0cfe9f375b2843bcb331bcfa7503475ef5dd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 11:05:09 +0200 Subject: [PATCH 0536/1307] cdx: Fix off-by-one error in cdx_rpmsg_probe() In cdx_rpmsg_probe(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in one character less being copied from 'cdx_rpmsg_id_table[0].name' to 'chinfo.name'. Use the destination buffer size instead to ensure the name is copied correctly. Cc: stable Fixes: 2a226927d9b8 ("cdx: add rpmsg communication channel for CDX") Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250806090512.121260-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/cdx/controller/cdx_rpmsg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cdx/controller/cdx_rpmsg.c b/drivers/cdx/controller/cdx_rpmsg.c index 04b578a0be17..61f1a290ff08 100644 --- a/drivers/cdx/controller/cdx_rpmsg.c +++ b/drivers/cdx/controller/cdx_rpmsg.c @@ -129,8 +129,7 @@ static int cdx_rpmsg_probe(struct rpmsg_device *rpdev) chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = rpdev->dst; - strscpy(chinfo.name, cdx_rpmsg_id_table[0].name, - strlen(cdx_rpmsg_id_table[0].name)); + strscpy(chinfo.name, cdx_rpmsg_id_table[0].name, sizeof(chinfo.name)); cdx_mcdi->ept = rpmsg_create_ept(rpdev, cdx_rpmsg_cb, NULL, chinfo); if (!cdx_mcdi->ept) { From 96cb948408b3adb69df7e451ba7da9d21f814d00 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Mon, 7 Jul 2025 20:39:58 +0800 Subject: [PATCH 0537/1307] comedi: pcl726: Prevent invalid irq number The reproducer passed in an irq number(0x80008000) that was too large, which triggered the oob. Added an interrupt number check to prevent users from passing in an irq number that was too large. If `it->options[1]` is 31, then `1 << it->options[1]` is still invalid because it shifts a 1-bit into the sign bit (which is UB in C). Possible solutions include reducing the upper bound on the `it->options[1]` value to 30 or lower, or using `1U << it->options[1]`. The old code would just not attempt to request the IRQ if the `options[1]` value were invalid. And it would still configure the device without interrupts even if the call to `request_irq` returned an error. So it would be better to combine this test with the test below. Fixes: fff46207245c ("staging: comedi: pcl726: enable the interrupt support code") Cc: stable # 5.13+ Reported-by: syzbot+5cd373521edd68bebcb3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5cd373521edd68bebcb3 Tested-by: syzbot+5cd373521edd68bebcb3@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Reviewed-by: Ian Abbott Link: https://lore.kernel.org/r/tencent_3C66983CC1369E962436264A50759176BF09@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/pcl726.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/comedi/drivers/pcl726.c b/drivers/comedi/drivers/pcl726.c index 0430630e6ebb..b542896fa0e4 100644 --- a/drivers/comedi/drivers/pcl726.c +++ b/drivers/comedi/drivers/pcl726.c @@ -328,7 +328,8 @@ static int pcl726_attach(struct comedi_device *dev, * Hook up the external trigger source interrupt only if the * user config option is valid and the board supports interrupts. */ - if (it->options[1] && (board->irq_mask & (1 << it->options[1]))) { + if (it->options[1] > 0 && it->options[1] < 16 && + (board->irq_mask & (1U << it->options[1]))) { ret = request_irq(it->options[1], pcl726_interrupt, 0, dev->board_name, dev); if (ret == 0) { From 3cd212e895ca2d58963fdc6422502b10dd3966bb Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 25 Jul 2025 13:53:24 +0100 Subject: [PATCH 0538/1307] comedi: Fix use of uninitialized memory in do_insn_ioctl() and do_insnlist_ioctl() syzbot reports a KMSAN kernel-infoleak in `do_insn_ioctl()`. A kernel buffer is allocated to hold `insn->n` samples (each of which is an `unsigned int`). For some instruction types, `insn->n` samples are copied back to user-space, unless an error code is being returned. The problem is that not all the instruction handlers that need to return data to userspace fill in the whole `insn->n` samples, so that there is an information leak. There is a similar syzbot report for `do_insnlist_ioctl()`, although it does not have a reproducer for it at the time of writing. One culprit is `insn_rw_emulate_bits()` which is used as the handler for `INSN_READ` or `INSN_WRITE` instructions for subdevices that do not have a specific handler for that instruction, but do have an `INSN_BITS` handler. For `INSN_READ` it only fills in at most 1 sample, so if `insn->n` is greater than 1, the remaining `insn->n - 1` samples copied to userspace will be uninitialized kernel data. Another culprit is `vm80xx_ai_insn_read()` in the "vm80xx" driver. It never returns an error, even if it fails to fill the buffer. Fix it in `do_insn_ioctl()` and `do_insnlist_ioctl()` by making sure that uninitialized parts of the allocated buffer are zeroed before handling each instruction. Thanks to Arnaud Lecomte for their fix to `do_insn_ioctl()`. That fix replaced the call to `kmalloc_array()` with `kcalloc()`, but it is not always necessary to clear the whole buffer. Fixes: ed9eccbe8970 ("Staging: add comedi core") Reported-by: syzbot+a5e45f768aab5892da5d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a5e45f768aab5892da5d Reported-by: syzbot+fb4362a104d45ab09cf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fb4362a104d45ab09cf9 Cc: stable # 5.13+ Cc: Arnaud Lecomte Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250725125324.80276-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/comedi_fops.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 23b7178522ae..7e2f2b1a1c36 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -1587,6 +1587,9 @@ static int do_insnlist_ioctl(struct comedi_device *dev, memset(&data[n], 0, (MIN_SAMPLES - n) * sizeof(unsigned int)); } + } else { + memset(data, 0, max_t(unsigned int, n, MIN_SAMPLES) * + sizeof(unsigned int)); } ret = parse_insn(dev, insns + i, data, file); if (ret < 0) @@ -1670,6 +1673,8 @@ static int do_insn_ioctl(struct comedi_device *dev, memset(&data[insn->n], 0, (MIN_SAMPLES - insn->n) * sizeof(unsigned int)); } + } else { + memset(data, 0, n_data * sizeof(unsigned int)); } ret = parse_insn(dev, insn, data, file); if (ret < 0) From 7afba9221f70d4cbce0f417c558879cba0eb5e66 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 25 Jul 2025 15:10:34 +0100 Subject: [PATCH 0539/1307] comedi: Make insn_rw_emulate_bits() do insn->n samples The `insn_rw_emulate_bits()` function is used as a default handler for `INSN_READ` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_READ`. Similarly, it is used as a default handler for `INSN_WRITE` instructions for subdevices that have a handler for `INSN_BITS` but not for `INSN_WRITE`. It works by emulating the `INSN_READ` or `INSN_WRITE` instruction handling with a constructed `INSN_BITS` instruction. However, `INSN_READ` and `INSN_WRITE` instructions are supposed to be able read or write multiple samples, indicated by the `insn->n` value, but `insn_rw_emulate_bits()` currently only handles a single sample. For `INSN_READ`, the comedi core will copy `insn->n` samples back to user-space. (That triggered KASAN kernel-infoleak errors when `insn->n` was greater than 1, but that is being fixed more generally elsewhere in the comedi core.) Make `insn_rw_emulate_bits()` either handle `insn->n` samples, or return an error, to conform to the general expectation for `INSN_READ` and `INSN_WRITE` handlers. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: stable # 5.13+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250725141034.87297-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index f1dc854928c1..c9ebaadc5e82 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c @@ -620,11 +620,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, unsigned int chan = CR_CHAN(insn->chanspec); unsigned int base_chan = (chan < 32) ? 0 : chan; unsigned int _data[2]; + unsigned int i; int ret; - if (insn->n == 0) - return 0; - memset(_data, 0, sizeof(_data)); memset(&_insn, 0, sizeof(_insn)); _insn.insn = INSN_BITS; @@ -635,18 +633,21 @@ static int insn_rw_emulate_bits(struct comedi_device *dev, if (insn->insn == INSN_WRITE) { if (!(s->subdev_flags & SDF_WRITABLE)) return -EINVAL; - _data[0] = 1U << (chan - base_chan); /* mask */ - _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */ + _data[0] = 1U << (chan - base_chan); /* mask */ + } + for (i = 0; i < insn->n; i++) { + if (insn->insn == INSN_WRITE) + _data[1] = data[i] ? _data[0] : 0; /* bits */ + + ret = s->insn_bits(dev, s, &_insn, _data); + if (ret < 0) + return ret; + + if (insn->insn == INSN_READ) + data[i] = (_data[1] >> (chan - base_chan)) & 1; } - ret = s->insn_bits(dev, s, &_insn, _data); - if (ret < 0) - return ret; - - if (insn->insn == INSN_READ) - data[0] = (_data[1] >> (chan - base_chan)) & 1; - - return 1; + return insn->n; } static int __comedi_device_postconfig_async(struct comedi_device *dev, From b47b493d6387ae437098112936f32be27f73516c Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 4 Aug 2025 12:29:55 +0400 Subject: [PATCH 0540/1307] most: core: Drop device reference after usage in get_channel() In get_channel(), the reference obtained by bus_find_device_by_name() was dropped via put_device() before accessing the device's driver data Move put_device() after usage to avoid potential issues. Fixes: 2485055394be ("staging: most: core: drop device reference") Cc: stable Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20250804082955.3621026-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/most/core.c b/drivers/most/core.c index a635d5082ebb..da319d108ea1 100644 --- a/drivers/most/core.c +++ b/drivers/most/core.c @@ -538,8 +538,8 @@ static struct most_channel *get_channel(char *mdev, char *mdev_ch) dev = bus_find_device_by_name(&mostbus, NULL, mdev); if (!dev) return NULL; - put_device(dev); iface = dev_get_drvdata(dev); + put_device(dev); list_for_each_entry_safe(c, tmp, &iface->p->channel_list, list) { if (!strcmp(dev_name(&c->dev), mdev_ch)) return c; From 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 11 Aug 2025 22:18:30 +0800 Subject: [PATCH 0541/1307] fs/buffer: fix use-after-free when call bh_read() helper There's issue as follows: BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110 Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0 CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: dump_stack_lvl+0x55/0x70 print_address_description.constprop.0+0x2c/0x390 print_report+0xb4/0x270 kasan_report+0xb8/0xf0 end_buffer_read_sync+0xe3/0x110 end_bio_bh_io_sync+0x56/0x80 blk_update_request+0x30a/0x720 scsi_end_request+0x51/0x2b0 scsi_io_completion+0xe3/0x480 ? scsi_device_unbusy+0x11e/0x160 blk_complete_reqs+0x7b/0x90 handle_softirqs+0xef/0x370 irq_exit_rcu+0xa5/0xd0 sysvec_apic_timer_interrupt+0x6e/0x90 Above issue happens when do ntfs3 filesystem mount, issue may happens as follows: mount IRQ ntfs_fill_super read_cache_page do_read_cache_folio filemap_read_folio mpage_read_folio do_mpage_readpage ntfs_get_block_vbo bh_read submit_bh wait_on_buffer(bh); blk_complete_reqs scsi_io_completion scsi_end_request blk_update_request end_bio_bh_io_sync end_buffer_read_sync __end_buffer_read_notouch unlock_buffer wait_on_buffer(bh);--> return will return to caller put_bh --> trigger stack-out-of-bounds In the mpage_read_folio() function, the stack variable 'map_bh' is passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and wait_on_buffer() returns to continue processing, the stack variable is likely to be reclaimed. Consequently, during the end_buffer_read_sync() process, calling put_bh() may result in stack overrun. If the bh is not allocated on the stack, it belongs to a folio. Freeing a buffer head which belongs to a folio is done by drop_buffers() which will fail to free buffers which are still locked. So it is safe to call put_bh() before __end_buffer_read_notouch(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ye Bin Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index ead4dc85debd..6a8752f7bbed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) */ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { - __end_buffer_read_notouch(bh, uptodate); put_bh(bh); + __end_buffer_read_notouch(bh, uptodate); } EXPORT_SYMBOL(end_buffer_read_sync); From 589c12edcd8a7b3b24f407b58443bab3560125e4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Aug 2025 12:41:15 +0300 Subject: [PATCH 0542/1307] coredump: Fix return value in coredump_parse() The coredump_parse() function is bool type. It should return true on success and false on failure. The cn_printf() returns zero on success or negative error codes. This mismatch means that when "return err;" here, it is treated as success instead of failure. Change it to return false instead. Fixes: a5715af549b2 ("coredump: make coredump_parse() return bool") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/aKRGu14w5vPSZLgv@stanley.mountain Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index fedbead956ed..5dce257c67fc 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -345,7 +345,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, was_space = false; err = cn_printf(cn, "%c", '\0'); if (err) - return err; + return false; (*argv)[(*argc)++] = cn->used; } } From c237aa9884f238e1480897463ca034877ca7530b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 19 Aug 2025 12:08:58 +0200 Subject: [PATCH 0543/1307] kernfs: don't fail listing extended attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace doesn't expect a failure to list extended attributes: $ ls -lA /sys/ ls: /sys/: No data available ls: /sys/kernel: No data available ls: /sys/power: No data available ls: /sys/class: No data available ls: /sys/devices: No data available ls: /sys/dev: No data available ls: /sys/hypervisor: No data available ls: /sys/fs: No data available ls: /sys/bus: No data available ls: /sys/firmware: No data available ls: /sys/block: No data available ls: /sys/module: No data available total 0 drwxr-xr-x 2 root root 0 Jan 1 1970 block drwxr-xr-x 52 root root 0 Jan 1 1970 bus drwxr-xr-x 88 root root 0 Jan 1 1970 class drwxr-xr-x 4 root root 0 Jan 1 1970 dev drwxr-xr-x 11 root root 0 Jan 1 1970 devices drwxr-xr-x 3 root root 0 Jan 1 1970 firmware drwxr-xr-x 10 root root 0 Jan 1 1970 fs drwxr-xr-x 2 root root 0 Jul 2 09:43 hypervisor drwxr-xr-x 14 root root 0 Jan 1 1970 kernel drwxr-xr-x 251 root root 0 Jan 1 1970 module drwxr-xr-x 3 root root 0 Jul 2 09:43 power Fix it by simply reporting success when no extended attributes are available instead of reporting ENODATA. Link: https://lore.kernel.org/78b13bcdae82ade95e88f315682966051f461dde.camel@linaro.org Fixes: d1f4e9026007 ("kernfs: remove iattr_mutex") # mainline only Reported-by: André Draszik Link: https://lore.kernel.org/20250819-ahndung-abgaben-524a535f8101@brauner Signed-off-by: Christian Brauner --- fs/kernfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3c293a5a21b1..457f91c412d4 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -142,9 +142,9 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) struct kernfs_node *kn = kernfs_dentry_node(dentry); struct kernfs_iattrs *attrs; - attrs = kernfs_iattrs_noalloc(kn); + attrs = kernfs_iattrs(kn); if (!attrs) - return -ENODATA; + return -ENOMEM; return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size); } From a2c1f82618b0b65f1ef615aa9cfdac8122537d69 Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Mon, 18 Aug 2025 21:43:10 +0800 Subject: [PATCH 0544/1307] signal: Fix memory leak for PIDFD_SELF* sentinels Commit f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own thread/process") introduced a leak by acquiring a pid reference through get_task_pid(), which increments pid->count but never drops it with put_pid(). As a result, kmemleak reports unreferenced pid objects after running tools/testing/selftests/pidfd/pidfd_test, for example: unreferenced object 0xff1100206757a940 (size 160): comm "pidfd_test", pid 16965, jiffies 4294853028 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 fd 57 50 04 .............WP. 5e 44 00 00 00 00 00 00 18 de 34 17 01 00 11 ff ^D........4..... backtrace (crc cd8844d4): kmem_cache_alloc_noprof+0x2f4/0x3f0 alloc_pid+0x54/0x3d0 copy_process+0xd58/0x1740 kernel_clone+0x99/0x3b0 __do_sys_clone3+0xbe/0x100 do_syscall_64+0x7b/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix this by calling put_pid() after do_pidfd_send_signal() returns. Fixes: f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own thread/process") Signed-off-by: Adrian Huang (Lenovo) Link: https://lore.kernel.org/20250818134310.12273-1-adrianhuang0701@gmail.com Tested-by: Lorenzo Stoakes Reviewed-by: Lorenzo Stoakes Signed-off-by: Christian Brauner --- kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index e2c928de7d2c..fe9190d84f28 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, { struct pid *pid; enum pid_type type; + int ret; /* Enforce flags be set to 0 until we add an extension. */ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) @@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, } } - return do_pidfd_send_signal(pid, sig, type, info, flags); + ret = do_pidfd_send_signal(pid, sig, type, info, flags); + put_pid(pid); + + return ret; } static int From 23800ad1265f10c2bc6f42154ce4d20e59f2900e Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 14 Aug 2025 13:34:29 -0500 Subject: [PATCH 0545/1307] gpiolib: acpi: Add quirk for ASUS ProArt PX13 The ASUS ProArt PX13 has a spurious wakeup event from the touchpad a few moments after entering hardware sleep. This can be avoided by preventing the touchpad from being a wake source. Add to the wakeup ignore list. Reported-by: Amit Chaudhari Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4482 Tested-by: Amit Chaudhari Signed-off-by: Mario Limonciello (AMD) Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/20250814183430.3887973-1-superm1@kernel.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi-quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi-quirks.c b/drivers/gpio/gpiolib-acpi-quirks.c index c13545dce349..bfb04e67c4bc 100644 --- a/drivers/gpio/gpiolib-acpi-quirks.c +++ b/drivers/gpio/gpiolib-acpi-quirks.c @@ -344,6 +344,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "AMDI0030:00@8", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 5.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/4482 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_FAMILY, "ProArt PX13"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "ASCP1A00:00@8", + }, + }, {} /* Terminating entry */ }; From 9ce43caa4b7be707638d49ad4fb358b6ff646e91 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Aug 2025 11:55:19 +0900 Subject: [PATCH 0546/1307] xfs: Default XFS_RT to Y if CONFIG_BLK_DEV_ZONED is enabled XFS support for zoned block devices requires the realtime subvolume support (XFS_RT) to be enabled. Change the default configuration value of XFS_RT from N to CONFIG_BLK_DEV_ZONED to align with this requirement. This change still allows the user to disable XFS_RT if this feature is not desired for the user use case. Suggested-by: Christoph Hellwig Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index ae0ca6858496..065953475cf5 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -105,6 +105,7 @@ config XFS_POSIX_ACL config XFS_RT bool "XFS Realtime subvolume support" depends on XFS_FS + default BLK_DEV_ZONED help If you say Y here you will be able to mount and use XFS filesystems which contain a realtime subvolume. The realtime subvolume is a From d004d70d6cdf03928da0d05c8c15c2ccc15657cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 07:06:43 +0200 Subject: [PATCH 0547/1307] xfs: remove xfs_last_used_zone This was my first attempt at caching the last used zone. But it turns out for O_DIRECT or RWF_DONTCACHE that operate concurrently or in very short sequence, the bmap btree does not record a written extent yet, so it fails. Because it then still finds the last written zone it can lead to a weird ping-pong around a few zones with writers seeing different values. Remove it entirely as the later added xfs_cached_zone actually does a much better job enforcing the locality as the zone is associated with the inode in the MRU cache as soon as the zone is selected. Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Christoph Hellwig Tested-by: Damien Le Moal Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 45 ++--------------------------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index f8bd6d741755..f28214c28ab5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -374,44 +374,6 @@ xfs_zone_free_blocks( return 0; } -/* - * Check if the zone containing the data just before the offset we are - * writing to is still open and has space. - */ -static struct xfs_open_zone * -xfs_last_used_zone( - struct iomap_ioend *ioend) -{ - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSB(mp, ioend->io_offset); - struct xfs_rtgroup *rtg = NULL; - struct xfs_open_zone *oz = NULL; - struct xfs_iext_cursor icur; - struct xfs_bmbt_irec got; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (!xfs_iext_lookup_extent_before(ip, &ip->i_df, &offset_fsb, - &icur, &got)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return NULL; - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - rtg = xfs_rtgroup_grab(mp, xfs_rtb_to_rgno(mp, got.br_startblock)); - if (!rtg) - return NULL; - - xfs_ilock(rtg_rmap(rtg), XFS_ILOCK_SHARED); - oz = READ_ONCE(rtg->rtg_open_zone); - if (oz && (oz->oz_is_gc || !atomic_inc_not_zero(&oz->oz_ref))) - oz = NULL; - xfs_iunlock(rtg_rmap(rtg), XFS_ILOCK_SHARED); - - xfs_rtgroup_rele(rtg); - return oz; -} - static struct xfs_group * xfs_find_free_zone( struct xfs_mount *mp, @@ -918,12 +880,9 @@ xfs_zone_alloc_and_submit( goto out_error; /* - * If we don't have a cached zone in this write context, see if the - * last extent before the one we are writing to points to an active - * zone. If so, just continue writing to it. + * If we don't have a locally cached zone in this write context, see if + * the inode is still associated with a zone and use that if so. */ - if (!*oz && ioend->io_offset) - *oz = xfs_last_used_zone(ioend); if (!*oz) *oz = xfs_cached_zone(mp, ip); From 7d523255f524c95208cefef4edaed149615ff96c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 07:06:44 +0200 Subject: [PATCH 0548/1307] xfs: kick off inodegc when failing to reserve zoned blocks XFS processes truncating unlinked inodes asynchronously and thus the free space pool only sees them with a delay. The non-zoned write path thus calls into inodegc to accelerate this processing before failing an allocation due the lack of free blocks. Do the same for the zoned space reservation. Fixes: 0bb2193056b5 ("xfs: add support for zoned space reservations") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_space_resv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c index 1313c55b8cbe..9cd38716fd25 100644 --- a/fs/xfs/xfs_zone_space_resv.c +++ b/fs/xfs/xfs_zone_space_resv.c @@ -10,6 +10,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_rtbitmap.h" +#include "xfs_icache.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" #include "xfs_zones.h" @@ -230,6 +231,11 @@ xfs_zoned_space_reserve( error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb, flags & XFS_ZR_RESERVED); + if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) { + xfs_inodegc_flush(mp); + error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb, + flags & XFS_ZR_RESERVED); + } if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1) error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags); if (error) From 8e5a2441e18640fb22a25fd097368957bf5cab91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 07:06:45 +0200 Subject: [PATCH 0549/1307] xfs: reject swapon for inodes on a zoned file system earlier No point in going down into the iomap mapping loop when we know it will be rejected. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_aops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1ee4f835ac3c..a26f79815533 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -760,6 +760,9 @@ xfs_vm_swap_activate( { struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + if (xfs_is_zoned_inode(ip)) + return -EINVAL; + /* * Swap file activation can race against concurrent shared extent * removal in files that have been cloned. If this happens, From 2eb03376151bb8585caa23ed2673583107bb5193 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Tue, 19 Aug 2025 15:58:43 +0300 Subject: [PATCH 0550/1307] usb: xhci: Fix slot_id resource race conflict xHC controller may immediately reuse a slot_id after it's disabled, giving it to a new enumerating device before the xhci driver freed all resources related to the disabled device. In such a scenario, device-A with slot_id equal to 1 is disconnecting while device-B is enumerating, device-B will fail to enumerate in the follow sequence. 1.[device-A] send disable slot command 2.[device-B] send enable slot command 3.[device-A] disable slot command completed and wakeup waiting thread 4.[device-B] enable slot command completed with slot_id equal to 1 and wakeup waiting thread 5.[device-B] driver checks that slot_id is still in use (by device-A) in xhci_alloc_virt_device, and fail to enumerate due to this conflict 6.[device-A] xhci->devs[slot_id] set to NULL in xhci_free_virt_device To fix driver's slot_id resources conflict, clear xhci->devs[slot_id] and xhci->dcbba->dev_context_ptrs[slot_id] pointers in the interrupt context when disable slot command completes successfully. Simultaneously, adjust function xhci_free_virt_device to accurately handle device release. [minor smatch warning and commit message fix -Mathias] Cc: stable@vger.kernel.org Fixes: 7faac1953ed1 ("xhci: avoid race between disable slot command and host runtime suspend") Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250819125844.2042452-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 +-- drivers/usb/host/xhci-mem.c | 22 +++++++++++----------- drivers/usb/host/xhci-ring.c | 9 +++++++-- drivers/usb/host/xhci.c | 21 ++++++++++++++------- drivers/usb/host/xhci.h | 3 ++- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 92bb84f8132a..b3a59ce1b3f4 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -704,8 +704,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, if (!xhci->devs[i]) continue; - retval = xhci_disable_slot(xhci, i); - xhci_free_virt_device(xhci, i); + retval = xhci_disable_and_free_slot(xhci, i); if (retval) xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n", i, retval); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 07289333a1e8..81eaad87a3d9 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -865,21 +865,20 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, * will be manipulated by the configure endpoint, allocate device, or update * hub functions while this function is removing the TT entries from the list. */ -void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) +void xhci_free_virt_device(struct xhci_hcd *xhci, struct xhci_virt_device *dev, + int slot_id) { - struct xhci_virt_device *dev; int i; int old_active_eps = 0; /* Slot ID 0 is reserved */ - if (slot_id == 0 || !xhci->devs[slot_id]) + if (slot_id == 0 || !dev) return; - dev = xhci->devs[slot_id]; - - xhci->dcbaa->dev_context_ptrs[slot_id] = 0; - if (!dev) - return; + /* If device ctx array still points to _this_ device, clear it */ + if (dev->out_ctx && + xhci->dcbaa->dev_context_ptrs[slot_id] == cpu_to_le64(dev->out_ctx->dma)) + xhci->dcbaa->dev_context_ptrs[slot_id] = 0; trace_xhci_free_virt_device(dev); @@ -920,8 +919,9 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) dev->udev->slot_id = 0; if (dev->rhub_port && dev->rhub_port->slot_id == slot_id) dev->rhub_port->slot_id = 0; - kfree(xhci->devs[slot_id]); - xhci->devs[slot_id] = NULL; + if (xhci->devs[slot_id] == dev) + xhci->devs[slot_id] = NULL; + kfree(dev); } /* @@ -962,7 +962,7 @@ static void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_i out: /* we are now at a leaf device */ xhci_debugfs_remove_slot(xhci, slot_id); - xhci_free_virt_device(xhci, slot_id); + xhci_free_virt_device(xhci, vdev, slot_id); } int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ecd757d482c5..4f8f5aab109d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1592,7 +1592,8 @@ static void xhci_handle_cmd_enable_slot(int slot_id, struct xhci_command *comman command->slot_id = 0; } -static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) +static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id, + u32 cmd_comp_code) { struct xhci_virt_device *virt_dev; struct xhci_slot_ctx *slot_ctx; @@ -1607,6 +1608,10 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); + if (cmd_comp_code == COMP_SUCCESS) { + xhci->dcbaa->dev_context_ptrs[slot_id] = 0; + xhci->devs[slot_id] = NULL; + } } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id) @@ -1856,7 +1861,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci_handle_cmd_enable_slot(slot_id, cmd, cmd_comp_code); break; case TRB_DISABLE_SLOT: - xhci_handle_cmd_disable_slot(xhci, slot_id); + xhci_handle_cmd_disable_slot(xhci, slot_id, cmd_comp_code); break; case TRB_CONFIG_EP: if (!cmd->completion) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 47151ca527bf..0e03691f03bf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3932,8 +3932,7 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd, * Obtaining a new device slot to inform the xHCI host that * the USB device has been reset. */ - ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + ret = xhci_disable_and_free_slot(xhci, udev->slot_id); if (!ret) { ret = xhci_alloc_dev(hcd, udev); if (ret == 1) @@ -4090,7 +4089,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_disable_slot(xhci, udev->slot_id); spin_lock_irqsave(&xhci->lock, flags); - xhci_free_virt_device(xhci, udev->slot_id); + xhci_free_virt_device(xhci, virt_dev, udev->slot_id); spin_unlock_irqrestore(&xhci->lock, flags); } @@ -4139,6 +4138,16 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) return 0; } +int xhci_disable_and_free_slot(struct xhci_hcd *xhci, u32 slot_id) +{ + struct xhci_virt_device *vdev = xhci->devs[slot_id]; + int ret; + + ret = xhci_disable_slot(xhci, slot_id); + xhci_free_virt_device(xhci, vdev, slot_id); + return ret; +} + /* * Checks if we have enough host controller resources for the default control * endpoint. @@ -4245,8 +4254,7 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) return 1; disable_slot: - xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_and_free_slot(xhci, udev->slot_id); return 0; } @@ -4382,8 +4390,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, dev_warn(&udev->dev, "Device not responding to setup %s.\n", act); mutex_unlock(&xhci->mutex); - ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + ret = xhci_disable_and_free_slot(xhci, udev->slot_id); if (!ret) { if (xhci_alloc_dev(hcd, udev) == 1) xhci_setup_addressable_virt_dev(xhci, udev); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a20f4e7cd43a..85d5b964bf1e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1791,7 +1791,7 @@ void xhci_dbg_trace(struct xhci_hcd *xhci, void (*trace)(struct va_format *), /* xHCI memory management */ void xhci_mem_cleanup(struct xhci_hcd *xhci); int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags); -void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id); +void xhci_free_virt_device(struct xhci_hcd *xhci, struct xhci_virt_device *dev, int slot_id); int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device *udev, gfp_t flags); int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *udev); void xhci_copy_ep0_dequeue_into_input_ctx(struct xhci_hcd *xhci, @@ -1888,6 +1888,7 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); +int xhci_disable_and_free_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); From ff9a09b3e09c7b794b56f2f5858f5ce42ba46cb3 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Tue, 19 Aug 2025 15:58:44 +0300 Subject: [PATCH 0551/1307] usb: xhci: fix host not responding after suspend and resume Partially revert commit e1db856bd288 ("usb: xhci: remove '0' write to write-1-to-clear register") because the patch cleared the Interrupt Pending bit during interrupt enabling and disabling. The Interrupt Pending bit should only be cleared when the driver has handled the interrupt. Ideally, all interrupts should be handled before disabling the interrupt; consequently, no interrupt should be pending when enabling the interrupt. For this reason, keep the debug message informing if an interrupt is still pending when an interrupt is disabled. Because the Interrupt Pending bit is write-1-to-clear, writing '0' to it ensures that the state does not change. Link: https://lore.kernel.org/linux-usb/20250818231103.672ec7ed@foxbook Fixes: e1db856bd288 ("usb: xhci: remove '0' write to write-1-to-clear register") Closes: https://bbs.archlinux.org/viewtopic.php?id=307641 cc: stable@vger.kernel.org # 6.16+ Signed-off-by: Niklas Neronin Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250819125844.2042452-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0e03691f03bf..742c23826e17 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -309,6 +309,7 @@ int xhci_enable_interrupter(struct xhci_interrupter *ir) return -EINVAL; iman = readl(&ir->ir_set->iman); + iman &= ~IMAN_IP; iman |= IMAN_IE; writel(iman, &ir->ir_set->iman); @@ -325,6 +326,7 @@ int xhci_disable_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir) return -EINVAL; iman = readl(&ir->ir_set->iman); + iman &= ~IMAN_IP; iman &= ~IMAN_IE; writel(iman, &ir->ir_set->iman); From 658a1c8e0a66d0777e0e37a11ba19f27a81e77f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 11 Aug 2025 12:43:57 +0200 Subject: [PATCH 0552/1307] drm/xe: Assign ioctl xe file handler to vm in xe_vm_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In several code paths, such as xe_pt_create(), the vm->xef field is used to determine whether a VM originates from userspace or the kernel. Previously, this handler was only assigned in xe_vm_create_ioctl(), after the VM was created by xe_vm_create(). However, xe_vm_create() triggers page table creation, and that function assumes vm->xef should be already set. This could lead to incorrect origin detection. To fix this problem and ensure consistency in the initialization of the VM object, let's move the assignment of this handler to xe_vm_create. v2: - take reference to the xe file object only when xef is not NULL - release the reference to the xe file object on the error path (Matthew) Fixes: 7f387e6012b6 ("drm/xe: add XE_BO_FLAG_PINNED_LATE_RESTORE") Signed-off-by: Piotr Piórkowski Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250811104358.2064150-2-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski (cherry picked from commit 9337166fa1d80f7bb7c7d3a8f901f21c348c0f2a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_pxp_submit.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 11 ++++++----- drivers/gpu/drm/xe/xe_vm.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7d20ac4bb633..84f412fd3c5d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -408,7 +408,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) /* Special layout, prepared below.. */ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) return ERR_CAST(vm); diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..ca95f2a4d4ef 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -101,7 +101,7 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2035604121e6..5bff317e335a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1640,7 +1640,7 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; struct xe_vm *vm; @@ -1661,9 +1661,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. However, the PXP mutex can be taken @@ -1814,6 +1815,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -2097,7 +2100,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); @@ -2113,8 +2116,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, vm->usm.asid = asid; } - vm->xef = xe_file_get(xef); - /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) if (vm->pt_root[id]) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3475a118f666..2f213737c7e5 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); From 11cd7a5c21db020b8001aedcae27bd3fa9e1e901 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Aug 2025 12:40:41 +0300 Subject: [PATCH 0553/1307] regulator: tps65219: regulator: tps65219: Fix error codes in probe() There is a copy and paste error and we accidentally use "PTR_ERR(rdev)" instead of "error". The "rdev" pointer is valid at this point. Also there is no need to print the error code in the error message because dev_err_probe() already prints that. So clean up the error message a bit. Fixes: 38c9f98db20a ("regulator: tps65219: Add support for TPS65215 Regulator IRQs") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aKRGmVdbvT1HBvm8@stanley.mountain Signed-off-by: Mark Brown --- drivers/regulator/tps65219-regulator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index 5e67fdc88f49..d77ca486879f 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -454,9 +454,9 @@ static int tps65219_regulator_probe(struct platform_device *pdev) irq_type->irq_name, irq_data); if (error) - return dev_err_probe(tps->dev, PTR_ERR(rdev), - "Failed to request %s IRQ %d: %d\n", - irq_type->irq_name, irq, error); + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_type->irq_name, irq); } for (i = 0; i < pmic->dev_irq_size; ++i) { @@ -477,9 +477,9 @@ static int tps65219_regulator_probe(struct platform_device *pdev) irq_type->irq_name, irq_data); if (error) - return dev_err_probe(tps->dev, PTR_ERR(rdev), - "Failed to request %s IRQ %d: %d\n", - irq_type->irq_name, irq, error); + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_type->irq_name, irq); } return 0; From 0ddfb62f5d018edcb571a3d8ea30ad5332cf2a69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Aug 2025 15:38:14 -0400 Subject: [PATCH 0554/1307] fix the softlockups in attach_recursive_mnt() In case when we mounting something on top of a large stack of overmounts, all of them being peers of each other, we get quadratic time by the depth of overmount stack. Easily fixed by doing commit_tree() before reparenting the overmount; simplifies commit_tree() as well - it doesn't need to skip the already mounted stuff that had been reparented on top of the new mounts. Since we are holding mount_lock through both reparenting and call of commit_tree(), the order does not matter from the mount hash point of view. Reported-by: "Lai, Yi" Tested-by: "Lai, Yi" Reviewed-by: Christian Brauner Fixes: 663206854f02 "copy_tree(): don't link the mounts via mnt_list" Signed-off-by: Al Viro --- fs/namespace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index ddfd4457d338..1c97f93d1865 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1197,10 +1197,7 @@ static void commit_tree(struct mount *mnt) if (!mnt_ns_attached(mnt)) { for (struct mount *m = mnt; m; m = next_mnt(m, mnt)) - if (unlikely(mnt_ns_attached(m))) - m = skip_mnt_tree(m); - else - mnt_add_to_ns(n, m); + mnt_add_to_ns(n, m); n->nr_mounts += n->pending_mounts; n->pending_mounts = 0; } @@ -2704,6 +2701,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, lock_mnt_tree(child); q = __lookup_mnt(&child->mnt_parent->mnt, child->mnt_mountpoint); + commit_tree(child); if (q) { struct mountpoint *mp = root.mp; struct mount *r = child; @@ -2713,7 +2711,6 @@ static int attach_recursive_mnt(struct mount *source_mnt, mp = shorter; mnt_change_mountpoint(r, mp, q); } - commit_tree(child); } unpin_mountpoint(&root); unlock_mount_hash(); From da025cdb97a23c1916d8491925b878f3e1de0bca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Aug 2025 23:32:26 -0400 Subject: [PATCH 0555/1307] propagate_umount(): only surviving overmounts should be reparented ... as the comments in reparent() clearly say. As it is, we reparent *all* overmounts of the mounts being taken out, including those that are taken out themselves. It's not only a potentially massive slowdown (on a pathological setup we might end up with O(N^2) time for N mounts being kicked out), it can end up with incorrect ->overmount in the surviving mounts. Fixes: f0d0ba19985d "Rewrite of propagate_umount()" Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/pnode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 81f7599bdac4..1c789f88b3d2 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -637,10 +637,11 @@ void propagate_umount(struct list_head *set) } // now to_umount consists of all acceptable candidates - // deal with reparenting of remaining overmounts on those + // deal with reparenting of surviving overmounts on those list_for_each_entry(m, &to_umount, mnt_list) { - if (m->overmount) - reparent(m->overmount); + struct mount *over = m->overmount; + if (over && !will_be_unmounted(over)) + reparent(over); } // and fold them into the set From cffd0441872e7f6b1fce5e78fb1c99187a291330 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Aug 2025 01:44:31 -0400 Subject: [PATCH 0556/1307] use uniform permission checks for all mount propagation changes do_change_type() and do_set_group() are operating on different aspects of the same thing - propagation graph. The latter asks for mounts involved to be mounted in namespace(s) the caller has CAP_SYS_ADMIN for. The former is a mess - originally it didn't even check that mount *is* mounted. That got fixed, but the resulting check turns out to be too strict for userland - in effect, we check that mount is in our namespace, having already checked that we have CAP_SYS_ADMIN there. What we really need (in both cases) is * only touch mounts that are mounted. That's a must-have constraint - data corruption happens if it get violated. * don't allow to mess with a namespace unless you already have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns). That's an equivalent of what do_set_group() does; let's extract that into a helper (may_change_propagation()) and use it in both do_set_group() and do_change_type(). Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts" Acked-by: Andrei Vagin Reviewed-by: Pavel Tikhomirov Tested-by: Pavel Tikhomirov Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1c97f93d1865..88db58061919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2859,6 +2859,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) return attach_recursive_mnt(mnt, p, mp); } +static int may_change_propagation(const struct mount *m) +{ + struct mnt_namespace *ns = m->mnt_ns; + + // it must be mounted in some namespace + if (IS_ERR_OR_NULL(ns)) // is_mounted() + return -EINVAL; + // and the caller must be admin in userns of that namespace + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + /* * Sanity check the flags to change_mnt_propagation. */ @@ -2895,10 +2908,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); - if (!check_mnt(mnt)) { - err = -EINVAL; + err = may_change_propagation(mnt); + if (err) goto out_unlock; - } + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3344,18 +3357,11 @@ static int do_set_group(struct path *from_path, struct path *to_path) namespace_lock(); - err = -EINVAL; - /* To and From must be mounted */ - if (!is_mounted(&from->mnt)) + err = may_change_propagation(from); + if (err) goto out; - if (!is_mounted(&to->mnt)) - goto out; - - err = -EPERM; - /* We should be allowed to modify mount namespaces of both mounts */ - if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) - goto out; - if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(to); + if (err) goto out; err = -EINVAL; From fb924b7b8669503582e003dd7b7340ee49029801 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Aug 2025 15:23:08 -0400 Subject: [PATCH 0557/1307] change_mnt_propagation(): calculate propagation source only if we'll need it We only need it when mount in question was sending events downstream (then recepients need to switch to new master) or the mount is being turned into slave (then we need a new master for it). That wouldn't be a big deal, except that it causes quite a bit of work when umount_tree() is taking a large peer group out. Adding a trivial "don't bother calling propagation_source() unless we are going to use its results" logics improves the things quite a bit. We are still doing unnecessary work on bulk removals from propagation graph, but the full solution for that will have to wait for the next merge window. Fixes: 955336e204ab "do_make_slave(): choose new master sanely" Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/pnode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/pnode.c b/fs/pnode.c index 1c789f88b3d2..6f7d02f3fa98 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -111,7 +111,8 @@ void change_mnt_propagation(struct mount *mnt, int type) return; } if (IS_MNT_SHARED(mnt)) { - m = propagation_source(mnt); + if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list)) + m = propagation_source(mnt); if (list_empty(&mnt->mnt_share)) { mnt_release_group_id(mnt); } else { From 453a6d2a68e54a483d67233c6e1e24c4095ee4be Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 19 Aug 2025 16:27:36 +0100 Subject: [PATCH 0558/1307] cifs: Fix oops due to uninitialised variable Fix smb3_init_transform_rq() to initialise buffer to NULL before calling netfs_alloc_folioq_buffer() as netfs assumes it can append to the buffer it is given. Setting it to NULL means it should start a fresh buffer, but the value is currently undefined. Fixes: a2906d3316fc ("cifs: Switch crypto buffer to use a folio_queue rather than an xarray") Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3b251de874ec..94b1d7a395d5 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4496,7 +4496,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, for (int i = 1; i < num_rqst; i++) { struct smb_rqst *old = &old_rq[i - 1]; struct smb_rqst *new = &new_rq[i]; - struct folio_queue *buffer; + struct folio_queue *buffer = NULL; size_t size = iov_iter_count(&old->rq_iter); orig_len += smb_rqst_len(server, old); From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: [PATCH 0559/1307] NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 9 +++++---- fs/nfs/write.c | 29 ++++++++++------------------- include/linux/nfs_page.h | 1 + 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 11968dcb7243..6e69ce43a13f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); } -/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; } /** @@ -585,19 +575,18 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) } } + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode); /* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping; @@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); From d41e327582e172f30e4e15f9124796a10fd1b0f9 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 19 Aug 2025 10:03:49 +0800 Subject: [PATCH 0560/1307] MAINTAINERS: i2c: Update i2c_hisi entry Because Yicong Yang will no longer work on i2c_hisi driver, update the maintainer information for i2c_hisi. Signed-off-by: Devyn Liu Acked-by: Yicong Yang Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250819020349.4027842-1-liudingyuan@h-partners.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..9d1270ddcbff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11013,7 +11013,7 @@ F: Documentation/admin-guide/perf/hns3-pmu.rst F: drivers/perf/hisilicon/hns3_pmu.c HISILICON I2C CONTROLLER DRIVER -M: Yicong Yang +M: Devyn Liu L: linux-i2c@vger.kernel.org S: Maintained W: https://www.hisilicon.com From 57f312b955938fc4663f430cb57a71f2414f601b Mon Sep 17 00:00:00 2001 From: Alex Guo Date: Sun, 10 Aug 2025 20:05:13 +0200 Subject: [PATCH 0561/1307] i2c: rtl9300: Fix out-of-bounds bug in rtl9300_i2c_smbus_xfer The data->block[0] variable comes from user. Without proper check, the variable may be very large to cause an out-of-bounds bug. Fix this bug by checking the value of data->block[0] first. 1. commit 39244cc75482 ("i2c: ismt: Fix an out-of-bounds bug in ismt_access()") 2. commit 92fbb6d1296f ("i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer()") Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Alex Guo Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Reviewed-by: Wolfram Sang Signed-off-by: Sven Eckelmann Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-1-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index e064e8a4a1f0..568495720810 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -281,6 +281,10 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s ret = rtl9300_i2c_reg_addr_set(i2c, command, 1); if (ret) goto out_unlock; + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) { + ret = -EINVAL; + goto out_unlock; + } ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0]); if (ret) goto out_unlock; From d67b740b9edfa46310355e2b68050f79ebf05a4c Mon Sep 17 00:00:00 2001 From: Harshal Gohel Date: Sun, 10 Aug 2025 20:05:14 +0200 Subject: [PATCH 0562/1307] i2c: rtl9300: Fix multi-byte I2C write The RTL93xx I2C controller has 4 32 bit registers to store the bytes for the upcoming I2C transmission. The first byte is stored in the least-significant byte of the first register. And the last byte in the most significant byte of the last register. A map of the transferred bytes to their order in the registers is: reg 0: 0x04_03_02_01 reg 1: 0x08_07_06_05 reg 2: 0x0c_0b_0a_09 reg 3: 0x10_0f_0e_0d The i2c_read() function basically demonstrates how the hardware would pick up bytes from this register set. But the i2c_write() function was just pushing bytes one after another to the least significant byte of a register AFTER shifting the last one to the next more significant byte position. If you would then have tried to send a buffer with numbers 1-11 using i2c_write(), you would have ended up with following register content: reg 0: 0x01_02_03_04 reg 1: 0x05_06_07_08 reg 2: 0x00_09_0a_0b reg 3: 0x00_00_00_00 On the wire, you would then have seen: Sr Addr Wr [A] 04 A 03 A 02 A 01 A 08 A 07 A 06 A 05 A 0b A 0a A 09 A P But the correct data transmission was expected to be Sr Addr Wr [A] 01 A 02 A 03 A 04 A 05 A 06 A 07 A 08 A 09 A 0a A 0b A P Because of this multi-byte ordering problem, only single byte i2c_write() operations were executed correctly (on the wire). By shifting the byte directly to the correct end position in the register, it is possible to avoid this incorrect byte ordering and fix multi-byte transmissions. The second initialization (to 0) of vals was also be dropped because this array is initialized to 0 on the stack by using `= {};`. This makes the fix a lot more readable. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Harshal Gohel Cc: # v6.13+ Co-developed-by: Sven Eckelmann Signed-off-by: Sven Eckelmann Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-2-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 568495720810..4a538b266080 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -143,10 +143,10 @@ static int rtl9300_i2c_write(struct rtl9300_i2c *i2c, u8 *buf, int len) return -EIO; for (i = 0; i < len; i++) { - if (i % 4 == 0) - vals[i/4] = 0; - vals[i/4] <<= 8; - vals[i/4] |= buf[i]; + unsigned int shift = (i % 4) * 8; + unsigned int reg = i / 4; + + vals[reg] |= buf[i] << shift; } return regmap_bulk_write(i2c->regmap, i2c->reg_base + RTL9300_I2C_MST_DATA_WORD0, From ceee7776c010c5f09d30985c9e5223b363a6172a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 Aug 2025 20:05:15 +0200 Subject: [PATCH 0563/1307] i2c: rtl9300: Increase timeout for transfer polling The timeout for transfers was only set to 2ms. Because of this relatively low limit, 12-byte read operations to the frontend MCU of a RTL8239 POE PSE chip cluster was consistently resulting in a timeout. The original OpenWrt downstream driver [1] was not using any timeout limit at all. This is also possible by setting the timeout_us parameter of regmap_read_poll_timeout() to 0. But since the driver currently implements the ETIMEDOUT error, it is more sensible to increase the timeout in such a way that communication with the (quite common) Realtek I2C-connected POE management solution is possible. [1] https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob;f=target/linux/realtek/files-6.12/drivers/i2c/busses/i2c-rtl9300.c;h=c4d973195ef39dc56d6207e665d279745525fcac#l202 Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Sven Eckelmann Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-3-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 4a538b266080..4a282d57e2c1 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -175,7 +175,7 @@ static int rtl9300_i2c_execute_xfer(struct rtl9300_i2c *i2c, char read_write, return ret; ret = regmap_read_poll_timeout(i2c->regmap, i2c->reg_base + RTL9300_I2C_MST_CTRL1, - val, !(val & RTL9300_I2C_MST_CTRL1_I2C_TRIG), 100, 2000); + val, !(val & RTL9300_I2C_MST_CTRL1_I2C_TRIG), 100, 100000); if (ret) return ret; From 82b350dd8185ce790e61555c436f90b6501af23c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 Aug 2025 20:05:16 +0200 Subject: [PATCH 0564/1307] i2c: rtl9300: Add missing count byte for SMBus Block Ops The expected on-wire format of an SMBus Block Write is S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P Everything starting from the Count byte is provided by the I2C subsystem in the array data->block. But the driver was skipping the Count byte (data->block[0]) when sending it to the RTL93xx I2C controller. Only the actual data could be seen on the wire: S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P This wire format is not SMBus Block Write compatible but matches the format of an I2C Block Write. Simply adding the count byte to the buffer for the I2C controller is enough to fix the transmission. This also affects read because the I2C controller must receive the count byte + $count * data bytes. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Signed-off-by: Sven Eckelmann Cc: # v6.13+ Reviewed-by: Chris Packham Tested-by: Chris Packham Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250810-i2c-rtl9300-multi-byte-v5-4-cd9dca0db722@narfation.org --- drivers/i2c/busses/i2c-rtl9300.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 4a282d57e2c1..cfafe089102a 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -285,15 +285,15 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s ret = -EINVAL; goto out_unlock; } - ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0]); + ret = rtl9300_i2c_config_xfer(i2c, chan, addr, data->block[0] + 1); if (ret) goto out_unlock; if (read_write == I2C_SMBUS_WRITE) { - ret = rtl9300_i2c_write(i2c, &data->block[1], data->block[0]); + ret = rtl9300_i2c_write(i2c, &data->block[0], data->block[0] + 1); if (ret) goto out_unlock; } - len = data->block[0]; + len = data->block[0] + 1; break; default: From 63b17b653df30e90f95338083cb44c35d64bcae4 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:02 +0000 Subject: [PATCH 0565/1307] kho: init new_physxa->phys_bits to fix lockdep Patch series "Several KHO Hotfixes". Three unrelated fixes for Kexec Handover. This patch (of 3): Lockdep shows the following warning: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. [] dump_stack_lvl+0x66/0xa0 [] assign_lock_key+0x10c/0x120 [] register_lock_class+0xf4/0x2f0 [] __lock_acquire+0x7f/0x2c40 [] ? __pfx_hlock_conflict+0x10/0x10 [] ? native_flush_tlb_global+0x8e/0xa0 [] ? __flush_tlb_all+0x4e/0xa0 [] ? __kernel_map_pages+0x112/0x140 [] ? xa_load_or_alloc+0x67/0xe0 [] lock_acquire+0xe6/0x280 [] ? xa_load_or_alloc+0x67/0xe0 [] _raw_spin_lock+0x30/0x40 [] ? xa_load_or_alloc+0x67/0xe0 [] xa_load_or_alloc+0x67/0xe0 [] kho_preserve_folio+0x90/0x100 [] __kho_finalize+0xcf/0x400 [] kho_finalize+0x34/0x70 This is becase xa has its own lock, that is not initialized in xa_load_or_alloc. Modifiy __kho_preserve_order(), to properly call xa_init(&new_physxa->phys_bits); Link: https://lkml.kernel.org/r/20250808201804.772010-2-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Pratyush Yadav Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index e49743ae52c5..65145972d6d6 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -144,14 +144,34 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, unsigned int order) { struct kho_mem_phys_bits *bits; - struct kho_mem_phys *physxa; + struct kho_mem_phys *physxa, *new_physxa; const unsigned long pfn_high = pfn >> order; might_sleep(); - physxa = xa_load_or_alloc(&track->orders, order, sizeof(*physxa)); - if (IS_ERR(physxa)) - return PTR_ERR(physxa); + physxa = xa_load(&track->orders, order); + if (!physxa) { + int err; + + new_physxa = kzalloc(sizeof(*physxa), GFP_KERNEL); + if (!new_physxa) + return -ENOMEM; + + xa_init(&new_physxa->phys_bits); + physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa, + GFP_KERNEL); + + err = xa_err(physxa); + if (err || physxa) { + xa_destroy(&new_physxa->phys_bits); + kfree(new_physxa); + + if (err) + return err; + } else { + physxa = new_physxa; + } + } bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, sizeof(*bits)); From 8b66ed2c3f42cc462e05704af6b94e6a7bad2f5e Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:03 +0000 Subject: [PATCH 0566/1307] kho: mm: don't allow deferred struct page with KHO KHO uses struct pages for the preserved memory early in boot, however, with deferred struct page initialization, only a small portion of memory has properly initialized struct pages. This problem was detected where vmemmap is poisoned, and illegal flag combinations are detected. Don't allow them to be enabled together, and later we will have to teach KHO to work properly with deferred struct page init kernel feature. Link: https://lkml.kernel.org/r/20250808201804.772010-3-pasha.tatashin@soleen.com Fixes: 4e1d010e3bda ("kexec: add config option for KHO") Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Acked-by: Pratyush Yadav Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- kernel/Kconfig.kexec | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 2ee603a98813..1224dd937df0 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,6 +97,7 @@ config KEXEC_JUMP config KEXEC_HANDOVER bool "kexec handover" depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE + depends on !DEFERRED_STRUCT_PAGE_INIT select MEMBLOCK_KHO_SCRATCH select KEXEC_FILE select DEBUG_FS From 44958f2025ed3f29fc3e93bb1f6c16121d7847ad Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 8 Aug 2025 20:18:04 +0000 Subject: [PATCH 0567/1307] kho: warn if KHO is disabled due to an error During boot scratch area is allocated based on command line parameters or auto calculated. However, scratch area may fail to allocate, and in that case KHO is disabled. Currently, no warning is printed that KHO is disabled, which makes it confusing for the end user to figure out why KHO is not available. Add the missing warning message. Link: https://lkml.kernel.org/r/20250808201804.772010-4-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (Microsoft) Acked-by: Pratyush Yadav Cc: Alexander Graf Cc: Arnd Bergmann Cc: Baoquan He Cc: Changyuan Lyu Cc: Coiby Xu Cc: Dave Vasilevsky Cc: Eric Biggers Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 65145972d6d6..ecd1ac210dbd 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -564,6 +564,7 @@ static void __init kho_reserve_scratch(void) err_free_scratch_desc: memblock_free(kho_scratch, kho_scratch_cnt * sizeof(*kho_scratch)); err_disable_kho: + pr_warn("Failed to reserve scratch area, disabling kexec handover\n"); kho_enable = false; } From b64700d41bdc4e9f82f1346c15a3678ebb91a89c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 11 Aug 2025 23:37:40 +0100 Subject: [PATCH 0568/1307] squashfs: fix memory leak in squashfs_fill_super If sb_min_blocksize returns 0, squashfs_fill_super exits without freeing allocated memory (sb->s_fs_info). Fix this by moving the call to sb_min_blocksize to before memory is allocated. Link: https://lkml.kernel.org/r/20250811223740.110392-1-phillip@squashfs.org.uk Fixes: 734aa85390ea ("Squashfs: check return result of sb_min_blocksize") Signed-off-by: Phillip Lougher Reported-by: Scott GUO Closes: https://lore.kernel.org/all/20250811061921.3807353-1-scott_gzh@163.com Cc: Signed-off-by: Andrew Morton --- fs/squashfs/super.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 992ea0e37257..4465cf05603a 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -187,10 +187,15 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) unsigned short flags; unsigned int fragments; u64 lookup_table_start, xattr_id_table_start, next_table; - int err; + int err, devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); TRACE("Entered squashfs_fill_superblock\n"); + if (!devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); @@ -201,12 +206,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); - msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); - if (!msblk->devblksize) { - errorf(fc, "squashfs: unable to set blocksize\n"); - return -EINVAL; - } - + msblk->devblksize = devblksize; msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); From dde30854bddfb5d69f30022b53c5955a41088b33 Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 31 Jul 2025 18:40:51 -0300 Subject: [PATCH 0569/1307] mm/debug_vm_pgtable: clear page table entries at destroy_args() The mm/debug_vm_pagetable test allocates manually page table entries for the tests it runs, using also its manually allocated mm_struct. That in itself is ok, but when it exits, at destroy_args() it fails to clear those entries with the *_clear functions. The problem is that leaves stale entries. If another process allocates an mm_struct with a pgd at the same address, it may end up running into the stale entry. This is happening in practice on a debug kernel with CONFIG_DEBUG_VM_PGTABLE=y, for example this is the output with some extra debugging I added (it prints a warning trace if pgtables_bytes goes negative, in addition to the warning at check_mm() function): [ 2.539353] debug_vm_pgtable: [get_random_vaddr ]: random_vaddr is 0x7ea247140000 [ 2.539366] kmem_cache info [ 2.539374] kmem_cachep 0x000000002ce82385 - freelist 0x0000000000000000 - offset 0x508 [ 2.539447] debug_vm_pgtable: [init_args ]: args->mm is 0x000000002267cc9e (...) [ 2.552800] WARNING: CPU: 5 PID: 116 at include/linux/mm.h:2841 free_pud_range+0x8bc/0x8d0 [ 2.552816] Modules linked in: [ 2.552843] CPU: 5 UID: 0 PID: 116 Comm: modprobe Not tainted 6.12.0-105.debug_vm2.el10.ppc64le+debug #1 VOLUNTARY [ 2.552859] Hardware name: IBM,9009-41A POWER9 (architected) 0x4e0202 0xf000005 of:IBM,FW910.00 (VL910_062) hv:phyp pSeries [ 2.552872] NIP: c0000000007eef3c LR: c0000000007eef30 CTR: c0000000003d8c90 [ 2.552885] REGS: c0000000622e73b0 TRAP: 0700 Not tainted (6.12.0-105.debug_vm2.el10.ppc64le+debug) [ 2.552899] MSR: 800000000282b033 CR: 24002822 XER: 0000000a [ 2.552954] CFAR: c0000000008f03f0 IRQMASK: 0 [ 2.552954] GPR00: c0000000007eef30 c0000000622e7650 c000000002b1ac00 0000000000000001 [ 2.552954] GPR04: 0000000000000008 0000000000000000 c0000000007eef30 ffffffffffffffff [ 2.552954] GPR08: 00000000ffff00f5 0000000000000001 0000000000000048 0000000000004000 [ 2.552954] GPR12: 00000003fa440000 c000000017ffa300 c0000000051d9f80 ffffffffffffffdb [ 2.552954] GPR16: 0000000000000000 0000000000000008 000000000000000a 60000000000000e0 [ 2.552954] GPR20: 4080000000000000 c0000000113af038 00007fffcf130000 0000700000000000 [ 2.552954] GPR24: c000000062a6a000 0000000000000001 8000000062a68000 0000000000000001 [ 2.552954] GPR28: 000000000000000a c000000062ebc600 0000000000002000 c000000062ebc760 [ 2.553170] NIP [c0000000007eef3c] free_pud_range+0x8bc/0x8d0 [ 2.553185] LR [c0000000007eef30] free_pud_range+0x8b0/0x8d0 [ 2.553199] Call Trace: [ 2.553207] [c0000000622e7650] [c0000000007eef30] free_pud_range+0x8b0/0x8d0 (unreliable) [ 2.553229] [c0000000622e7750] [c0000000007f40b4] free_pgd_range+0x284/0x3b0 [ 2.553248] [c0000000622e7800] [c0000000007f4630] free_pgtables+0x450/0x570 [ 2.553274] [c0000000622e78e0] [c0000000008161c0] exit_mmap+0x250/0x650 [ 2.553292] [c0000000622e7a30] [c0000000001b95b8] __mmput+0x98/0x290 [ 2.558344] [c0000000622e7a80] [c0000000001d1018] exit_mm+0x118/0x1b0 [ 2.558361] [c0000000622e7ac0] [c0000000001d141c] do_exit+0x2ec/0x870 [ 2.558376] [c0000000622e7b60] [c0000000001d1ca8] do_group_exit+0x88/0x150 [ 2.558391] [c0000000622e7bb0] [c0000000001d1db8] sys_exit_group+0x48/0x50 [ 2.558407] [c0000000622e7be0] [c00000000003d810] system_call_exception+0x1e0/0x4c0 [ 2.558423] [c0000000622e7e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec (...) [ 2.558892] ---[ end trace 0000000000000000 ]--- [ 2.559022] BUG: Bad rss-counter state mm:000000002267cc9e type:MM_ANONPAGES val:1 [ 2.559037] BUG: non-zero pgtables_bytes on freeing mm: -6144 Here the modprobe process ended up with an allocated mm_struct from the mm_struct slab that was used before by the debug_vm_pgtable test. That is not a problem, since the mm_struct is initialized again etc., however, if it ends up using the same pgd table, it bumps into the old stale entry when clearing/freeing the page table entries, so it tries to free an entry already gone (that one which was allocated by the debug_vm_pgtable test), which also explains the negative pgtables_bytes since it's accounting for not allocated entries in the current process. As far as I looked pgd_{alloc,free} etc. does not clear entries, and clearing of the entries is explicitly done in the free_pgtables-> free_pgd_range->free_p4d_range->free_pud_range->free_pmd_range-> free_pte_range path. However, the debug_vm_pgtable test does not call free_pgtables, since it allocates mm_struct and entries manually for its test and eg. not goes through page faults. So it also should clear manually the entries before exit at destroy_args(). This problem was noticed on a reboot X number of times test being done on a powerpc host, with a debug kernel with CONFIG_DEBUG_VM_PGTABLE enabled. Depends on the system, but on a 100 times reboot loop the problem could manifest once or twice, if a process ends up getting the right mm->pgd entry with the stale entries used by mm/debug_vm_pagetable. After using this patch, I couldn't reproduce/experience the problems anymore. I was able to reproduce the problem as well on latest upstream kernel (6.16). I also modified destroy_args() to use mmput() instead of mmdrop(), there is no reason to hold mm_users reference and not release the mm_struct entirely, and in the output above with my debugging prints I already had patched it to use mmput, it did not fix the problem, but helped in the debugging as well. Link: https://lkml.kernel.org/r/20250731214051.4115182-1-herton@redhat.com Fixes: 3c9b84f044a9 ("mm/debug_vm_pgtable: introduce struct pgtable_debug_args") Signed-off-by: Herton R. Krzesinski Cc: Anshuman Khandual Cc: Christophe Leroy Cc: Gavin Shan Cc: Gerald Schaefer Cc: Signed-off-by: Andrew Morton --- mm/debug_vm_pgtable.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index d19031f275a3..830107b6dd08 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -990,29 +990,34 @@ static void __init destroy_args(struct pgtable_debug_args *args) /* Free page table entries */ if (args->start_ptep) { + pmd_clear(args->pmdp); pte_free(args->mm, args->start_ptep); mm_dec_nr_ptes(args->mm); } if (args->start_pmdp) { + pud_clear(args->pudp); pmd_free(args->mm, args->start_pmdp); mm_dec_nr_pmds(args->mm); } if (args->start_pudp) { + p4d_clear(args->p4dp); pud_free(args->mm, args->start_pudp); mm_dec_nr_puds(args->mm); } - if (args->start_p4dp) + if (args->start_p4dp) { + pgd_clear(args->pgdp); p4d_free(args->mm, args->start_p4dp); + } /* Free vma and mm struct */ if (args->vma) vm_area_free(args->vma); if (args->mm) - mmdrop(args->mm); + mmput(args->mm); } static struct page * __init From 9a6a6a3191574a01dcf7a7d9385246d7bc8736bc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 11 Aug 2025 06:26:54 +0100 Subject: [PATCH 0570/1307] tools/testing: add linux/args.h header and fix radix, VMA tests Commit 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") accidentally broke the radix tree, VMA userland tests by including linux/args.h which is not present in the tools/include directory. This patch copies this over and adds an #ifdef block to avoid duplicate __CONCAT declaration in conflict with system headers when we ultimately include this. Link: https://lkml.kernel.org/r/20250811052654.33286-1-lorenzo.stoakes@oracle.com Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: John Hubbard Cc: Liam Howlett Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Dan Williams Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- tools/include/linux/args.h | 28 ++++++++++++++++++++++++++++ tools/testing/shared/linux/idr.h | 4 ++++ 2 files changed, 32 insertions(+) create mode 100644 tools/include/linux/args.h diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..676c5564e33f 100644 --- a/tools/testing/shared/linux/idr.h +++ b/tools/testing/shared/linux/idr.h @@ -1 +1,5 @@ +/* Avoid duplicate definitions due to system headers. */ +#ifdef __CONCAT +#undef __CONCAT +#endif #include "../../../../include/linux/idr.h" From 63f5dec16760f2cd7d3f9034d18fc1fa0d83652f Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Sun, 10 Aug 2025 21:42:01 +0900 Subject: [PATCH 0571/1307] mm/damon/core: fix commit_ops_filters by using correct nth function damos_commit_ops_filters() incorrectly uses damos_nth_filter() which iterates core_filters. As a result, performing a commit unintentionally corrupts ops_filters. Add damos_nth_ops_filter() which iterates ops_filters. Use this function to fix issues caused by wrong iteration. Link: https://lkml.kernel.org/r/20250810124201.15743-1-ekffu200098@gmail.com Fixes: 3607cc590f18 ("mm/damon/core: support committing ops_filters") # 6.15.x Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/core.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 52a48c9316bc..467c2d78126f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -845,6 +845,18 @@ static struct damos_filter *damos_nth_filter(int n, struct damos *s) return NULL; } +static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s) +{ + struct damos_filter *filter; + int i = 0; + + damos_for_each_ops_filter(filter, s) { + if (i++ == n) + return filter; + } + return NULL; +} + static void damos_commit_filter_arg( struct damos_filter *dst, struct damos_filter *src) { @@ -908,7 +920,7 @@ static int damos_commit_ops_filters(struct damos *dst, struct damos *src) int i = 0, j = 0; damos_for_each_ops_filter_safe(dst_filter, next, dst) { - src_filter = damos_nth_filter(i++, src); + src_filter = damos_nth_ops_filter(i++, src); if (src_filter) damos_commit_filter(dst_filter, src_filter); else From 7c91e0b91aaa3fa1f897efb06565af0ceb75195c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:21 +0100 Subject: [PATCH 0572/1307] mm/mremap: allow multi-VMA move when filesystem uses thp_get_unmapped_area The multi-VMA move functionality introduced in commit d23cb648e365 ("mm/mremap: permit mremap() move of multiple VMA") doesn't allow moves of file-backed mappings which specify a custom f_op->get_unmapped_area handler excepting hugetlb and shmem. We expand this to include thp_get_unmapped_area to support file-backed mappings for filesystems which use large folios. Additionally, when the first VMA in a range is not compatible with a multi-VMA move, instead of moving the first VMA and returning an error, this series results in us not moving anything and returning an error immediately. Examining this second change in detail: The semantics of multi-VMA moves in mremap() very clearly indicate that a failure can result in a partial move of VMAs. This is in line with other aggregate operations within the kernel, which share these semantics. There are two classes of failures we're concerned with - eligiblity for mutli-VMA move, and transient failures that would occur even if the user individually moved each VMA. The latter is due to out-of-memory conditions (which, given the allocations involved are small, would likely be fatal in any case), or hitting the mapping limit. Regardless of the cause, transient issues would be fatal anyway, so it isn't really material which VMAs succeeded at being moved or not. However with when it comes to multi-VMA move eligiblity, we face another issue - we must allow a single VMA to succeed regardless of this eligiblity (as, of course, it is not a multi-VMA move) - but we must then fail multi-VMA operations. The two means by which VMAs may fail the eligbility test are - the VMAs being UFFD-armed, or the VMA being file-backed and providing its own f_op->get_unmapped_area() helper (because this may result in MREMAP_FIXED being disregarded), excepting those known to correctly handle MREMAP_FIXED. It is therefore conceivable that a user could erroneously try to use this functionality in these instances, and would prefer to not perform any move at all should that occur. This series therefore avoids any move of subsequent VMAs should the first be multi-VMA move ineligble and the input span exceeds that of the first VMA. We also add detailed test logic to assert that multi VMA move with ineligible VMAs functions as expected. This patch (of 3): We currently restrict multi-VMA move to avoid filesystems or drivers which provide a custom f_op->get_unmapped_area handler unless it is known to correctly handle MREMAP_FIXED. We do this so we do not get unexpected result when moving from one area to another (for instance, if the handler would align things resulting in the moved VMAs having different gaps than the original mapping). More and more filesystems are moving to using large folios, and typically do so (in part) by setting f_op->get_unmapped_area to thp_get_unmapped_area. When mremap() invokes the file system's get_unmapped MREMAP_FIXED, it does so via get_unmapped_area(), called in vrm_set_new_addr(). In order to do so, it converts the MREMAP_FIXED flag to a MAP_FIXED flag and passes this to the unmapped area handler. The __get_unmapped_area() function (called by get_unmapped_area()) in turn invokes the filesystem or driver's f_op->get_unmapped_area() handler. Therefore this is a point at which thp_get_unmapped_area() may be called (also, this is the case for anonymous mappings where the size is huge page aligned). thp_get_unmapped_area() calls thp_get_unmapped_area_vmflags() and __thp_get_unmapped_area() in turn (falling back to mm_get_unmapped_area_vm_flags() which is known to handle MAP_FIXED correctly). The __thp_get_unmapped_area() function in turn does nothing to change the address hint, nor the MAP_FIXED flag, only adjusting alignment parameters. It hten calls mm_get_unmapped_area_vmflags(), and in turn arch-specific unmapped area functions, all of which honour MAP_FIXED correctly. Therefore, we can safely add thp_get_unmapped_area to the known-good handlers. Link: https://lkml.kernel.org/r/cover.1754218667.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/4f2542340c29c84d3d470b0c605e916b192f6c81.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/mremap.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 9afa8cd524f5..eb37d1668770 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1620,7 +1620,7 @@ static void notify_uffd(struct vma_remap_struct *vrm, bool failed) static bool vma_multi_allowed(struct vm_area_struct *vma) { - struct file *file; + struct file *file = vma->vm_file; /* * We can't support moving multiple uffd VMAs as notify requires @@ -1633,15 +1633,17 @@ static bool vma_multi_allowed(struct vm_area_struct *vma) * Custom get unmapped area might result in MREMAP_FIXED not * being obeyed. */ - file = vma->vm_file; - if (file && !vma_is_shmem(vma) && !is_vm_hugetlb_page(vma)) { - const struct file_operations *fop = file->f_op; + if (!file || !file->f_op->get_unmapped_area) + return true; + /* Known good. */ + if (vma_is_shmem(vma)) + return true; + if (is_vm_hugetlb_page(vma)) + return true; + if (file->f_op->get_unmapped_area == thp_get_unmapped_area) + return true; - if (fop->get_unmapped_area) - return false; - } - - return true; + return false; } static int check_prep_vma(struct vma_remap_struct *vrm) From d5f416c7c36456676c2cf5ab98776db2e7601f27 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:22 +0100 Subject: [PATCH 0573/1307] mm/mremap: catch invalid multi VMA moves earlier Previously, any attempt to solely move a VMA would require that the span specified reside within the span of that single VMA, with no gaps before or afterwards. After commit d23cb648e365 ("mm/mremap: permit mremap() move of multiple VMAs"), the multi VMA move permitted a gap to exist only after VMAs. This was done to provide maximum flexibility. However, We have consequently permitted this behaviour for the move of a single VMA including those not eligible for multi VMA move. The change introduced here means that we no longer permit non-eligible VMAs from being moved in this way. This is consistent, as it means all eligible VMA moves are treated the same, and all non-eligible moves are treated as they were before. This change does not break previous behaviour, which equally would have disallowed such a move (only in all cases). [lorenzo.stoakes@oracle.com: do not incorrectly reference invalid VMA in VM_WARN_ON_ONCE()] Link: https://lkml.kernel.org/r/b6dbda20-667e-4053-abae-8ed4fa84bb6c@lucifer.local Link: https://lkml.kernel.org/r/2b5aad5681573be85b5b8fac61399af6fb6b68b6.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- mm/mremap.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index eb37d1668770..33b642076205 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1820,10 +1820,11 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long start = vrm->addr; unsigned long end = vrm->addr + vrm->old_len; unsigned long new_addr = vrm->new_addr; - bool allowed = true, seen_vma = false; unsigned long target_addr = new_addr; unsigned long res = -EFAULT; unsigned long last_end; + bool seen_vma = false; + VMA_ITERATOR(vmi, current->mm, start); /* @@ -1836,9 +1837,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) unsigned long addr = max(vma->vm_start, start); unsigned long len = min(end, vma->vm_end) - addr; unsigned long offset, res_vma; - - if (!allowed) - return -EFAULT; + bool multi_allowed; /* No gap permitted at the start of the range. */ if (!seen_vma && start < vma->vm_start) @@ -1867,9 +1866,15 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) vrm->new_addr = target_addr + offset; vrm->old_len = vrm->new_len = len; - allowed = vma_multi_allowed(vma); - if (seen_vma && !allowed) - return -EFAULT; + multi_allowed = vma_multi_allowed(vma); + if (!multi_allowed) { + /* This is not the first VMA, abort immediately. */ + if (seen_vma) + return -EFAULT; + /* This is the first, but there are more, abort. */ + if (vma->vm_end < end) + return -EFAULT; + } res_vma = check_prep_vma(vrm); if (!res_vma) @@ -1878,7 +1883,7 @@ static unsigned long remap_move(struct vma_remap_struct *vrm) return res_vma; if (!seen_vma) { - VM_WARN_ON_ONCE(allowed && res_vma != new_addr); + VM_WARN_ON_ONCE(multi_allowed && res_vma != new_addr); res = res_vma; } From 742d3663a5775cb7b957f4ca2ddb4ccd26badb94 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:23 +0100 Subject: [PATCH 0574/1307] selftests/mm: add test for invalid multi VMA operations We can use UFFD to easily assert invalid multi VMA moves, so do so, asserting expected behaviour when VMAs invalid for a multi VMA operation are encountered. We assert both that such operations are not permitted, and that we do not even attempt to move the first VMA under these circumstances. We also assert that we can still move a single VMA regardless. We then assert that a partial failure can occur if the invalid VMA appears later in the range of multiple VMAs, both at the very next VMA, and also at the end of the range. As part of this change, we are using the is_range_valid() helper more aggressively. Therefore, fix a bug where stale buffered data would hang around on success, causing subsequent calls to is_range_valid() to potentially give invalid results. We simply have to fflush() the stream on success to resolve this issue. Link: https://lkml.kernel.org/r/c4fb86dd5ba37610583ad5fc0e0c2306ddf318b9.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 264 ++++++++++++++++++++++- 1 file changed, 261 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index fccf9e797a0c..5bd52a951cbd 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -5,10 +5,14 @@ #define _GNU_SOURCE #include +#include +#include #include #include #include +#include #include +#include #include #include @@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, if (first_val <= start && second_val >= end) { success = true; + fflush(maps_fp); break; } } @@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, return success; } +/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */ +static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size) +{ + unsigned long start = (unsigned long)ptr; + unsigned long end = start + size; + + return is_range_mapped(maps_fp, start, end); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -733,6 +747,249 @@ static void mremap_move_multiple_vmas_split(unsigned int pattern_seed, dont_unmap ? " [dontunnmap]" : ""); } +#ifdef __NR_userfaultfd +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, + unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int uffd, err, i; + void *res; + struct uffdio_api api = { + .api = UFFD_API, + .features = UFFD_EVENT_PAGEFAULT, + }; + + uffd = syscall(__NR_userfaultfd, O_NONBLOCK); + if (uffd == -1) { + err = errno; + perror("userfaultfd"); + if (err == EPERM) { + ksft_test_result_skip("%s - missing uffd", test_name); + return; + } + success = false; + goto out; + } + if (ioctl(uffd, UFFDIO_API, &api)) { + perror("ioctl UFFDIO_API"); + success = false; + goto out_close_uffd; + } + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + + tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| + * + * Additionally, register each with UFFD. + */ + for (i = 0; i < 10; i += 2) { + void *unmap_ptr = &ptr[(i + 1) * page_size]; + unsigned long start = (unsigned long)&ptr[i * page_size]; + struct uffdio_register reg = { + .range = { + .start = start, + .len = page_size, + }, + .mode = UFFDIO_REGISTER_MODE_MISSING, + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) == -1) { + perror("ioctl UFFDIO_REGISTER"); + success = false; + goto out_unmap; + } + if (munmap(unmap_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range which is invalid for multi VMA move. + * + * This will fail, and no VMA should be moved, as we check this ahead of + * time. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, + "Invalid uffd-armed VMA at start of multi range moved\n"); + success = false; + goto out_unmap; + } + + /* + * Now try to move a single VMA, this should succeed as not multi VMA + * move. + */ + res = mremap(ptr, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + if (res == MAP_FAILED) { + perror("mremap single invalid-multi VMA"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA + * move valid) VMA at the start of ptr range. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + res = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + + /* + * Now try to move the entire range, we should succeed in moving the + * first VMA, but no others, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, "Valid VMA not moved\n"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and map valid VMA at start of ptr range, and replace + * all existing multi-move invalid VMAs, except the last, with valid + * multi-move VMAs. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + if (munmap(ptr, size - 2 * page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + for (i = 0; i < 8; i += 2) { + res = mmap(&ptr[i * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range, we should succeed in moving all but + * the last VMA, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + + for (i = 0; i < 10; i += 2) { + bool is_mapped = is_ptr_mapped(maps_fp, + &tgt_ptr[i * page_size], page_size); + + if (i < 8 && !is_mapped) { + fprintf(stderr, "Valid VMA not moved at %d\n", i); + success = false; + goto out_unmap; + } else if (i == 8 && is_mapped) { + fprintf(stderr, "Invalid VMA moved at %d\n", i); + success = false; + goto out_unmap; + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out_close_uffd: + close(uffd); +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} +#else +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + + ksft_test_result_skip("%s - missing uffd", test_name); +} +#endif /* __NR_userfaultfd */ + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -1074,7 +1331,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 8; + int num_misc_tests = 9; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1197,8 +1454,6 @@ int main(int argc, char **argv) mremap_expand_merge(maps_fp, page_size); mremap_expand_merge_offset(maps_fp, page_size); - fclose(maps_fp); - mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); @@ -1207,6 +1462,9 @@ int main(int argc, char **argv) mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multi_invalid_vmas(maps_fp, page_size); + + fclose(maps_fp); if (run_perf_tests) { ksft_print_msg("\n%s\n", From 8b26f0a8b4f220c1ec410e1350e8594911ec5742 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 12 Aug 2025 18:02:14 +0000 Subject: [PATCH 0575/1307] .mailmap: add entry for Easwar Hariharan Map my old, obsolete work email address to my current one. Link: https://lkml.kernel.org/r/20250812180218.92755-1-easwar.hariharan@linux.microsoft.com Signed-off-by: Easwar Hariharan Cc: Carlos Bilbao Cc: Jarkko Sakkinen Cc: Shannon Nelson Cc: Dmitry Baryshkov Cc: Hans Verkuil Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index d9fa1b555116..a124aeed52a2 100644 --- a/.mailmap +++ b/.mailmap @@ -226,6 +226,8 @@ Domen Puncer Douglas Gilbert Drew Fustini +Easwar Hariharan +Easwar Hariharan Ed L. Cashin Elliot Berman Enric Balletbo i Serra From 0cc2a4880ced1486acc34898cd85edc6ee109c4c Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 12 Aug 2025 23:00:46 +0900 Subject: [PATCH 0576/1307] selftests/damon: fix selftests by installing drgn related script drgn_dump_damon_status is not installed during kselftest setup. It can break other tests which depend on drgn_dump_damon_status. Install drgn_dump_damon_status files to fix broken test. Link: https://lkml.kernel.org/r/20250812140046.660486-1-ekffu200098@gmail.com Fixes: f3e8e1e51362 ("selftests/damon: add drgn script for extracting damon status") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Alexandre Ghiti Cc: Honggyu Kim Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b230deb19e8..9a3499827d4b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py +TEST_FILES += drgn_dump_damon_status.py # functionality tests TEST_PROGS += sysfs.sh From 808471ddb0fa785559c3e7aee59be20a13b46ef5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 13 Aug 2025 15:04:55 +0900 Subject: [PATCH 0577/1307] iov_iter: iterate_folioq: fix handling of offset >= folio size It's apparently possible to get an iov advanced all the way up to the end of the current page we're looking at, e.g. (gdb) p *iter $24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = { iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000, bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000, ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}} Where iov_offset is 4k with 4k-sized folios This should have been fine because we're only in the 2nd slot and there's another one after this, but iterate_folioq should not try to map a folio that skips the whole size, and more importantly part here does not end up zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE and not zero..), so skip forward to the "advance to next folio" code Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewreck.org Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewreck.org Signed-off-by: Dominique Martinet Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Reported-by: Maximilian Bosch Reported-by: Ryan Lahfa Reported-by: Christian Theune Reported-by: Arnout Engelen Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/ Acked-by: David Howells Cc: Al Viro Cc: Christian Brauner Cc: Matthew Wilcox (Oracle) Cc: [6.12+] Signed-off-by: Andrew Morton --- include/linux/iov_iter.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; From c7b70f76db0703a93b39a85b540d5b3911e166e8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 14 Aug 2025 07:54:54 +0000 Subject: [PATCH 0578/1307] mm: rust: add page.rs to MEMORY MANAGEMENT - RUST The page.rs file currently isn't included anywhere, and I think it's a good fit for the MEMORY MANAGEMENT - RUST entry. The file was originally added for use by Rust Binder, but I believe there is also work to use it in the upcoming scatterlist abstractions. Link: https://lkml.kernel.org/r/20250814075454.1596482-1-aliceryhl@google.com Signed-off-by: Alice Ryhl Acked-by: Danilo Krummrich Cc: Danilo Krummrich Cc: Liam Howlett Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index daf520a13bdf..afd7663c367f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16248,8 +16248,10 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: rust/helpers/mm.c +F: rust/helpers/page.c F: rust/kernel/mm.rs F: rust/kernel/mm/ +F: rust/kernel/page.rs MEMORY MAPPING M: Andrew Morton From 44958000badae5488d91431de194f747acc5dcac Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Fri, 15 Aug 2025 14:59:14 -0700 Subject: [PATCH 0579/1307] MAINTAINERS: mark MGLRU as maintained The three folks being added here are actively working on MGLRU within Google, so we can review patches for this feature and plan to contribute some improvements / extensions to it on an ongoing basis. With three of us we may have some hope filling Yu Zhao's shoes, since he has moved on to other projects these days. Link: https://lkml.kernel.org/r/20250815215914.3671925-1-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Lorenzo Stoakes Cc: Axel Rasmussen Cc: Shakeel Butt Cc: Wei Xu Cc: Yuanchu Xie Cc: Yu Zhao Signed-off-by: Andrew Morton --- MAINTAINERS | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index afd7663c367f..64e6a25b3aa2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16058,6 +16058,23 @@ F: mm/mempolicy.c F: mm/migrate.c F: mm/migrate_device.c +MEMORY MANAGEMENT - MGLRU (MULTI-GEN LRU) +M: Andrew Morton +M: Axel Rasmussen +M: Yuanchu Xie +R: Wei Xu +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/multigen_lru.rst +F: Documentation/mm/multigen_lru.rst +F: include/linux/mm_inline.h +F: include/linux/mmzone.h +F: mm/swap.c +F: mm/vmscan.c +F: mm/workingset.c + MEMORY MANAGEMENT - MISC M: Andrew Morton M: David Hildenbrand From 2e6053fea379806269c4f7f5e36b523c9c0fb35c Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Fri, 15 Aug 2025 15:32:09 +0800 Subject: [PATCH 0580/1307] mm/memory-failure: fix infinite UCE for VM_PFNMAP pfn When memory_failure() is called for a already hwpoisoned pfn, kill_accessing_process() will be called to kill current task. However, if the vma of the accessing vaddr is VM_PFNMAP, walk_page_range() will skip the vma in walk_page_test() and return 0. Before commit aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes with recovered clean pages"), kill_accessing_process() will return EFAULT. For x86, the current task will be killed in kill_me_maybe(). However, after this commit, kill_accessing_process() simplies return 0, that means UCE is handled properly, but it doesn't actually. In such case, the user task will trigger UCE infinitely. To fix it, add .test_walk callback for hwpoison_walk_ops to scan all vmas. Link: https://lkml.kernel.org/r/20250815073209.1984582-1-tujinjiang@huawei.com Fixes: aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes with recovered clean pages") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Acked-by: Miaohe Lin Reviewed-by: Jane Chu Cc: Kefeng Wang Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Shuai Xue Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e2e685b971bb..fc30ca4804bf 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -853,9 +853,17 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif +static int hwpoison_test_walk(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + /* We also want to consider pages mapped into VM_PFNMAP. */ + return 0; +} + static const struct mm_walk_ops hwpoison_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, + .test_walk = hwpoison_test_walk, .walk_lock = PGWALK_RDLOCK, }; From b3dee902b6c26b7d8031a4df19753e27dcfcba01 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Sat, 16 Aug 2025 10:51:16 +0900 Subject: [PATCH 0581/1307] mm/damon/core: fix damos_commit_filter not changing allow Current damos_commit_filter() does not persist the `allow' value of the filter. As a result, changing the `allow' value of a filter and committing doesn't change the `allow' value. Add the missing `allow' value update, so committing the filter persistently changes the `allow' value well. Link: https://lkml.kernel.org/r/20250816015116.194589-1-ekffu200098@gmail.com Fixes: fe6d7fdd6249 ("mm/damon/core: add damos_filter->allow field") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: [6.14.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 467c2d78126f..70eff5cbe6ee 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -883,6 +883,7 @@ static void damos_commit_filter( { dst->type = src->type; dst->matching = src->matching; + dst->allow = src->allow; damos_commit_filter_arg(dst, src); } From 053c8ebe74f7e1f4c072e59428da80b9d78bc4b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 17 Aug 2025 23:17:59 +0800 Subject: [PATCH 0582/1307] mm/migrate: fix NULL movable_ops if CONFIG_ZSMALLOC=m After commit 84caf98838a3e5f4bdb34 ("mm: stop storing migration_ops in page->mapping") we get such an error message if CONFIG_ZSMALLOC=m: WARNING: CPU: 3 PID: 42 at mm/migrate.c:142 isolate_movable_ops_page+0xa8/0x1c0 CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT pc 9000000000540bd8 ra 9000000000540b84 tp 9000000100420000 sp 9000000100423a60 a0 9000000100193a80 a1 000000000000000c a2 000000000000001b a3 ffffffffffffffff a4 ffffffffffffffff a5 0000000000000267 a6 0000000000000000 a7 9000000100423ae0 t0 00000000000000f1 t1 00000000000000f6 t2 0000000000000000 t3 0000000000000001 t4 ffffff00010eb834 t5 0000000000000040 t6 900000010c89d380 t7 90000000023fcc70 t8 0000000000000018 u0 0000000000000000 s9 ffffff00010eb800 s0 ffffff00010eb800 s1 000000000000000c s2 0000000000043ae0 s3 0000800000000000 s4 900000000219cc40 s5 0000000000000000 s6 ffffff00010eb800 s7 0000000000000001 s8 90000000025b4000 ra: 9000000000540b84 isolate_movable_ops_page+0x54/0x1c0 ERA: 9000000000540bd8 isolate_movable_ops_page+0xa8/0x1c0 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT Stack : 90000000021fd000 0000000000000000 9000000000247720 9000000100420000 90000001004236a0 90000001004236a8 0000000000000000 90000001004237e8 90000001004237e0 90000001004237e0 9000000100423550 0000000000000001 0000000000000001 90000001004236a8 725a84864a19e2d9 90000000023fcc58 9000000100420000 90000000024c6848 9000000002416848 0000000000000001 0000000000000000 000000000000000a 0000000007fe0000 ffffff00010eb800 0000000000000000 90000000021fd000 0000000000000000 900000000205cf30 000000000000008e 0000000000000009 ffffff00010eb800 0000000000000001 90000000025b4000 0000000000000000 900000000024773c 00007ffff103d748 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<900000000024773c>] show_stack+0x5c/0x190 [<90000000002415e0>] dump_stack_lvl+0x70/0x9c [<90000000004abe6c>] isolate_migratepages_block+0x3bc/0x16e0 [<90000000004af408>] compact_zone+0x558/0x1000 [<90000000004b0068>] compact_node+0xa8/0x1e0 [<90000000004b0aa4>] kcompactd+0x394/0x410 [<90000000002b3c98>] kthread+0x128/0x140 [<9000000001779148>] ret_from_kernel_thread+0x28/0xc0 [<9000000000245528>] ret_from_kernel_thread_asm+0x10/0x88 The reason is that defined(CONFIG_ZSMALLOC) evaluates to 1 only when CONFIG_ZSMALLOC=y, we should use IS_ENABLED(CONFIG_ZSMALLOC) instead. But when I use IS_ENABLED(CONFIG_ZSMALLOC), page_movable_ops() cannot access zsmalloc_mops because zsmalloc_mops is in a module. To solve this problem, we define a set_movable_ops() interface to register and unregister offline_movable_ops / zsmalloc_movable_ops in mm/migrate.c, and call them at mm/balloon_compaction.c & mm/zsmalloc.c. Since offline_movable_ops / zsmalloc_movable_ops are always accessible, all #ifdef / #endif are removed in page_movable_ops(). Link: https://lkml.kernel.org/r/20250817151759.2525174-1-chenhuacai@loongson.cn Fixes: 84caf98838a3 ("mm: stop storing migration_ops in page->mapping") Signed-off-by: Huacai Chen Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Huacai Chen Cc: Huacai Chen Cc: Lorenzo Stoakes Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/migrate.h | 5 +++++ mm/balloon_compaction.c | 6 ++++++ mm/migrate.c | 38 ++++++++++++++++++++++++++++++-------- mm/zsmalloc.c | 10 ++++++++++ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 2a4a649805c1..03c5dbabb156 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -254,4 +254,10 @@ const struct movable_operations balloon_mops = { .putback_page = balloon_page_putback, }; +static int __init balloon_init(void) +{ + return set_movable_ops(&balloon_mops, PGTY_offline); +} +core_initcall(balloon_init); + #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/migrate.c b/mm/migrate.c index 425401b2d4e1..9e5ef39ce73a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -43,8 +43,6 @@ #include #include #include -#include -#include #include @@ -53,6 +51,33 @@ #include "internal.h" #include "swap.h" +static const struct movable_operations *offline_movable_ops; +static const struct movable_operations *zsmalloc_movable_ops; + +int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + /* + * We only allow for selected types and don't handle concurrent + * registration attempts yet. + */ + switch (type) { + case PGTY_offline: + if (offline_movable_ops && ops) + return -EBUSY; + offline_movable_ops = ops; + break; + case PGTY_zsmalloc: + if (zsmalloc_movable_ops && ops) + return -EBUSY; + zsmalloc_movable_ops = ops; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(set_movable_ops); + static const struct movable_operations *page_movable_ops(struct page *page) { VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); @@ -62,15 +87,12 @@ static const struct movable_operations *page_movable_ops(struct page *page) * it as movable, the page type must be sticky until the page gets freed * back to the buddy. */ -#ifdef CONFIG_BALLOON_COMPACTION if (PageOffline(page)) /* Only balloon compaction sets PageOffline pages movable. */ - return &balloon_mops; -#endif /* CONFIG_BALLOON_COMPACTION */ -#if defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) + return offline_movable_ops; if (PageZsmalloc(page)) - return &zsmalloc_mops; -#endif /* defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) */ + return zsmalloc_movable_ops; + return NULL; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2c5e56a65354..805a10b41266 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2246,8 +2246,15 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); static int __init zs_init(void) { + int rc __maybe_unused; + #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + rc = set_movable_ops(&zsmalloc_mops, PGTY_zsmalloc); + if (rc) + return rc; #endif zs_stat_init(); return 0; @@ -2257,6 +2264,9 @@ static void __exit zs_exit(void) { #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + set_movable_ops(NULL, PGTY_zsmalloc); #endif zs_stat_exit(); } From ba1dd7ac735d604249f1e614d997dc66b30844ab Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 16 Aug 2025 09:55:59 -0700 Subject: [PATCH 0583/1307] mm/damon/sysfs-schemes: put damos dests dir after removing its files damon_sysfs_scheme_rm_dirs() puts dests directory kobject before removing its internal files. Sincee putting the kobject frees its container struct, and the internal files removal accesses the container, use-after-free happens. Fix it by putting the reference _after_ removing the files. Link: https://lkml.kernel.org/r/20250816165559.2601-1-sj@kernel.org Fixes: 2cd0bf85a203 ("mm/damon/sysfs-schemes: implement DAMOS action destinations directory") Signed-off-by: SeongJae Park Reported-by: Alexandre Ghiti Closes: https://lore.kernel.org/2d39a734-320d-4341-8f8a-4019eec2dbf2@ghiti.fr Tested-by: Alexandre Ghiti Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 74056bcd6a2c..6536f16006c9 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2158,8 +2158,8 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme) { damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); kobject_put(&scheme->access_pattern->kobj); - kobject_put(&scheme->dests->kobj); damos_sysfs_dests_rm_dirs(scheme->dests); + kobject_put(&scheme->dests->kobj); damon_sysfs_quotas_rm_dirs(scheme->quotas); kobject_put(&scheme->quotas->kobj); kobject_put(&scheme->watermarks->kobj); From 772e5b4a5e8360743645b9a466842d16092c4f94 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Aug 2025 19:53:58 +0200 Subject: [PATCH 0584/1307] mm/mremap: fix WARN with uffd that has remap events disabled Registering userfaultd on a VMA that spans at least one PMD and then mremap()'ing that VMA can trigger a WARN when recovering from a failed page table move due to a page table allocation error. The code ends up doing the right thing (recurse, avoiding moving actual page tables), but triggering that WARN is unpleasant: WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_normal_pmd mm/mremap.c:357 [inline] WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_pgt_entry mm/mremap.c:595 [inline] WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_page_tables+0x3832/0x44a0 mm/mremap.c:852 Modules linked in: CPU: 2 UID: 0 PID: 6133 Comm: syz.0.19 Not tainted 6.17.0-rc1-syzkaller-00004-g53e760d89498 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:move_normal_pmd mm/mremap.c:357 [inline] RIP: 0010:move_pgt_entry mm/mremap.c:595 [inline] RIP: 0010:move_page_tables+0x3832/0x44a0 mm/mremap.c:852 Code: ... RSP: 0018:ffffc900037a76d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000032930007 RCX: ffffffff820c6645 RDX: ffff88802e56a440 RSI: ffffffff820c7201 RDI: 0000000000000007 RBP: ffff888037728fc0 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000032930007 R11: 0000000000000000 R12: 0000000000000000 R13: ffffc900037a79a8 R14: 0000000000000001 R15: dffffc0000000000 FS: 000055556316a500(0000) GS:ffff8880d68bc000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b30863fff CR3: 0000000050171000 CR4: 0000000000352ef0 Call Trace: copy_vma_and_data+0x468/0x790 mm/mremap.c:1215 move_vma+0x548/0x1780 mm/mremap.c:1282 mremap_to+0x1b7/0x450 mm/mremap.c:1406 do_mremap+0xfad/0x1f80 mm/mremap.c:1921 __do_sys_mremap+0x119/0x170 mm/mremap.c:1977 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f00d0b8ebe9 Code: ... RSP: 002b:00007ffe5ea5ee98 EFLAGS: 00000246 ORIG_RAX: 0000000000000019 RAX: ffffffffffffffda RBX: 00007f00d0db5fa0 RCX: 00007f00d0b8ebe9 RDX: 0000000000400000 RSI: 0000000000c00000 RDI: 0000200000000000 RBP: 00007ffe5ea5eef0 R08: 0000200000c00000 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000002 R13: 00007f00d0db5fa0 R14: 00007f00d0db5fa0 R15: 0000000000000005 The underlying issue is that we recurse during the original page table move, but not during the recovery move. Fix it by checking for both VMAs and performing the check before the pmd_none() sanity check. Add a new helper where we perform+document that check for the PMD and PUD level. Thanks to Harry for bisecting. Link: https://lkml.kernel.org/r/20250818175358.1184757-1-david@redhat.com Fixes: 0cef0bb836e3 ("mm: clear uffd-wp PTE/PMD state on mremap()") Signed-off-by: David Hildenbrand Reported-by: syzbot+4d9a13f0797c46a29e42@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/689bb893.050a0220.7f033.013a.GAE@google.com Tested-by: Harry Yoo Cc: "Liam R. Howlett" Cc: Lorenzo Stoakes Cc: Vlastimil Babka Cc: Jann Horn Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 33b642076205..e618a706aff5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -323,6 +323,25 @@ static inline bool arch_supports_page_table_move(void) } #endif +static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc) +{ + /* + * If we are moving a VMA that has uffd-wp registered but with + * remap events disabled (new VMA will not be registered with uffd), we + * need to ensure that the uffd-wp state is cleared from all pgtables. + * This means recursing into lower page tables in move_page_tables(). + * + * We might get called with VMAs reversed when recovering from a + * failed page table move. In that case, the + * "old"-but-actually-"originally new" VMA during recovery will not have + * a uffd context. Recursing into lower page tables during the original + * move but not during the recovery move will cause trouble, because we + * run into already-existing page tables. So check both VMAs. + */ + return !vma_has_uffd_without_event_remap(pmc->old) && + !vma_has_uffd_without_event_remap(pmc->new); +} + #ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_t *old_pmd, pmd_t *new_pmd) @@ -335,6 +354,8 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pmd shouldn't be established, free_pgtables() * should have released it. @@ -361,15 +382,6 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pmd_none(*new_pmd))) return false; - /* If this pmd belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -418,6 +430,8 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (!arch_supports_page_table_move()) return false; + if (!uffd_supports_page_table_move(pmc)) + return false; /* * The destination pud shouldn't be established, free_pgtables() * should have released it. @@ -425,15 +439,6 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; - /* If this pud belongs to a uffd vma with remap events disabled, we need - * to ensure that the uffd-wp state is cleared from all pgtables. This - * means recursing into lower page tables in move_page_tables(), and we - * can reuse the existing code if we simply treat the entry as "not - * moved". - */ - if (vma_has_uffd_without_event_remap(vma)) - return false; - /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. From 4a73a36cb704813f588af13d9842d0ba5a185758 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 14 Aug 2025 17:42:14 +0200 Subject: [PATCH 0585/1307] cdc_ncm: Flag Intel OEM version of Fibocom L850-GL as WWAN This lets NetworkManager/ModemManager know that this is a modem and needs to be connected first. Signed-off-by: Lubomir Rintel Link: https://patch.msgid.link/20250814154214.250103-1-lkundrak@v3.sk Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index ea0e5e276cd6..5d123df0a866 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -2087,6 +2087,13 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&wwan_info, }, + /* Intel modem (label from OEM reads Fibocom L850-GL) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x8087, 0x095a, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, + }, + /* DisplayLink docking stations */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, From 26ebba25e210116053609f4c7ee701bffa7ebd7d Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 8 Aug 2025 12:05:26 +0800 Subject: [PATCH 0586/1307] tools/latency-collector: Check pkg-config install The tool pkg-config used to check libtraceevent and libtracefs, if not installed, it will report the libs not found, even though they have already been installed. Before: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel libtracefs is missing. Please install libtracefs-dev/libtracefs-devel After: Makefile.config:10: *** Error: pkg-config needed by libtraceevent/libtracefs is missing on this system, please install it. Link: https://lore.kernel.org/20250808040527.2036023-1-chen.dylane@linux.dev Fixes: 9d56c88e5225 ("tools/tracing: Use tools/build makefiles on latency-collector") Signed-off-by: Tao Chen Signed-off-by: Steven Rostedt (Google) --- tools/tracing/latency/Makefile.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config index 0fe6b50f029b..6efa13e3ca93 100644 --- a/tools/tracing/latency/Makefile.config +++ b/tools/tracing/latency/Makefile.config @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) From 7b128f1d53dcaa324d4aa05d821a6bf4a7b203e7 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 8 Aug 2025 12:05:27 +0800 Subject: [PATCH 0587/1307] rtla: Check pkg-config install The tool pkg-config used to check libtraceevent and libtracefs, if not installed, it will report the libs not found, even though they have already been installed. Before: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel libtracefs is missing. Please install libtracefs-dev/libtracefs-devel After: Makefile.config:10: *** Error: pkg-config needed by libtraceevent/libtracefs is missing on this system, please install it. Link: https://lore.kernel.org/20250808040527.2036023-2-chen.dylane@linux.dev Fixes: 01474dc706ca ("tools/rtla: Use tools/build makefiles to build rtla") Signed-off-by: Tao Chen Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/Makefile.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f2231d8d626..07ff5e8f3006 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -1,10 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := LIBTRACEEVENT_MIN_VERSION = 1.5 LIBTRACEFS_MIN_VERSION = 1.6 +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) From f179f5bc158f07693b74c264f8933c8b0f07503f Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 15 Aug 2025 10:53:17 -0300 Subject: [PATCH 0588/1307] net/sched: sch_dualpi2: Run prob update timer in softirq to avoid deadlock When a user creates a dualpi2 qdisc it automatically sets a timer. This timer will run constantly and update the qdisc's probability field. The issue is that the timer acquires the qdisc root lock and runs in hardirq. The qdisc root lock is also acquired in dev.c whenever a packet arrives for this qdisc. Since the dualpi2 timer callback runs in hardirq, it may interrupt the packet processing running in softirq. If that happens and it runs on the same CPU, it will acquire the same lock and cause a deadlock. The following splat shows up when running a kernel compiled with lock debugging: [ +0.000224] WARNING: inconsistent lock state [ +0.000224] 6.16.0+ #10 Not tainted [ +0.000169] -------------------------------- [ +0.000029] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ +0.000000] ping/156 [HC0[0]:SC0[2]:HE1:SE0] takes: [ +0.000000] ffff897841242110 (&sch->root_lock_key){?.-.}-{3:3}, at: __dev_queue_xmit+0x86d/0x1140 [ +0.000000] {IN-HARDIRQ-W} state was registered at: [ +0.000000] lock_acquire.part.0+0xb6/0x220 [ +0.000000] _raw_spin_lock+0x31/0x80 [ +0.000000] dualpi2_timer+0x6f/0x270 [ +0.000000] __hrtimer_run_queues+0x1c5/0x360 [ +0.000000] hrtimer_interrupt+0x115/0x260 [ +0.000000] __sysvec_apic_timer_interrupt+0x6d/0x1a0 [ +0.000000] sysvec_apic_timer_interrupt+0x6e/0x80 [ +0.000000] asm_sysvec_apic_timer_interrupt+0x1a/0x20 [ +0.000000] pv_native_safe_halt+0xf/0x20 [ +0.000000] default_idle+0x9/0x10 [ +0.000000] default_idle_call+0x7e/0x1e0 [ +0.000000] do_idle+0x1e8/0x250 [ +0.000000] cpu_startup_entry+0x29/0x30 [ +0.000000] rest_init+0x151/0x160 [ +0.000000] start_kernel+0x6f3/0x700 [ +0.000000] x86_64_start_reservations+0x24/0x30 [ +0.000000] x86_64_start_kernel+0xc8/0xd0 [ +0.000000] common_startup_64+0x13e/0x148 [ +0.000000] irq event stamp: 6884 [ +0.000000] hardirqs last enabled at (6883): [] neigh_resolve_output+0x223/0x270 [ +0.000000] hardirqs last disabled at (6882): [] neigh_resolve_output+0x1e8/0x270 [ +0.000000] softirqs last enabled at (6880): [] neigh_resolve_output+0x1db/0x270 [ +0.000000] softirqs last disabled at (6884): [] __dev_queue_xmit+0x73/0x1140 [ +0.000000] other info that might help us debug this: [ +0.000000] Possible unsafe locking scenario: [ +0.000000] CPU0 [ +0.000000] ---- [ +0.000000] lock(&sch->root_lock_key); [ +0.000000] [ +0.000000] lock(&sch->root_lock_key); [ +0.000000] *** DEADLOCK *** [ +0.000000] 4 locks held by ping/156: [ +0.000000] #0: ffff897842332e08 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x41e/0xf40 [ +0.000000] #1: ffffffffa816f880 (rcu_read_lock){....}-{1:3}, at: ip_output+0x2c/0x190 [ +0.000000] #2: ffffffffa816f880 (rcu_read_lock){....}-{1:3}, at: ip_finish_output2+0xad/0x950 [ +0.000000] #3: ffffffffa816f840 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x73/0x1140 I am able to reproduce it consistently when running the following: tc qdisc add dev lo handle 1: root dualpi2 ping -f 127.0.0.1 To fix it, make the timer run in softirq. Fixes: 320d031ad6e4 ("sched: Struct definition and parsing of dualpi2 qdisc") Reviewed-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250815135317.664993-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_dualpi2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 845375ebd4ea..4b975feb52b1 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -927,7 +927,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, q->sch = sch; dualpi2_reset_default(sch); - hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); if (opt && nla_len(opt)) { err = dualpi2_change(sch, opt, extack); @@ -937,7 +938,7 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, } hrtimer_start(&q->pi2_timer, next_pi2_timeout(q), - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS_PINNED_SOFT); return 0; } From bc1a59cff9f797bfbf8f3104507584d89e9ecf2e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 18 Aug 2025 10:10:29 +0200 Subject: [PATCH 0589/1307] phy: mscc: Fix timestamping for vsc8584 There was a problem when we received frames and the frames were timestamped. The driver is configured to store the nanosecond part of the timestmap in the ptp reserved bits and it would take the second part by reading the LTC. The problem is that when reading the LTC we are in atomic context and to read the second part will go over mdio bus which might sleep, so we get an error. The fix consists in actually put all the frames in a queue and start the aux work and in that work to read the LTC and then calculate the full received time. Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Reviewed-by: Vadim Fedorenko Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250818081029.1300780-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc.h | 12 ++++++++ drivers/net/phy/mscc/mscc_main.c | 12 ++++++++ drivers/net/phy/mscc/mscc_ptp.c | 49 ++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 6a3d8a754eb8..58c6d47fbe04 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -362,6 +362,13 @@ struct vsc85xx_hw_stat { u16 mask; }; +struct vsc8531_skb_cb { + u32 ns; +}; + +#define VSC8531_SKB_CB(skb) \ + ((struct vsc8531_skb_cb *)((skb)->cb)) + struct vsc8531_private { int rate_magic; u16 supp_led_modes; @@ -410,6 +417,11 @@ struct vsc8531_private { */ struct mutex ts_lock; struct mutex phc_lock; + + /* list of skbs that were received and need timestamp information but it + * didn't received it yet + */ + struct sk_buff_head rx_skbs_list; }; /* Shared structure between the PHYs of the same package. diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 7ed6522fb0ef..f1c9ce351ab4 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2335,6 +2335,13 @@ static int vsc85xx_probe(struct phy_device *phydev) return vsc85xx_dt_led_modes_get(phydev, default_mode); } +static void vsc85xx_remove(struct phy_device *phydev) +{ + struct vsc8531_private *priv = phydev->priv; + + skb_queue_purge(&priv->rx_skbs_list); +} + /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { @@ -2589,6 +2596,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8574_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, @@ -2614,6 +2622,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8574_probe, .set_wol = &vsc85xx_wol_set, .get_wol = &vsc85xx_wol_get, @@ -2639,6 +2648,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, @@ -2662,6 +2672,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, @@ -2685,6 +2696,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .remove = &vsc85xx_remove, .probe = &vsc8584_probe, .get_tunable = &vsc85xx_get_tunable, .set_tunable = &vsc85xx_set_tunable, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 275706de5847..de6c7312e8f2 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1194,9 +1194,7 @@ static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); - struct skb_shared_hwtstamps *shhwtstamps = NULL; struct vsc85xx_ptphdr *ptphdr; - struct timespec64 ts; unsigned long ns; if (!vsc8531->ptp->configured) @@ -1206,27 +1204,52 @@ static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, type == PTP_CLASS_NONE) return false; - vsc85xx_gettime(&vsc8531->ptp->caps, &ts); - ptphdr = get_ptp_header_rx(skb, vsc8531->ptp->rx_filter); if (!ptphdr) return false; - shhwtstamps = skb_hwtstamps(skb); - memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); - ns = ntohl(ptphdr->rsrvd2); - /* nsec is in reserved field */ - if (ts.tv_nsec < ns) - ts.tv_sec--; + VSC8531_SKB_CB(skb)->ns = ns; + skb_queue_tail(&vsc8531->rx_skbs_list, skb); - shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ns); - netif_rx(skb); + ptp_schedule_worker(vsc8531->ptp->ptp_clock, 0); return true; } +static long vsc85xx_do_aux_work(struct ptp_clock_info *info) +{ + struct vsc85xx_ptp *ptp = container_of(info, struct vsc85xx_ptp, caps); + struct skb_shared_hwtstamps *shhwtstamps = NULL; + struct phy_device *phydev = ptp->phydev; + struct vsc8531_private *priv = phydev->priv; + struct sk_buff_head received; + struct sk_buff *rx_skb; + struct timespec64 ts; + unsigned long flags; + + __skb_queue_head_init(&received); + spin_lock_irqsave(&priv->rx_skbs_list.lock, flags); + skb_queue_splice_tail_init(&priv->rx_skbs_list, &received); + spin_unlock_irqrestore(&priv->rx_skbs_list.lock, flags); + + vsc85xx_gettime(info, &ts); + while ((rx_skb = __skb_dequeue(&received)) != NULL) { + shhwtstamps = skb_hwtstamps(rx_skb); + memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); + + if (ts.tv_nsec < VSC8531_SKB_CB(rx_skb)->ns) + ts.tv_sec--; + + shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, + VSC8531_SKB_CB(rx_skb)->ns); + netif_rx(rx_skb); + } + + return -1; +} + static const struct ptp_clock_info vsc85xx_clk_caps = { .owner = THIS_MODULE, .name = "VSC85xx timer", @@ -1240,6 +1263,7 @@ static const struct ptp_clock_info vsc85xx_clk_caps = { .adjfine = &vsc85xx_adjfine, .gettime64 = &vsc85xx_gettime, .settime64 = &vsc85xx_settime, + .do_aux_work = &vsc85xx_do_aux_work, }; static struct vsc8531_private *vsc8584_base_priv(struct phy_device *phydev) @@ -1567,6 +1591,7 @@ int vsc8584_ptp_probe(struct phy_device *phydev) mutex_init(&vsc8531->phc_lock); mutex_init(&vsc8531->ts_lock); + skb_queue_head_init(&vsc8531->rx_skbs_list); /* Retrieve the shared load/save GPIO. Request it as non exclusive as * the same GPIO can be requested by all the PHYs of the same package. From 24ef2f53c07f273bad99173e27ee88d44d135b1c Mon Sep 17 00:00:00 2001 From: Yuichiro Tsuji Date: Mon, 18 Aug 2025 17:45:07 +0900 Subject: [PATCH 0590/1307] net: usb: asix_devices: Fix PHY address mask in MDIO bus initialization Syzbot reported shift-out-of-bounds exception on MDIO bus initialization. The PHY address should be masked to 5 bits (0-31). Without this mask, invalid PHY addresses could be used, potentially causing issues with MDIO bus operations. Fix this by masking the PHY address with 0x1f (31 decimal) to ensure it stays within the valid range. Fixes: 4faff70959d5 ("net: usb: asix_devices: add phy_mask for ax88772 mdio bus") Reported-by: syzbot+20537064367a0f98d597@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=20537064367a0f98d597 Tested-by: syzbot+20537064367a0f98d597@syzkaller.appspotmail.com Signed-off-by: Yuichiro Tsuji Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250818084541.1958-1-yuichtsu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index d9f5942ccc44..792ddda1ad49 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -676,7 +676,7 @@ static int ax88772_init_mdio(struct usbnet *dev) priv->mdio->read = &asix_mdio_bus_read; priv->mdio->write = &asix_mdio_bus_write; priv->mdio->name = "Asix MDIO Bus"; - priv->mdio->phy_mask = ~(BIT(priv->phy_addr) | BIT(AX_EMBD_PHY_ADDR)); + priv->mdio->phy_mask = ~(BIT(priv->phy_addr & 0x1f) | BIT(AX_EMBD_PHY_ADDR)); /* mii bus name is usb-- */ snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); From 75a9a46d67f46d608205888f9b34e315c1786345 Mon Sep 17 00:00:00 2001 From: Jordan Rhee Date: Mon, 18 Aug 2025 14:12:45 -0700 Subject: [PATCH 0591/1307] gve: prevent ethtool ops after shutdown A crash can occur if an ethtool operation is invoked after shutdown() is called. shutdown() is invoked during system shutdown to stop DMA operations without performing expensive deallocations. It is discouraged to unregister the netdev in this path, so the device may still be visible to userspace and kernel helpers. In gve, shutdown() tears down most internal data structures. If an ethtool operation is dispatched after shutdown(), it will dereference freed or NULL pointers, leading to a kernel panic. While graceful shutdown normally quiesces userspace before invoking the reboot syscall, forced shutdowns (as observed on GCP VMs) can still trigger this path. Fix by calling netif_device_detach() in shutdown(). This marks the device as detached so the ethtool ioctl handler will skip dispatching operations to the driver. Fixes: 974365e51861 ("gve: Implement suspend/resume/shutdown") Signed-off-by: Jordan Rhee Signed-off-by: Jeroen de Borst Link: https://patch.msgid.link/20250818211245.1156919-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 1f411d7c4373..1be1b1ef31ee 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2870,6 +2870,8 @@ static void gve_shutdown(struct pci_dev *pdev) struct gve_priv *priv = netdev_priv(netdev); bool was_up = netif_running(priv->dev); + netif_device_detach(netdev); + rtnl_lock(); netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { From 6d6714bf0c4e8eb2274081b4b023dfa01581c123 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 15 Aug 2025 10:48:03 +0000 Subject: [PATCH 0592/1307] net: stmmac: thead: Enable TX clock before MAC initialization The clk_tx_i clock must be supplied to the MAC for successful initialization. On TH1520 SoC, the clock is provided by an internal divider configured through GMAC_PLLCLK_DIV register when using RGMII interface. However, currently we don't setup the divider before initialization of the MAC, resulting in DMA reset failures if the bootloader/firmware doesn't enable the divider, [ 7.839601] thead-dwmac ffe7060000.ethernet eth0: Register MEM_TYPE_PAGE_POOL RxQ-0 [ 7.938338] thead-dwmac ffe7060000.ethernet eth0: PHY [stmmac-0:02] driver [RTL8211F Gigabit Ethernet] (irq=POLL) [ 8.160746] thead-dwmac ffe7060000.ethernet eth0: Failed to reset the dma [ 8.170118] thead-dwmac ffe7060000.ethernet eth0: stmmac_hw_setup: DMA engine initialization failed [ 8.179384] thead-dwmac ffe7060000.ethernet eth0: __stmmac_open: Hw setup failed Let's simply write GMAC_PLLCLK_DIV_EN to GMAC_PLLCLK_DIV to enable the divider before MAC initialization. Note that for reconfiguring the divisor, the divider must be disabled first and re-enabled later to make sure the new divisor take effect. The exact clock rate doesn't affect MAC's initialization according to my test. It's set to the speed required by RGMII when the linkspeed is 1Gbps and could be reclocked later after link is up if necessary. Fixes: 33a1a01e3afa ("net: stmmac: Add glue layer for T-HEAD TH1520 SoC") Signed-off-by: Yao Zi Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250815104803.55294-1-ziyao@disroot.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index f2946bea0bc2..6c6c49e4b66f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -152,7 +152,7 @@ static int thead_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) { struct thead_dwmac *dwmac = plat->bsp_priv; - u32 reg; + u32 reg, div; switch (plat->mac_interface) { case PHY_INTERFACE_MODE_MII: @@ -164,6 +164,13 @@ static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: /* use pll */ + div = clk_get_rate(plat->stmmac_clk) / rgmii_clock(SPEED_1000); + reg = FIELD_PREP(GMAC_PLLCLK_DIV_EN, 1) | + FIELD_PREP(GMAC_PLLCLK_DIV_NUM, div); + + writel(0, dwmac->apb_base + GMAC_PLLCLK_DIV); + writel(reg, dwmac->apb_base + GMAC_PLLCLK_DIV); + writel(GMAC_GTXCLK_SEL_PLL, dwmac->apb_base + GMAC_GTXCLK_SEL); reg = GMAC_TX_CLK_EN | GMAC_TX_CLK_N_EN | GMAC_TX_CLK_OUT_EN | GMAC_RX_CLK_EN | GMAC_RX_CLK_N_EN; From d9cef55ed49117bd63695446fb84b4b91815c0b4 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Mon, 18 Aug 2025 13:46:18 +0800 Subject: [PATCH 0593/1307] net/smc: fix UAF on smcsk after smc_listen_out() BPF CI testing report a UAF issue: [ 16.446633] BUG: kernel NULL pointer dereference, address: 000000000000003 0 [ 16.447134] #PF: supervisor read access in kernel mod e [ 16.447516] #PF: error_code(0x0000) - not-present pag e [ 16.447878] PGD 0 P4D 0 [ 16.448063] Oops: Oops: 0000 [#1] PREEMPT SMP NOPT I [ 16.448409] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:1 Tainted: G OE 6.13.0-rc3-g89e8a75fda73-dirty #4 2 [ 16.449124] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODUL E [ 16.449502] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/201 4 [ 16.450201] Workqueue: smc_hs_wq smc_listen_wor k [ 16.450531] RIP: 0010:smc_listen_work+0xc02/0x159 0 [ 16.452158] RSP: 0018:ffffb5ab40053d98 EFLAGS: 0001024 6 [ 16.452526] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 000000000000030 0 [ 16.452994] RDX: 0000000000000280 RSI: 00003513840053f0 RDI: 000000000000000 0 [ 16.453492] RBP: ffffa097808e3800 R08: ffffa09782dba1e0 R09: 000000000000000 5 [ 16.453987] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa0978274640 0 [ 16.454497] R13: 0000000000000000 R14: 0000000000000000 R15: ffffa09782d4092 0 [ 16.454996] FS: 0000000000000000(0000) GS:ffffa097bbc00000(0000) knlGS:000000000000000 0 [ 16.455557] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003 3 [ 16.455961] CR2: 0000000000000030 CR3: 0000000102788004 CR4: 0000000000770ef 0 [ 16.456459] PKRU: 5555555 4 [ 16.456654] Call Trace : [ 16.456832] [ 16.456989] ? __die+0x23/0x7 0 [ 16.457215] ? page_fault_oops+0x180/0x4c 0 [ 16.457508] ? __lock_acquire+0x3e6/0x249 0 [ 16.457801] ? exc_page_fault+0x68/0x20 0 [ 16.458080] ? asm_exc_page_fault+0x26/0x3 0 [ 16.458389] ? smc_listen_work+0xc02/0x159 0 [ 16.458689] ? smc_listen_work+0xc02/0x159 0 [ 16.458987] ? lock_is_held_type+0x8f/0x10 0 [ 16.459284] process_one_work+0x1ea/0x6d 0 [ 16.459570] worker_thread+0x1c3/0x38 0 [ 16.459839] ? __pfx_worker_thread+0x10/0x1 0 [ 16.460144] kthread+0xe0/0x11 0 [ 16.460372] ? __pfx_kthread+0x10/0x1 0 [ 16.460640] ret_from_fork+0x31/0x5 0 [ 16.460896] ? __pfx_kthread+0x10/0x1 0 [ 16.461166] ret_from_fork_asm+0x1a/0x3 0 [ 16.461453] [ 16.461616] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod(OE) ] [ 16.462134] CR2: 000000000000003 0 [ 16.462380] ---[ end trace 0000000000000000 ]--- [ 16.462710] RIP: 0010:smc_listen_work+0xc02/0x1590 The direct cause of this issue is that after smc_listen_out_connected(), newclcsock->sk may be NULL since it will releases the smcsk. Therefore, if the application closes the socket immediately after accept, newclcsock->sk can be NULL. A possible execution order could be as follows: smc_listen_work | userspace ----------------------------------------------------------------- lock_sock(sk) | smc_listen_out_connected() | | \- smc_listen_out | | | \- release_sock | | |- sk->sk_data_ready() | | fd = accept(); | close(fd); | \- socket->sk = NULL; /* newclcsock->sk is NULL now */ SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk)) Since smc_listen_out_connected() will not fail, simply swapping the order of the code can easily fix this issue. Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in af_smc") Signed-off-by: D. Wythe Reviewed-by: Guangguan Wang Reviewed-by: Alexandra Winter Reviewed-by: Dust Li Link: https://patch.msgid.link/20250818054618.41615-1-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9311c38f7abe..e0e48f24cd61 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2568,8 +2568,9 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; } - smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); + /* smc_listen_out() will release smcsk */ + smc_listen_out_connected(new_smc); goto out_free; out_unlock: From 2462c1b9217246a889ec318b3894d84e4dd709c6 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:17 +0300 Subject: [PATCH 0594/1307] net/mlx5: HWS, fix bad parameter in CQ creation 'cqe_sz' valid value should be 0 for 64-byte CQE. Fixes: 2ca62599aa0b ("net/mlx5: HWS, added send engine and context handling") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index c4b22be19a9b..b0595c9b09e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -964,7 +964,6 @@ static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev, return -ENOMEM; MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); - MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries); MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries)); err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq); From 615b690612b7785ab8632f6a5a941550622e4e36 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:18 +0300 Subject: [PATCH 0595/1307] net/mlx5: HWS, fix simple rules rehash error flow Moving rules from matcher to matcher should not fail. However, if it does fail due to various reasons, the error flow should allow the kernel to continue functioning (albeit with broken steering rules) instead of going into series of soft lock-ups or some other problematic behaviour. This patch fixes the error flow for moving simple rules: - If new rule creation fails before it was even enqeued, do not poll for completion - If TIMEOUT happened while moving the rule, no point trying to poll for completions for other rules. Something is broken, completion won't come, just abort the rehash sequence. - If some other completion with error received, don't give up. Continue handling rest of the rules to minimize the damage. - Make sure that the first error code that was received will be actually returned to the caller instead of replacing it with the generic error code. All the aforementioned issues stem from the same bad error flow, so no point fixing them one by one and leaving partially broken code - fixing them in one patch. Fixes: ef94799a8741 ("net/mlx5: HWS, rework rehash loop") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 61 +++++++++++++------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 92de4b761a83..0219a49b2326 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -74,9 +74,9 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) { - bool move_error = false, poll_error = false, drain_error = false; struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + int drain_error = 0, move_error = 0, poll_error = 0; u16 bwc_queues = mlx5hws_bwc_queues(ctx); struct mlx5hws_rule_attr rule_attr; struct mlx5hws_bwc_rule *bwc_rule; @@ -99,11 +99,15 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) ret = mlx5hws_matcher_resize_rule_move(matcher, bwc_rule->rule, &rule_attr); - if (unlikely(ret && !move_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", - ret); - move_error = true; + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; } pending_rules++; @@ -111,11 +115,19 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &pending_rules, false); - if (unlikely(ret && !poll_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n", - ret); - poll_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } } } @@ -126,17 +138,30 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &pending_rules, true); - if (unlikely(ret && !drain_error)) { - mlx5hws_err(ctx, - "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n", - ret); - drain_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving bwc rule: timeout draining completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!drain_error) { + mlx5hws_err(ctx, + "Moving bwc rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = ret; + } } } } - if (move_error || poll_error || drain_error) - ret = -EINVAL; + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; + if (unlikely(drain_error)) + return drain_error; return ret; } From 4a842b1bf18a32ee0c25dd6dd98728b786a76fe4 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:19 +0300 Subject: [PATCH 0596/1307] net/mlx5: HWS, fix complex rules rehash error flow Moving rules from matcher to matcher should not fail. However, if it does fail due to various reasons, the error flow should allow the kernel to continue functioning (albeit with broken steering rules) instead of going into series of soft lock-ups or some other problematic behaviour. Similar to the simple rules, complex rules rehash logic suffers from the same problems. This patch fixes the error flow for moving complex rules: - If new rule creation fails before it was even enqeued, do not poll for completion - If TIMEOUT happened while moving the rule, no point trying to poll for completions for other rules. Something is broken, completion won't come, just abort the rehash sequence. - If some other completion with error received, don't give up. Continue handling rest of the rules to minimize the damage. - Make sure that the first error code that was received will be actually returned to the caller instead of replacing it with the generic error code. All the aforementioned issues stem from the same bad error flow, so no point fixing them one by one and leaving partially broken code - fixing them in one patch. Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mlx5/core/steering/hws/bwc_complex.c | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index ca7501c57468..14e79579c719 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -1328,11 +1328,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_matcher *matcher = bwc_matcher->matcher; - bool move_error = false, poll_error = false; u16 bwc_queues = mlx5hws_bwc_queues(ctx); struct mlx5hws_bwc_rule *tmp_bwc_rule; struct mlx5hws_rule_attr rule_attr; struct mlx5hws_table *isolated_tbl; + int move_error = 0, poll_error = 0; struct mlx5hws_rule *tmp_rule; struct list_head *rules_list; u32 expected_completions = 1; @@ -1391,11 +1391,15 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) ret = mlx5hws_matcher_resize_rule_move(matcher, tmp_rule, &rule_attr); - if (unlikely(ret && !move_error)) { - mlx5hws_err(ctx, - "Moving complex BWC rule failed (%d), attempting to move rest of the rules\n", - ret); - move_error = true; + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; } expected_completions = 1; @@ -1403,11 +1407,19 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) rule_attr.queue_id, &expected_completions, true); - if (unlikely(ret && !poll_error)) { - mlx5hws_err(ctx, - "Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n", - ret); - poll_error = true; + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving complex BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } } /* Done moving the rule to the new matcher, @@ -1422,8 +1434,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) } } - if (move_error || poll_error) - ret = -EINVAL; + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; return ret; } From 1a72298d27ce4d41b3fd405f6921e8711815767a Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:20 +0300 Subject: [PATCH 0597/1307] net/mlx5: HWS, prevent rehash from filling up the queues While moving the rules during rehash, CQ is not drained. The flush and drain happens only when all the rules of a certain queue have been moved. This behaviour can lead to accumulating large quantity of rules that haven't got their completion yet, and eventually will fill up the queue and will cause the rehash to fail. Fix this problem by requiring drain once the number of outstanding completions reaches a certain threshold. Fixes: ef94799a8741 ("net/mlx5: HWS, rework rehash loop") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 0219a49b2326..2a59be11fe55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -84,6 +84,7 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) struct list_head *rules_list; u32 pending_rules; int i, ret = 0; + bool drain; mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); @@ -111,10 +112,12 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) } pending_rules++; + drain = pending_rules >= + hws_bwc_get_burst_th(ctx, rule_attr.queue_id); ret = mlx5hws_bwc_queue_poll(ctx, rule_attr.queue_id, &pending_rules, - false); + drain); if (unlikely(ret)) { if (ret == -ETIMEDOUT) { mlx5hws_err(ctx, From 7c60952f83584bc4950057cfed2cc3c87343b5db Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Aug 2025 23:23:21 +0300 Subject: [PATCH 0598/1307] net/mlx5: HWS, don't rehash on every kind of insertion failure If rule creation failed due to a full queue, due to timeout in polling for completion, or due to matcher being in resize, don't try to initiate rehash sequence - rehash would have failed anyway. Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Vlad Dogaru Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 2a59be11fe55..adeccc588e5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -1063,6 +1063,21 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, return 0; /* rule inserted successfully */ } + /* Rule insertion could fail due to queue being full, timeout, or + * matcher in resize. In such cases, no point in trying to rehash. + */ + if (ret == -EBUSY || ret == -ETIMEDOUT || ret == -EAGAIN) { + mutex_unlock(queue_lock); + mlx5hws_err(ctx, + "BWC rule insertion failed - %s (%d)\n", + ret == -EBUSY ? "queue is full" : + ret == -ETIMEDOUT ? "timeout" : + ret == -EAGAIN ? "matcher in resize" : "N/A", + ret); + hws_bwc_rule_cnt_dec(bwc_rule); + return ret; + } + /* At this point the rule wasn't added. * It could be because there was collision, or some other problem. * Try rehash by size and insert rule again - last chance. From 8a51507320ebddaab32610199774f69cd7d53e78 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Sun, 17 Aug 2025 23:23:22 +0300 Subject: [PATCH 0599/1307] net/mlx5: HWS, Fix table creation UID During table creation, caller passes a UID using ft_attr. The UID value was ignored, which leads to problems when the caller sets the UID to a non-zero value, such as SHARED_RESOURCE_UID (0xffff) - the internal FT objects will be created with UID=0. Fixes: 0869701cba3d ("net/mlx5: HWS, added FW commands handling") Signed-off-by: Alex Vesker Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/steering/hws/cmd.c | 1 + .../ethernet/mellanox/mlx5/core/steering/hws/cmd.h | 1 + .../mellanox/mlx5/core/steering/hws/fs_hws.c | 1 + .../mellanox/mlx5/core/steering/hws/matcher.c | 5 ++++- .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 + .../mellanox/mlx5/core/steering/hws/table.c | 13 ++++++++++--- .../mellanox/mlx5/core/steering/hws/table.h | 3 ++- 7 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 9c83753e4592..0bdcab2e5cf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -55,6 +55,7 @@ int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); MLX5_SET(create_flow_table_in, in, table_type, ft_attr->type); + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); ft_ctx = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); MLX5_SET(flow_table_context, ft_ctx, level, ft_attr->level); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index fa6bff210266..122ccc671628 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -36,6 +36,7 @@ struct mlx5hws_cmd_set_fte_attr { struct mlx5hws_cmd_ft_create_attr { u8 type; u8 level; + u16 uid; bool rtc_valid; bool decap_en; bool reformat_en; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 57592b92e24b..131e74b2b774 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -267,6 +267,7 @@ static int mlx5_cmd_hws_create_flow_table(struct mlx5_flow_root_namespace *ns, tbl_attr.type = MLX5HWS_TABLE_TYPE_FDB; tbl_attr.level = ft_attr->level; + tbl_attr.uid = ft_attr->uid; tbl = mlx5hws_table_create(ctx, &tbl_attr); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating hws flow_table\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index f3ea09caba2b..32f87fdf3213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -85,6 +85,7 @@ static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher) ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + 0, &matcher->end_ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n"); @@ -112,7 +113,9 @@ static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) if (mlx5hws_matcher_is_isolated(matcher)) ret = hws_matcher_create_end_ft_isolated(matcher); else - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + 0, &matcher->end_ft_id); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 59c14745ed0c..2498ceff2060 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -75,6 +75,7 @@ struct mlx5hws_context_attr { struct mlx5hws_table_attr { enum mlx5hws_table_type type; u32 level; + u16 uid; }; enum mlx5hws_matcher_flow_src { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 568f691733f3..6113383ae47b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -9,6 +9,7 @@ u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl) } static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, + u16 uid, struct mlx5hws_cmd_ft_create_attr *ft_attr) { ft_attr->type = tbl->fw_ft_type; @@ -16,7 +17,9 @@ static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, ft_attr->level = tbl->ctx->caps->fdb_ft.max_level - 1; else ft_attr->level = tbl->ctx->caps->nic_ft.max_level - 1; + ft_attr->rtc_valid = true; + ft_attr->uid = uid; } static void hws_table_set_cap_attr(struct mlx5hws_table *tbl, @@ -119,12 +122,12 @@ static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id) + u16 uid, u32 *ft_id) { struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; int ret; - hws_table_init_next_ft_attr(tbl, &ft_attr); + hws_table_init_next_ft_attr(tbl, uid, &ft_attr); hws_table_set_cap_attr(tbl, &ft_attr); ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, ft_id); @@ -189,7 +192,10 @@ static int hws_table_init(struct mlx5hws_table *tbl) } mutex_lock(&ctx->ctrl_lock); - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &tbl->ft_id); + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + tbl->uid, + &tbl->ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Failed to create flow table object\n"); mutex_unlock(&ctx->ctrl_lock); @@ -239,6 +245,7 @@ struct mlx5hws_table *mlx5hws_table_create(struct mlx5hws_context *ctx, tbl->ctx = ctx; tbl->type = attr->type; tbl->level = attr->level; + tbl->uid = attr->uid; ret = hws_table_init(tbl); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h index 0400cce0c317..1246f9bd8422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h @@ -18,6 +18,7 @@ struct mlx5hws_table { enum mlx5hws_table_type type; u32 fw_ft_type; u32 level; + u16 uid; struct list_head matchers_list; struct list_head tbl_list_node; struct mlx5hws_default_miss default_miss; @@ -47,7 +48,7 @@ u32 mlx5hws_table_get_res_fw_ft_type(enum mlx5hws_table_type tbl_type, int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id); + u16 uid, u32 *ft_id); void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl, u32 ft_id); From d2d6f950cb43be6845a41cac5956cb2a10e657e5 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Sun, 17 Aug 2025 23:23:23 +0300 Subject: [PATCH 0600/1307] net/mlx5: CT: Use the correct counter offset Specifying the counter action is not enough, as it is used by multiple counters that were allocated in a bulk. By omitting the offset, rules will be associated with a different counter from the same bulk. Subsequently, the CT subsystem checks the correct counter, assumes that no traffic has triggered the rule, and ages out the rule. The end result is intermittent offloading of long lived connections, as rules are aged out then promptly re-added. Fix this by specifying the correct offset along with the counter rule. Fixes: 34eea5b12a10 ("net/mlx5e: CT: Add initial support for Hardware Steering") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250817202323.308604-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c index a4263137fef5..01d522b02947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -173,6 +173,8 @@ static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs, memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions)); rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter); + rule_actions[0].counter.offset = + attr->counter->id - attr->counter->bulk->base_id; /* Modify header is special, it may require extra arguments outside the action itself. */ if (mh_action->mh_data) { rule_actions[1].modify_header.offset = mh_action->mh_data->offset; From 1683fd1b2fa79864d3c7a951d9cea0a9ba1a1923 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 18 Aug 2025 11:35:13 +0530 Subject: [PATCH 0601/1307] microchip: lan865x: fix missing netif_start_queue() call on device open This fixes an issue where the transmit queue is started implicitly only the very first time the device is registered. When the device is taken down and brought back up again (using `ip` or `ifconfig`), the transmit queue is not restarted, causing packet transmission to hang. Adding an explicit call to netif_start_queue() in lan865x_net_open() ensures the transmit queue is properly started every time the device is reopened. Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Parthiban Veerasooran Link: https://patch.msgid.link/20250818060514.52795-2-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index dd436bdff0f8..d03f5a8de58d 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -311,6 +311,8 @@ static int lan865x_net_open(struct net_device *netdev) phy_start(netdev->phydev); + netif_start_queue(netdev); + return 0; } From 2cd58fec912acec273cb155911ab8f06ddbb131a Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Mon, 18 Aug 2025 11:35:14 +0530 Subject: [PATCH 0602/1307] microchip: lan865x: fix missing Timer Increment config for Rev.B0/B1 Fix missing configuration for LAN865x silicon revisions B0 and B1 as per Microchip Application Note AN1760 (Rev F, June 2024). The Timer Increment register was not being set, which is required for accurate timestamping. As per the application note, configure the MAC to set timestamping at the end of the Start of Frame Delimiter (SFD), and set the Timer Increment register to 40 ns (corresponding to a 25 MHz internal clock). Link: https://www.microchip.com/en-us/application-notes/an1760 Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Parthiban Veerasooran Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250818060514.52795-3-parthiban.veerasooran@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/lan865x/lan865x.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index d03f5a8de58d..84c41f193561 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -32,6 +32,10 @@ /* MAC Specific Addr 1 Top Reg */ #define LAN865X_REG_MAC_H_SADDR1 0x00010023 +/* MAC TSU Timer Increment Register */ +#define LAN865X_REG_MAC_TSU_TIMER_INCR 0x00010077 +#define MAC_TSU_TIMER_INCR_COUNT_NANOSECONDS 0x0028 + struct lan865x_priv { struct work_struct multicast_work; struct net_device *netdev; @@ -346,6 +350,21 @@ static int lan865x_probe(struct spi_device *spi) goto free_netdev; } + /* LAN865x Rev.B0/B1 configuration parameters from AN1760 + * As per the Configuration Application Note AN1760 published in the + * link, https://www.microchip.com/en-us/application-notes/an1760 + * Revision F (DS60001760G - June 2024), configure the MAC to set time + * stamping at the end of the Start of Frame Delimiter (SFD) and set the + * Timer Increment reg to 40 ns to be used as a 25 MHz internal clock. + */ + ret = oa_tc6_write_register(priv->tc6, LAN865X_REG_MAC_TSU_TIMER_INCR, + MAC_TSU_TIMER_INCR_COUNT_NANOSECONDS); + if (ret) { + dev_err(&spi->dev, "Failed to config TSU Timer Incr reg: %d\n", + ret); + goto oa_tc6_exit; + } + /* As per the point s3 in the below errata, SPI receive Ethernet frame * transfer may halt when starting the next frame in the same data block * (chunk) as the end of a previous frame. The RFA field should be From 3f4422e7c9436abf81a00270be7e4d6d3760ec0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Aug 2025 07:19:01 +0200 Subject: [PATCH 0603/1307] ALSA: hda: tas2781: Fix wrong reference of tasdevice_priv During the conversion to unify the calibration data management, the reference to tasdevice_priv was wrongly set to h->hda_priv instead of h->priv. This resulted in memory corruption and crashes eventually. Unfortunately it's a void pointer, hence the compiler couldn't know that it's wrong. Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib") Link: https://bugzilla.suse.com/show_bug.cgi?id=1248270 Cc: Link: https://patch.msgid.link/20250820051902.4523-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index b91fff3fde97..e34b17f0c9b9 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -305,7 +305,7 @@ static int tas2563_save_calibration(struct tas2781_hda *h) efi_char16_t efi_name[TAS2563_CAL_VAR_NAME_MAX]; unsigned long max_size = TAS2563_CAL_DATA_SIZE; unsigned char var8[TAS2563_CAL_VAR_NAME_MAX]; - struct tasdevice_priv *p = h->hda_priv; + struct tasdevice_priv *p = h->priv; struct calidata *cd = &p->cali_data; struct cali_reg *r = &cd->cali_reg_array; unsigned int offset = 0; From 02c1b0824eb1873b15676257cf1dc80070927e1e Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 6 Aug 2025 15:56:07 +1000 Subject: [PATCH 0604/1307] KVM: PPC: Fix misleading interrupts comment in kvmppc_prepare_to_enter() Until commit 6c85f52b10fd ("kvm/ppc: IRQ disabling cleanup"), kvmppc_prepare_to_enter() was called with interrupts already disabled by the caller, which was documented in the comment above the function. Post-cleanup, the function is now called with interrupts enabled, and disables interrupts itself. Fix the comment to reflect the current behaviour. Fixes: 6c85f52b10fd ("kvm/ppc: IRQ disabling cleanup") Signed-off-by: Andrew Donnellan Reviewed-by: Amit Machhiwal Reviewed-by: Gautam Menghani Reviewed-by: Shrikanth Hegde [Fixed the double colon in Reviewed-by line] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250806055607.17081-1-ajd@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153587741864..2ba057171ebe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -69,7 +69,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) /* * Common checks before entering the guest world. Call with interrupts - * disabled. + * enabled. * * returns: * From b018bb26c42049e05d3d65b057cc1250d17d9b0a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 23 Jul 2025 08:28:42 +0200 Subject: [PATCH 0605/1307] powerpc: Use dev_fwnode() irq_domain_create_simple() takes fwnode as the first argument. It can be extracted from the struct device using dev_fwnode() helper instead of using of_node with of_fwnode_handle(). So use the dev_fwnode() helper. Signed-off-by: Jiri Slaby (SUSE) Acked-by: Christophe Leroy Link: https://lore.kernel.org/all/4bc0e1ca-a523-424a-8759-59e353317fba@kernel.org/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250723062842.1831271-1-jirislaby@kernel.org --- arch/powerpc/platforms/8xx/cpm1-ic.c | 3 +-- arch/powerpc/sysdev/fsl_msi.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c index a49d4a9ab3bc..3292071e4da3 100644 --- a/arch/powerpc/platforms/8xx/cpm1-ic.c +++ b/arch/powerpc/platforms/8xx/cpm1-ic.c @@ -110,8 +110,7 @@ static int cpm_pic_probe(struct platform_device *pdev) out_be32(&data->reg->cpic_cimr, 0); - data->host = irq_domain_create_linear(of_fwnode_handle(dev->of_node), - 64, &cpm_pic_host_ops, data); + data->host = irq_domain_create_linear(dev_fwnode(dev), 64, &cpm_pic_host_ops, data); if (!data->host) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 4fe8a7b1b288..2a007bfb038d 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -412,9 +412,8 @@ static int fsl_of_msi_probe(struct platform_device *dev) } platform_set_drvdata(dev, msi); - msi->irqhost = irq_domain_create_linear(of_fwnode_handle(dev->dev.of_node), - NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi); - + msi->irqhost = irq_domain_create_linear(dev_fwnode(&dev->dev), NR_MSI_IRQS_MAX, + &fsl_msi_host_ops, msi); if (msi->irqhost == NULL) { dev_err(&dev->dev, "No memory for MSI irqhost\n"); err = -ENOMEM; From 8b5d86a63bc9510e094a15d7268c60bd4347b95c Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Fri, 1 Aug 2025 11:59:08 +0800 Subject: [PATCH 0606/1307] powerpc/64: Drop unnecessary 'rc' variable Simplify the code to enhance readability and maintain a consistent coding style. Signed-off-by: Xichao Zhao Acked-by: Gautam Menghani Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250801035908.370463-1-zhao.xichao@vivo.com --- arch/powerpc/kernel/setup_64.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7284c8021eeb..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -141,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } From eb59d4c5948d93e940b5dde9d1bf3b33367fbcb8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Jun 2025 01:32:24 +0900 Subject: [PATCH 0607/1307] powerpc: use always-y instead of extra-y in Makefiles The extra-y syntax is planned for deprecation because it is similar to always-y. When building the boot wrapper, always-y and extra-y are equivalent. Use always-y instead. In arch/powerpc/kernel/Makefile, I added ifdef KBUILD_BUILTIN to keep the current behavior: prom_init_check is skipped when building only modular objects. Signed-off-by: Masahiro Yamada Acked-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250602163302.478765-1-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 6 +++--- arch/powerpc/kernel/Makefile | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7ab087d412c..c47b78c1d3e7 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -243,13 +243,13 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds -extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ +always-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds dtstree := $(src)/dts wrapper := $(src)/wrapper -wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ +wrapperbits := $(always-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ $(wrapper) FORCE ############# @@ -456,7 +456,7 @@ WRAPPER_DTSDIR := /usr/lib/kernel-wrapper/dts WRAPPER_BINDIR := /usr/sbin INSTALL := install -extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y)) +extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(always-y)) hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs)) wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fb2b95267022..2f0a2e69c607 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -199,7 +199,9 @@ obj-$(CONFIG_ALTIVEC) += vector.o obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +ifdef KBUILD_BUILTIN +always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +endif obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) From 6a859f1a19d1f8756ffb097f5973dfebbca4811a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jun 2025 19:13:51 +0900 Subject: [PATCH 0608/1307] powerpc: unify two CONFIG_POWERPC64_CPU entries in the same choice block There are two CONFIG_POWERPC64_CPU entries in the "CPU selection" choice block. I guess the intent is to display a different prompt depending on CPU_LITTLE_ENDIAN: "Generic (POWER5 and PowerPC 970 and above)" for big endian, and "Generic (POWER8 and above)" for little endian. I stumbled on this tricky use case, and worked around it on Kconfig with commit 4d46b5b623e0 ("kconfig: fix infinite loop in sym_calc_choice()"). However, I doubt that supporting multiple entries with the same symbol in a choice block is worth the complexity - this is the only such case in the kernel tree. This commit merges the two entries. Once this cleanup is accepted in the powerpc subsystem, I will proceed to refactor the Kconfig parser. Signed-off-by: Masahiro Yamada Reviewed-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250607102005.1965409-1-masahiroy@kernel.org --- arch/powerpc/platforms/Kconfig.cputype | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 613b383ed8b3..7b527d18aa5e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -122,16 +122,11 @@ choice If unsure, select Generic. config POWERPC64_CPU - bool "Generic (POWER5 and PowerPC 970 and above)" - depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + bool "Generic 64 bits powerpc" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER if CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU - -config POWERPC64_CPU - bool "Generic (POWER8 and above)" - depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN - select ARCH_HAS_FAST_MULTIPLIER - select PPC_64S_HASH_MMU - select PPC_HAS_LBARX_LHARX + select PPC_HAS_LBARX_LHARX if CPU_LITTLE_ENDIAN config POWERPC_CPU bool "Generic 32 bits powerpc" From 88688a2c8ac6c8036d983ad8b34ce191c46a10aa Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:04 +0530 Subject: [PATCH 0609/1307] powerpc/kvm: Fix ifdef to remove build warning When compiling for pseries or powernv defconfig with "make C=1", these warning were reported bu sparse tool in powerpc/kernel/kvm.c arch/powerpc/kernel/kvm.c:635:9: warning: switch with no cases arch/powerpc/kernel/kvm.c:646:9: warning: switch with no cases Currently #ifdef were added after the switch case which are specific for BOOKE and PPC_BOOK3S_32. These are not enabled in pseries/powernv defconfig. Fix it by moving the #ifdef before switch(){} Fixes: cbe487fac7fc0 ("KVM: PPC: Add mtsrin PV code") Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-1-maddy@linux.ibm.com --- arch/powerpc/kernel/kvm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 5b3c093611ba..7209d00a9c25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -632,19 +632,19 @@ static void __init kvm_check_ins(u32 *inst, u32 features) #endif } - switch (inst_no_rt & ~KVM_MASK_RB) { #ifdef CONFIG_PPC_BOOK3S_32 + switch (inst_no_rt & ~KVM_MASK_RB) { case KVM_INST_MTSRIN: if (features & KVM_MAGIC_FEAT_SR) { u32 inst_rb = _inst & KVM_MASK_RB; kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; -#endif } +#endif - switch (_inst) { #ifdef CONFIG_BOOKE + switch (_inst) { case KVM_INST_WRTEEI_0: kvm_patch_ins_wrteei_0(inst); break; @@ -652,8 +652,8 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_WRTEEI_1: kvm_patch_ins_wrtee(inst, 0, 1); break; -#endif } +#endif } extern u32 kvm_template_start[]; From d40ae9033418095642f65f4fd54dc5a7d292ee39 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:06 +0530 Subject: [PATCH 0610/1307] powerpc/prom_init: Fix shellcheck warnings Fix "Double quote to prevent globbing and word splitting." warning from shellcheck Tested-by: Venkat Rao Bagalkote Reviewed-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-3-maddy@linux.ibm.com --- arch/powerpc/kernel/prom_init_check.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 69623b9045d5..3090b97258ae 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -15,8 +15,8 @@ has_renamed_memintrinsics() { - grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \ - ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG} + grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}" } if has_renamed_memintrinsics @@ -42,15 +42,15 @@ check_section() { file=$1 section=$2 - size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") size=${size:-0} - if [ $size -ne 0 ]; then + if [ "$size" -ne 0 ]; then ERROR=1 echo "Error: Section $section not empty in prom_init.c" >&2 fi } -for UNDEF in $($NM -u $OBJ | awk '{print $2}') +for UNDEF in $($NM -u "$OBJ" | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have # a leading . on the name, so strip it off here. @@ -87,8 +87,8 @@ do fi done -check_section $OBJ .data -check_section $OBJ .bss -check_section $OBJ .init.data +check_section "$OBJ" .data +check_section "$OBJ" .bss +check_section "$OBJ" .init.data exit $ERROR From 8763d2257f5231cfdfd8a53594647927dbf8bb06 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 18 May 2025 10:11:07 +0530 Subject: [PATCH 0611/1307] powerpc/boot/install.sh: Fix shellcheck warnings Fix shellcheck warning such as "Double quote to prevent globbing and word splitting." and Use $(...) notation instead of legacy backticks `...`. Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250518044107.39928-4-maddy@linux.ibm.com --- arch/powerpc/boot/install.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 101fcb397a0f..c3df6c27ce75 100755 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -19,19 +19,19 @@ set -e # this should work for both the pSeries zImage and the iSeries vmlinux.sm -image_name=`basename $2` +image_name=$(basename "$2") echo "Warning: '${INSTALLKERNEL}' command not available... Copying" \ "directly to $4/$image_name-$1" >&2 -if [ -f $4/$image_name-$1 ]; then - mv $4/$image_name-$1 $4/$image_name-$1.old +if [ -f "$4"/"$image_name"-"$1" ]; then + mv "$4"/"$image_name"-"$1" "$4"/"$image_name"-"$1".old fi -if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System-$1.old +if [ -f "$4"/System.map-"$1" ]; then + mv "$4"/System.map-"$1" "$4"/System-"$1".old fi -cat $2 > $4/$image_name-$1 -cp $3 $4/System.map-$1 +cat "$2" > "$4"/"$image_name"-"$1" +cp "$3" "$4"/System.map-"$1" From d072148a8631f102de60ed5a3a827e85d09d24f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Aug 2025 10:25:00 +0200 Subject: [PATCH 0612/1307] fs: add a FMODE_ flag to indicate IOCB_HAS_METADATA availability Currently the kernel will happily route io_uring requests with metadata to file operations that don't support it. Add a FMODE_ flag to guard that. Fixes: 4de2ce04c862 ("fs: introduce IOCB_HAS_METADATA for metadata") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250819082517.2038819-2-hch@lst.de Signed-off-by: Christian Brauner --- block/fops.c | 3 +++ include/linux/fs.h | 3 ++- io_uring/rw.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/block/fops.c b/block/fops.c index 82451ac8ff25..08e7c21bd9f1 100644 --- a/block/fops.c +++ b/block/fops.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -687,6 +688,8 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (bdev_can_atomic_write(bdev)) filp->f_mode |= FMODE_CAN_ATOMIC_WRITE; + if (blk_get_integrity(bdev->bd_disk)) + filp->f_mode |= FMODE_HAS_METADATA; ret = bdev_open(bdev, mode, filp->private_data, NULL, filp); if (ret) diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..601d036a6c78 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -149,7 +149,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* FMODE_* bit 13 */ +/* Supports IOCB_HAS_METADATA */ +#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) diff --git a/io_uring/rw.c b/io_uring/rw.c index 52a5b950b2e5..af5a54b5db12 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -886,6 +886,9 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) if (req->flags & REQ_F_HAS_METADATA) { struct io_async_rw *io = req->async_data; + if (!(file->f_mode & FMODE_HAS_METADATA)) + return -EINVAL; + /* * We have a union of meta fields with wpq used for buffered-io * in io_async_rw, so fail it here. From 2729a60bbfb9215997f25372ebe9b7964f038296 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Aug 2025 10:25:01 +0200 Subject: [PATCH 0613/1307] block: don't silently ignore metadata for sync read/write The block fops don't try to handle metadata for synchronous requests, probably because the completion handler looks at dio->iocb which is not valid for synchronous requests. But silently ignoring metadata (or warning in case of __blkdev_direct_IO_simple) is a really bad idea as that can cause silent data corruption if a user ever shows up. Instead simply handle metadata for synchronous requests as the completion handler can simply check for bio_integrity() as the block layer default integrity will already be freed at this point, and thus bio_integrity() will only return true for user mapped integrity. Fixes: 3d8b5a22d404 ("block: add support to pass user meta buffer") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250819082517.2038819-3-hch@lst.de Reviewed-by: Martin K. Petersen Signed-off-by: Christian Brauner --- block/fops.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/fops.c b/block/fops.c index 08e7c21bd9f1..ddbc69c0922b 100644 --- a/block/fops.c +++ b/block/fops.c @@ -55,7 +55,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct bio bio; ssize_t ret; - WARN_ON_ONCE(iocb->ki_flags & IOCB_HAS_METADATA); if (nr_pages <= DIO_INLINE_BIO_VECS) vecs = inline_vecs; else { @@ -132,7 +131,7 @@ static void blkdev_bio_end_io(struct bio *bio) if (bio->bi_status && !dio->bio.bi_status) dio->bio.bi_status = bio->bi_status; - if (!is_sync && (dio->iocb->ki_flags & IOCB_HAS_METADATA)) + if (bio_integrity(bio)) bio_integrity_unmap_user(bio); if (atomic_dec_and_test(&dio->ref)) { @@ -234,7 +233,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } bio->bi_opf |= REQ_NOWAIT; } - if (!is_sync && (iocb->ki_flags & IOCB_HAS_METADATA)) { + if (iocb->ki_flags & IOCB_HAS_METADATA) { ret = bio_integrity_map_iter(bio, iocb->private); if (unlikely(ret)) goto fail; @@ -302,7 +301,7 @@ static void blkdev_bio_end_io_async(struct bio *bio) ret = blk_status_to_errno(bio->bi_status); } - if (iocb->ki_flags & IOCB_HAS_METADATA) + if (bio_integrity(bio)) bio_integrity_unmap_user(bio); iocb->ki_complete(iocb, ret); @@ -423,7 +422,8 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); - if (likely(nr_pages <= BIO_MAX_VECS)) { + if (likely(nr_pages <= BIO_MAX_VECS && + !(iocb->ki_flags & IOCB_HAS_METADATA))) { if (is_sync_kiocb(iocb)) return __blkdev_direct_IO_simple(iocb, iter, bdev, nr_pages); From 6a909ea83f226803ea0e718f6e88613df9234d58 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 13 Aug 2025 04:02:32 +0000 Subject: [PATCH 0614/1307] tracing: Limit access to parser->buffer when trace_get_user failed When the length of the string written to set_ftrace_filter exceeds FTRACE_BUFF_MAX, the following KASAN alarm will be triggered: BUG: KASAN: slab-out-of-bounds in strsep+0x18c/0x1b0 Read of size 1 at addr ffff0000d00bd5ba by task ash/165 CPU: 1 UID: 0 PID: 165 Comm: ash Not tainted 6.16.0-g6bcdbd62bd56-dirty Hardware name: linux,dummy-virt (DT) Call trace: show_stack+0x34/0x50 (C) dump_stack_lvl+0xa0/0x158 print_address_description.constprop.0+0x88/0x398 print_report+0xb0/0x280 kasan_report+0xa4/0xf0 __asan_report_load1_noabort+0x20/0x30 strsep+0x18c/0x1b0 ftrace_process_regex.isra.0+0x100/0x2d8 ftrace_regex_release+0x484/0x618 __fput+0x364/0xa58 ____fput+0x28/0x40 task_work_run+0x154/0x278 do_notify_resume+0x1f0/0x220 el0_svc+0xec/0xf0 el0t_64_sync_handler+0xa0/0xe8 el0t_64_sync+0x1ac/0x1b0 The reason is that trace_get_user will fail when processing a string longer than FTRACE_BUFF_MAX, but not set the end of parser->buffer to 0. Then an OOB access will be triggered in ftrace_regex_release-> ftrace_process_regex->strsep->strpbrk. We can solve this problem by limiting access to parser->buffer when trace_get_user failed. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250813040232.1344527-1-pulehui@huaweicloud.com Fixes: 8c9af478c06b ("ftrace: Handle commands when closing set_ftrace_filter file") Signed-off-by: Pu Lehui Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 18 ++++++++++++------ kernel/trace/trace.h | 8 +++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4283ed4e8f59..8d8935ed416d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1816,7 +1816,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; @@ -1830,7 +1830,7 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && isspace(ch)) { ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; } @@ -1848,12 +1848,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; - else - return -EINVAL; + else { + ret = -EINVAL; + goto fail; + } ret = get_user(ch, ubuf++); if (ret) - return ret; + goto fail; read++; cnt--; } @@ -1868,11 +1870,15 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, /* Make sure the parsed string always terminates with '\0'. */ parser->buffer[parser->idx] = 0; } else { - return -EINVAL; + ret = -EINVAL; + goto fail; } *ppos += read; return read; +fail: + trace_parser_fail(parser); + return ret; } /* TODO add a seq_buf_to_buffer() */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dbf1d3cf2f1..be6654899cae 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1292,6 +1292,7 @@ bool ftrace_event_is_function(struct trace_event_call *call); */ struct trace_parser { bool cont; + bool fail; char *buffer; unsigned idx; unsigned size; @@ -1299,7 +1300,7 @@ struct trace_parser { static inline bool trace_parser_loaded(struct trace_parser *parser) { - return (parser->idx != 0); + return !parser->fail && parser->idx != 0; } static inline bool trace_parser_cont(struct trace_parser *parser) @@ -1313,6 +1314,11 @@ static inline void trace_parser_clear(struct trace_parser *parser) parser->idx = 0; } +static inline void trace_parser_fail(struct trace_parser *parser) +{ + parser->fail = true; +} + extern int trace_parser_get_init(struct trace_parser *parser, int size); extern void trace_parser_put(struct trace_parser *parser); extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, From cd6e4faba96fe41d6b686e144b96dad5e6f2e771 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Wed, 13 Aug 2025 17:51:14 +0800 Subject: [PATCH 0615/1307] ring-buffer: Remove redundant semicolons Remove unnecessary semicolons. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250813095114.559530-1-liaoyuanhong@vivo.com Signed-off-by: Liao Yuanhong Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bb71a0dc9d69..43460949ad3f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7666,7 +7666,7 @@ static __init int test_ringbuffer(void) rb_test_started = true; set_current_state(TASK_INTERRUPTIBLE); - /* Just run for 10 seconds */; + /* Just run for 10 seconds */ schedule_timeout(10 * HZ); kthread_stop(rb_hammer); From edede7a6dcd7435395cf757d053974aaab6ab1c2 Mon Sep 17 00:00:00 2001 From: Ye Weihua Date: Mon, 18 Aug 2025 07:33:32 +0000 Subject: [PATCH 0616/1307] trace/fgraph: Fix the warning caused by missing unregister notifier This warning was triggered during testing on v6.16: notifier callback ftrace_suspend_notifier_call already registered WARNING: CPU: 2 PID: 86 at kernel/notifier.c:23 notifier_chain_register+0x44/0xb0 ... Call Trace: blocking_notifier_chain_register+0x34/0x60 register_ftrace_graph+0x330/0x410 ftrace_profile_write+0x1e9/0x340 vfs_write+0xf8/0x420 ? filp_flush+0x8a/0xa0 ? filp_close+0x1f/0x30 ? do_dup2+0xaf/0x160 ksys_write+0x65/0xe0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f When writing to the function_profile_enabled interface, the notifier was not unregistered after start_graph_tracing failed, causing a warning the next time function_profile_enabled was written. Fixed by adding unregister_pm_notifier in the exception path. Link: https://lore.kernel.org/20250818073332.3890629-1-yeweihua4@huawei.com Fixes: 4a2b8dda3f870 ("tracing/function-graph-tracer: fix a regression while suspend to disk") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Ye Weihua Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index f4d200f0c610..2a42c1036ea8 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1397,6 +1397,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); + unregister_pm_notifier(&ftrace_suspend_notifier); } return ret; } From 8151320c747efb22d30b035af989fed0d502176e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 22 Jul 2025 22:32:33 +0800 Subject: [PATCH 0617/1307] ACPI: pfr_update: Fix the driver update version check The security-version-number check should be used rather than the runtime version check for driver updates. Otherwise, the firmware update would fail when the update binary had a lower runtime version number than the current one. Fixes: 0db89fa243e5 ("ACPI: Introduce Platform Firmware Runtime Update device driver") Cc: 5.17+ # 5.17+ Reported-by: "Govindarajulu, Hariganesh" Signed-off-by: Chen Yu Link: https://patch.msgid.link/20250722143233.3970607-1-yu.c.chen@intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pfr_update.c | 2 +- include/uapi/linux/pfrut.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c index 318683744ed1..11b1c2828005 100644 --- a/drivers/acpi/pfr_update.c +++ b/drivers/acpi/pfr_update.c @@ -329,7 +329,7 @@ static bool applicable_image(const void *data, struct pfru_update_cap_info *cap, if (type == PFRU_CODE_INJECT_TYPE) return payload_hdr->rt_ver >= cap->code_rt_version; - return payload_hdr->rt_ver >= cap->drv_rt_version; + return payload_hdr->svn_ver >= cap->drv_svn; } static void print_update_debug_info(struct pfru_updated_result *result, diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h index 42fa15f8310d..b77d5c210c26 100644 --- a/include/uapi/linux/pfrut.h +++ b/include/uapi/linux/pfrut.h @@ -89,6 +89,7 @@ struct pfru_payload_hdr { __u32 hw_ver; __u32 rt_ver; __u8 platform_id[16]; + __u32 svn_ver; }; enum pfru_dsm_status { From 4647c4deadcc17f40858be06bcf416369a8f1d57 Mon Sep 17 00:00:00 2001 From: Pratyush Brahma Date: Wed, 20 Aug 2025 06:29:34 +0530 Subject: [PATCH 0618/1307] mm: numa,memblock: Use SZ_1M macro to denote bytes to MB conversion Replace the manual bitwise conversion of bytes to MB with SZ_1M macro, a standard macro used within the mm subsystem, to improve readability. Signed-off-by: Pratyush Brahma Link: https://lore.kernel.org/r/20250820-numa-memblks-refac-v2-1-43bf1af02acd@oss.qualcomm.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 4 ++-- mm/numa_emulation.c | 4 ++-- mm/numa_memblks.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 154f1d73b61f..8a0ed3074af4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -780,9 +780,9 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt } if ((nr_pages << PAGE_SHIFT) > threshold_bytes) { - mem_size_mb = memblock_phys_mem_size() >> 20; + mem_size_mb = memblock_phys_mem_size() / SZ_1M; pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", - (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); + (nr_pages << PAGE_SHIFT) / SZ_1M, mem_size_mb); return false; } diff --git a/mm/numa_emulation.c b/mm/numa_emulation.c index 9d55679d99ce..703c8fa05048 100644 --- a/mm/numa_emulation.c +++ b/mm/numa_emulation.c @@ -73,7 +73,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, } printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n", - nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20); + nid, eb->start, eb->end - 1, (eb->end - eb->start) / SZ_1M); return 0; } @@ -264,7 +264,7 @@ static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei, min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE); if (size < min_size) { pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", - size >> 20, min_size >> 20); + size / SZ_1M, min_size / SZ_1M); size = min_size; } size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE); diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index de626525a87c..5b009a9cd8b4 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -427,9 +427,9 @@ static int __init numa_register_meminfo(struct numa_meminfo *mi) unsigned long pfn_align = node_map_pfn_alignment(); if (pfn_align && pfn_align < PAGES_PER_SECTION) { - unsigned long node_align_mb = PFN_PHYS(pfn_align) >> 20; + unsigned long node_align_mb = PFN_PHYS(pfn_align) / SZ_1M; - unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) >> 20; + unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) / SZ_1M; pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n", node_align_mb, sect_align_mb); From efdaa61d73a1deb066ccc3b4d56257cc63ab5be9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Aug 2025 09:40:52 +0200 Subject: [PATCH 0619/1307] drivers/xen/xenbus: remove quirk for Xen 3.x The kernel is not supported to run as a Xen guest on Xen versions older than 4.0. Remove xen_strict_xenbus_quirk() which is testing the Xen version to be at least 4.0. Acked-by: Stefano Stabellini Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250815074052.13792-1-jgross@suse.com> --- drivers/xen/xenbus/xenbus_xs.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3c9da446b85d..528682bf0c7f 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -718,26 +718,6 @@ int xs_watch_msg(struct xs_watch_event *event) return 0; } -/* - * Certain older XenBus toolstack cannot handle reading values that are - * not populated. Some Xen 3.4 installation are incapable of doing this - * so if we are running on anything older than 4 do not attempt to read - * control/platform-feature-xs_reset_watches. - */ -static bool xen_strict_xenbus_quirk(void) -{ -#ifdef CONFIG_X86 - uint32_t eax, ebx, ecx, edx, base; - - base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - if ((eax >> 16) < 4) - return true; -#endif - return false; - -} static void xs_reset_watches(void) { int err; @@ -745,9 +725,6 @@ static void xs_reset_watches(void) if (!xen_hvm_domain() || xen_initial_domain()) return; - if (xen_strict_xenbus_quirk()) - return; - if (!xenbus_read_unsigned("control", "platform-feature-xs_reset_watches", 0)) return; From a47bc954cf0eb51f2828e1607d169d487df7f11f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Aug 2025 22:23:15 +0800 Subject: [PATCH 0620/1307] objtool/LoongArch: Get table size correctly if LTO is enabled When compiling with LLVM and CONFIG_LTO_CLANG is set, there exist many objtool warnings "sibling call from callable instruction with modified stack frame". For this special case, the related object file shows that there is no generated relocation section '.rela.discard.tablejump_annotate' for the table jump instruction jirl, thus objtool can not know that what is the actual destination address. It needs to do something on the LLVM side to make sure that there is the relocation section '.rela.discard.tablejump_annotate' if LTO is enabled, but in order to maintain compatibility for the current LLVM compiler, this can be done in the kernel Makefile for now. Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' needs to be passed via '-mllvm' to ld.lld. Before doing the above changes, it should handle the special case of the relocation section '.rela.discard.tablejump_annotate' to get the correct table size first, otherwise there are many objtool warnings and errors if LTO is enabled. There are many different rodata for each function if LTO is enabled, it is necessary to enhance get_rodata_table_size_by_table_annotate(). Fixes: b95f852d3af2 ("objtool/LoongArch: Add support for switch table") Closes: https://lore.kernel.org/loongarch/20250731175655.GA1455142@ax162/ Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- tools/objtool/arch/loongarch/special.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index e39f86d97002..a80b75f7b061 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, struct table_info *next_table; unsigned long tmp_insn_offset; unsigned long tmp_rodata_offset; + bool is_valid_list = false; rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate"); if (!rsec) @@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, INIT_LIST_HEAD(&table_list); for_each_reloc(rsec, reloc) { + if (reloc->sym->sec->rodata) + continue; + + if (strcmp(insn->sec->name, reloc->sym->sec->name)) + continue; + orig_table = malloc(sizeof(struct table_info)); if (!orig_table) { WARN("malloc failed"); @@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, if (reloc_idx(reloc) + 1 == sec_num_entries(rsec)) break; + + if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) { + list_for_each_entry(orig_table, &table_list, jump_info) { + if (orig_table->insn_offset == insn->offset) { + is_valid_list = true; + break; + } + } + + if (!is_valid_list) { + list_del_init(&table_list); + continue; + } + + break; + } } list_for_each_entry(orig_table, &table_list, jump_info) { From 5dfea6644d201bfeffaa7e0d79d62309856613b7 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 20 Aug 2025 22:23:15 +0800 Subject: [PATCH 0621/1307] LoongArch: Pass annotate-tablejump option if LTO is enabled When compiling with LLVM and CONFIG_LTO_CLANG is set, there exist many objtool warnings "sibling call from callable instruction with modified stack frame". For this special case, the related object file shows that there is no generated relocation section '.rela.discard.tablejump_annotate' for the table jump instruction jirl, thus objtool can not know that what is the actual destination address. It needs to do something on the LLVM side to make sure that there is the relocation section '.rela.discard.tablejump_annotate' if LTO is enabled, but in order to maintain compatibility for the current LLVM compiler, this can be done in the kernel Makefile for now. Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' needs to be passed via '-mllvm' to ld.lld. Note that it should also pass the compiler option -mannotate-tablejump rather than only pass '-mllvm --loongarch-annotate-tablejump' to ld.lld if LTO is enabled, otherwise there are no jump info for some table jump instructions. Fixes: e20ab7d454ee ("LoongArch: Enable jump table for objtool") Closes: https://lore.kernel.org/loongarch/20250731175655.GA1455142@ax162/ Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Co-developed-by: WANG Rui Signed-off-by: WANG Rui Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b0703a4e02a2..a3a9759414f4 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -102,7 +102,13 @@ KBUILD_CFLAGS += $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma) ifdef CONFIG_OBJTOOL ifdef CONFIG_CC_HAS_ANNOTATE_TABLEJUMP +# The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. +# Ensure it is aware of linker with LTO, '--loongarch-annotate-tablejump' also needs to +# be passed via '-mllvm' to ld.lld. KBUILD_CFLAGS += -mannotate-tablejump +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -mllvm --loongarch-annotate-tablejump +endif else KBUILD_CFLAGS += -fno-jump-tables # keep compatibility with older compilers endif From f7794a4d92ade518c813de69a01b27ca6d8d86f3 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Wed, 20 Aug 2025 22:23:16 +0800 Subject: [PATCH 0622/1307] LoongArch: Increase COMMAND_LINE_SIZE up to 4096 The default COMMAND_LINE_SIZE of 512, inherited from asm-generic, is too small for modern use cases. For example, kdump configurations or extensive debugging parameters can easily exceed this limit. Therefore, increase the command line size to 4096 bytes, aligning LoongArch with the MIPS architecture. This change follows a broader trend among architectures to raise this limit to support modern needs; for instance, PowerPC increased its value for similar reasons in the commit a5980d064fe2 ("powerpc: Bump COMMAND_LINE_SIZE to 2048"). Similar to the change made for RISC-V in the commit 61fc1ee8be26 ("riscv: Bump COMMAND_LINE_SIZE value to 1024"), this is considered a safe change. The broader kernel community has reached a consensus that modifying COMMAND_LINE_SIZE from UAPI headers does not constitute a uABI breakage, as well-behaved userspace applications should not rely on this macro. Suggested-by: Huang Cun Signed-off-by: Ming Wang Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/setup.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 arch/loongarch/include/uapi/asm/setup.h diff --git a/arch/loongarch/include/uapi/asm/setup.h b/arch/loongarch/include/uapi/asm/setup.h new file mode 100644 index 000000000000..d46363ce3e02 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/setup.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_ASM_LOONGARCH_SETUP_H +#define _UAPI_ASM_LOONGARCH_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#endif /* _UAPI_ASM_LOONGARCH_SETUP_H */ From 8ef7f3132e4005a103b382e71abea7ad01fbeb86 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 0623/1307] LoongArch: Add cpuhotplug hooks to fix high cpu usage of vCPU threads When the CPU is offline, the timer of LoongArch is not correctly closed. This is harmless for real machines, but resulting in an excessively high cpu usage rate of the offline vCPU thread in the virtual machines. To correctly close the timer, we have made the following modifications: Register the cpu hotplug event (CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING) for LoongArch. This event's hooks will be called to close the timer when the CPU is offline. Clear the timer interrupt when the timer is turned off. Since before the timer is turned off, there may be a timer interrupt that has already been in the pending state due to the interruption of the disabled, which also affects the halt state of the offline vCPU. Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 22 ++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 2 files changed, 23 insertions(+) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 367906b10f81..f3092f2de8b5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -5,6 +5,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -102,6 +103,23 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } +static int arch_timer_starting(unsigned int cpu) +{ + set_csr_ecfg(ECFGF_TIMER); + + return 0; +} + +static int arch_timer_dying(unsigned int cpu) +{ + constant_set_state_shutdown(this_cpu_ptr(&constant_clockevent_device)); + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + + return 0; +} + static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; @@ -172,6 +190,10 @@ int constant_clockevent_init(void) lpj_fine = get_loops_per_jiffy(); pr_info("Constant clock event device register\n"); + cpuhp_setup_state(CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, + "clockevents/loongarch/timer:starting", + arch_timer_starting, arch_timer_dying); + return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, From 63dbd8fb2af3a89466538599a9acb2d11ef65c06 Mon Sep 17 00:00:00 2001 From: Kanglong Wang Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 0624/1307] LoongArch: Optimize module load time by optimizing PLT/GOT counting When enabling CONFIG_KASAN, CONFIG_PREEMPT_VOLUNTARY_BUILD and CONFIG_PREEMPT_VOLUNTARY at the same time, there will be soft deadlock, the relevant logs are as follows: rcu: INFO: rcu_sched self-detected stall on CPU ... Call Trace: [<900000000024f9e4>] show_stack+0x5c/0x180 [<90000000002482f4>] dump_stack_lvl+0x94/0xbc [<9000000000224544>] rcu_dump_cpu_stacks+0x1fc/0x280 [<900000000037ac80>] rcu_sched_clock_irq+0x720/0xf88 [<9000000000396c34>] update_process_times+0xb4/0x150 [<90000000003b2474>] tick_nohz_handler+0xf4/0x250 [<9000000000397e28>] __hrtimer_run_queues+0x1d0/0x428 [<9000000000399b2c>] hrtimer_interrupt+0x214/0x538 [<9000000000253634>] constant_timer_interrupt+0x64/0x80 [<9000000000349938>] __handle_irq_event_percpu+0x78/0x1a0 [<9000000000349a78>] handle_irq_event_percpu+0x18/0x88 [<9000000000354c00>] handle_percpu_irq+0x90/0xf0 [<9000000000348c74>] handle_irq_desc+0x94/0xb8 [<9000000001012b28>] handle_cpu_irq+0x68/0xa0 [<9000000001def8c0>] handle_loongarch_irq+0x30/0x48 [<9000000001def958>] do_vint+0x80/0xd0 [<9000000000268a0c>] kasan_mem_to_shadow.part.0+0x2c/0x2a0 [<90000000006344f4>] __asan_load8+0x4c/0x120 [<900000000025c0d0>] module_frob_arch_sections+0x5c8/0x6b8 [<90000000003895f0>] load_module+0x9e0/0x2958 [<900000000038b770>] __do_sys_init_module+0x208/0x2d0 [<9000000001df0c34>] do_syscall+0x94/0x190 [<900000000024d6fc>] handle_syscall+0xbc/0x158 After analysis, this is because the slow speed of loading the amdgpu module leads to the long time occupation of the cpu and then the soft deadlock. When loading a module, module_frob_arch_sections() tries to figure out the number of PLTs/GOTs that will be needed to handle all the RELAs. It will call the count_max_entries() to find in an out-of-order date which counting algorithm has O(n^2) complexity. To make it faster, we sort the relocation list by info and addend. That way, to check for a duplicate relocation, it just needs to compare with the previous entry. This reduces the complexity of the algorithm to O(n log n), as done in commit d4e0340919fb ("arm64/module: Optimize module load time by optimizing PLT counting"). This gives sinificant reduction in module load time for modules with large number of relocations. After applying this patch, the soft deadlock problem has been solved, and the kernel starts normally without "Call Trace". Using the default configuration to test some modules, the results are as follows: Module Size ip_tables 36K fat 143K radeon 2.5MB amdgpu 16MB Without this patch: Module Module load time (ms) Count(PLTs/GOTs) ip_tables 18 59/6 fat 0 162/14 radeon 54 1221/84 amdgpu 1411 4525/1098 With this patch: Module Module load time (ms) Count(PLTs/GOTs) ip_tables 18 59/6 fat 0 162/14 radeon 22 1221/84 amdgpu 45 4525/1098 Fixes: fcdfe9d22bed ("LoongArch: Add ELF and module support") Signed-off-by: Kanglong Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/module-sections.c | 38 ++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index e2f30ff9afde..a43ba7f9f987 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -8,6 +8,7 @@ #include #include #include +#include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { @@ -61,39 +62,38 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr v return (Elf_Addr)&plt[nr]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) + +static int compare_rela(const void *x, const void *y) { - return x->r_info == y->r_info && x->r_addend == y->r_addend; -} + int ret; + const Elf_Rela *rela_x = x, *rela_y = y; -static bool duplicate_rela(const Elf_Rela *rela, int idx) -{ - int i; + ret = cmp_3way(rela_x->r_info, rela_y->r_info); + if (ret == 0) + ret = cmp_3way(rela_x->r_addend, rela_y->r_addend); - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } - - return false; + return ret; } static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts, unsigned int *gots) { - unsigned int i, type; + unsigned int i; + + sort(relas, num, sizeof(Elf_Rela), compare_rela, NULL); for (i = 0; i < num; i++) { - type = ELF_R_TYPE(relas[i].r_info); - switch (type) { + if (i && !compare_rela(&relas[i-1], &relas[i])) + continue; + + switch (ELF_R_TYPE(relas[i].r_info)) { case R_LARCH_SOP_PUSH_PLT_PCREL: case R_LARCH_B26: - if (!duplicate_rela(relas, i)) - (*plts)++; + (*plts)++; break; case R_LARCH_GOT_PC_HI20: - if (!duplicate_rela(relas, i)) - (*gots)++; + (*gots)++; break; default: break; /* Do nothing. */ From 112ca94f6c3b3e0b2002a240de43c487a33e0234 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 0625/1307] LoongArch: Save LBT before FPU in setup_sigcontext() Now if preemption happens between protected_save_fpu_context() and protected_save_lbt_context(), FTOP context is lost. Because FTOP is saved by protected_save_lbt_context() but protected_save_fpu_context() disables TM before that. So save LBT before FPU in setup_sigcontext() to avoid this potential risk. Signed-off-by: Hanlu Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/signal.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 4740cb5b2388..c9f7ca778364 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -677,6 +677,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, for (i = 1; i < 32; i++) err |= __put_user(regs->regs[i], &sc->sc_regs[i]); +#ifdef CONFIG_CPU_HAS_LBT + if (extctx->lbt.addr) + err |= protected_save_lbt_context(extctx); +#endif + if (extctx->lasx.addr) err |= protected_save_lasx_context(extctx); else if (extctx->lsx.addr) @@ -684,11 +689,6 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, else if (extctx->fpu.addr) err |= protected_save_fpu_context(extctx); -#ifdef CONFIG_CPU_HAS_LBT - if (extctx->lbt.addr) - err |= protected_save_lbt_context(extctx); -#endif - /* Set the "end" magic */ info = (struct sctx_info *)extctx->end.addr; err |= __put_user(0, &info->magic); From 0078e94a4733454d1ffa3888afe88bf19c81b91c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: [PATCH 0626/1307] LoongArch: Rename GCC_PLUGIN_STACKLEAK to KSTACK_ERASE Commit 57fbad15c2eee772 ("stackleak: Rename STACKLEAK to KSTACK_ERASE") misses the stackframe.h part for LoongArch, so fix it. Fixes: 57fbad15c2eee772 ("stackleak: Rename STACKLEAK to KSTACK_ERASE") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/stackframe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 3eda298702b1..5cb568a60cf8 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -58,7 +58,7 @@ .endm .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE bl stackleak_erase_on_task_stack #endif .endm From f135fb24ef29335b94921077588cae445bc7f099 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 20 Aug 2025 15:22:00 +0100 Subject: [PATCH 0627/1307] ASoC: cs35l56: Update Firmware Addresses for CS35L63 for production silicon Production silicon for CS36L63 has some small differences compared to pre-production silicon. Update firmware addresses, which are different. No product was ever released with pre-production silicon so there is no need for the driver to include support for it. Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e17c4cadd04d..f44aabde805e 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -107,8 +107,8 @@ #define CS35L56_DSP1_PMEM_5114 0x3804FE8 #define CS35L63_DSP1_FW_VER CS35L56_DSP1_FW_VER -#define CS35L63_DSP1_HALO_STATE 0x280396C -#define CS35L63_DSP1_PM_CUR_STATE 0x28042C8 +#define CS35L63_DSP1_HALO_STATE 0x2803C04 +#define CS35L63_DSP1_PM_CUR_STATE 0x2804518 #define CS35L63_PROTECTION_STATUS 0x340009C #define CS35L63_TRANSDUCER_ACTUAL_PS 0x34000F4 #define CS35L63_MAIN_RENDER_USER_MUTE 0x3400020 From 8dadc11b67d4b83deff45e4889b3b5540b9c0a7f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 20 Aug 2025 15:22:01 +0100 Subject: [PATCH 0628/1307] ASoC: cs35l56: Handle new algorithms IDs for CS35L63 CS35L63 uses different algorithm IDs from CS35L56. Add a new mechanism to handle different alg IDs between parts in the CS35L56 driver. Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 29 ++++++++++++++++++++++++++--- sound/soc/codecs/cs35l56.c | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index f44aabde805e..7c8bbe8ad1e2 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -306,6 +306,7 @@ struct cs35l56_base { struct gpio_desc *reset_gpio; struct cs35l56_spi_payload *spi_payload_buf; const struct cs35l56_fw_reg *fw_reg; + const struct cirrus_amp_cal_controls *calibration_controls; }; static inline bool cs35l56_is_otp_register(unsigned int reg) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index ba653f6ccfae..850fcf385996 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -838,6 +838,15 @@ const struct cirrus_amp_cal_controls cs35l56_calibration_controls = { }; EXPORT_SYMBOL_NS_GPL(cs35l56_calibration_controls, "SND_SOC_CS35L56_SHARED"); +static const struct cirrus_amp_cal_controls cs35l63_calibration_controls = { + .alg_id = 0xbf210, + .mem_region = WMFW_ADSP2_YM, + .ambient = "CAL_AMBIENT", + .calr = "CAL_R", + .status = "CAL_STATUS", + .checksum = "CAL_CHECKSUM", +}; + int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base) { u64 silicon_uid = 0; @@ -912,19 +921,31 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, "SND_SOC_CS35L56_SHARED"); void cs35l56_log_tuning(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp) { __be32 pid, sid, tid; + unsigned int alg_id; int ret; + switch (cs35l56_base->type) { + case 0x54: + case 0x56: + case 0x57: + alg_id = 0x9f212; + break; + default: + alg_id = 0xbf212; + break; + } + scoped_guard(mutex, &cs_dsp->pwr_lock) { ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_PRJCT_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &pid, sizeof(pid)); if (!ret) ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_CHNNL_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &sid, sizeof(sid)); if (!ret) ret = cs_dsp_coeff_read_ctrl(cs_dsp_get_ctl(cs_dsp, "AS_SNPSHT_ID", - WMFW_ADSP2_XM, 0x9f212), + WMFW_ADSP2_XM, alg_id), 0, &tid, sizeof(tid)); } @@ -974,8 +995,10 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) case 0x35A54: case 0x35A56: case 0x35A57: + cs35l56_base->calibration_controls = &cs35l56_calibration_controls; break; case 0x35A630: + cs35l56_base->calibration_controls = &cs35l63_calibration_controls; devid = devid >> 4; break; default: diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index b1c65d8331e7..2c1edbd636ef 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -695,7 +695,7 @@ static int cs35l56_write_cal(struct cs35l56_private *cs35l56) return ret; ret = cs_amp_write_cal_coeffs(&cs35l56->dsp.cs_dsp, - &cs35l56_calibration_controls, + cs35l56->base.calibration_controls, &cs35l56->base.cal_data); wm_adsp_stop(&cs35l56->dsp); From 8d13d1bdb59d0a2c526869ee571ec51a3a887463 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 20 Aug 2025 15:22:02 +0100 Subject: [PATCH 0629/1307] ASoC: cs35l56: Remove SoundWire Clock Divider workaround for CS35L63 Production silicon for CS36L63 has some small differences compared to pre-production silicon. Remove soundwire clock workaround as no longer necessary. We don't want to do tricks with low-level clocking controls if we don't need to. Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-sdw.c | 69 ---------------------------------- sound/soc/codecs/cs35l56.h | 3 -- 2 files changed, 72 deletions(-) diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index ee14031695a1..3905c9cb188a 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -393,74 +393,6 @@ static int cs35l56_sdw_update_status(struct sdw_slave *peripheral, return 0; } -static int cs35l63_sdw_kick_divider(struct cs35l56_private *cs35l56, - struct sdw_slave *peripheral) -{ - unsigned int curr_scale_reg, next_scale_reg; - int curr_scale, next_scale, ret; - - if (!cs35l56->base.init_done) - return 0; - - if (peripheral->bus->params.curr_bank) { - curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1; - next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0; - } else { - curr_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B0; - next_scale_reg = SDW_SCP_BUSCLOCK_SCALE_B1; - } - - /* - * Current clock scale value must be different to new value. - * Modify current to guarantee this. If next still has the dummy - * value we wrote when it was current, the core code has not set - * a new scale so restore its original good value - */ - curr_scale = sdw_read_no_pm(peripheral, curr_scale_reg); - if (curr_scale < 0) { - dev_err(cs35l56->base.dev, "Failed to read current clock scale: %d\n", curr_scale); - return curr_scale; - } - - next_scale = sdw_read_no_pm(peripheral, next_scale_reg); - if (next_scale < 0) { - dev_err(cs35l56->base.dev, "Failed to read next clock scale: %d\n", next_scale); - return next_scale; - } - - if (next_scale == CS35L56_SDW_INVALID_BUS_SCALE) { - next_scale = cs35l56->old_sdw_clock_scale; - ret = sdw_write_no_pm(peripheral, next_scale_reg, next_scale); - if (ret < 0) { - dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n", - ret); - return ret; - } - } - - cs35l56->old_sdw_clock_scale = curr_scale; - ret = sdw_write_no_pm(peripheral, curr_scale_reg, CS35L56_SDW_INVALID_BUS_SCALE); - if (ret < 0) { - dev_err(cs35l56->base.dev, "Failed to modify current clock scale: %d\n", ret); - return ret; - } - - dev_dbg(cs35l56->base.dev, "Next bus scale: %#x\n", next_scale); - - return 0; -} - -static int cs35l56_sdw_bus_config(struct sdw_slave *peripheral, - struct sdw_bus_params *params) -{ - struct cs35l56_private *cs35l56 = dev_get_drvdata(&peripheral->dev); - - if ((cs35l56->base.type == 0x63) && (cs35l56->base.rev < 0xa1)) - return cs35l63_sdw_kick_divider(cs35l56, peripheral); - - return 0; -} - static int __maybe_unused cs35l56_sdw_clk_stop(struct sdw_slave *peripheral, enum sdw_clk_stop_mode mode, enum sdw_clk_stop_type type) @@ -476,7 +408,6 @@ static const struct sdw_slave_ops cs35l56_sdw_ops = { .read_prop = cs35l56_sdw_read_prop, .interrupt_callback = cs35l56_sdw_interrupt, .update_status = cs35l56_sdw_update_status, - .bus_config = cs35l56_sdw_bus_config, #ifdef DEBUG .clk_stop = cs35l56_sdw_clk_stop, #endif diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index bd77a57249d7..40a1800a4585 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -20,8 +20,6 @@ #define CS35L56_SDW_GEN_INT_MASK_1 0xc1 #define CS35L56_SDW_INT_MASK_CODEC_IRQ BIT(0) -#define CS35L56_SDW_INVALID_BUS_SCALE 0xf - #define CS35L56_RX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE) #define CS35L56_TX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE \ | SNDRV_PCM_FMTBIT_S32_LE) @@ -52,7 +50,6 @@ struct cs35l56_private { u8 asp_slot_count; bool tdm_mode; bool sysclk_set; - u8 old_sdw_clock_scale; u8 sdw_link_num; u8 sdw_unique_id; }; From eb173ce0e23502e397eae75453936b3ecfb1fd84 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2025 10:48:52 +0200 Subject: [PATCH 0630/1307] s390/configs: Update defconfigs The usual defconfig updates. Notable changes: - Enable ZONE_DEVICE, and with that DEV_DAX, FS_DAX, and FUSE_DAX - Enable CRYPTO_SELFTESTS_FULL for debug_defconfig - Enable CRASH_DM_CRYPT - Disable legacy IP_NF_FILTER Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/debug_defconfig | 33 ++++++++++++++-------------- arch/s390/configs/defconfig | 32 ++++++++++++--------------- arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6b33429f1c4d..5e616bc988ac 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -42,6 +44,7 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -105,6 +108,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y @@ -223,17 +227,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -248,6 +254,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -318,16 +325,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -340,15 +339,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -383,6 +376,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -504,6 +498,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -641,6 +636,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -665,6 +661,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -755,6 +752,8 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_SELFTESTS_FULL=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -783,7 +782,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m @@ -822,6 +820,7 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CRYPTO_KRB5=m CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m +CONFIG_TRACE_MMIO_ACCESS=y CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b75eb2775850..28f025051cdf 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -40,6 +42,7 @@ CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -97,6 +100,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y @@ -214,17 +218,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -239,6 +245,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m CONFIG_NETFILTER_XT_MATCH_CONNMARK=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m @@ -309,16 +316,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -331,15 +330,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -373,6 +366,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -494,6 +488,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -631,6 +626,7 @@ CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -652,6 +648,7 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -683,7 +680,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m @@ -741,6 +737,7 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_BENCHMARK=m @@ -769,7 +766,6 @@ CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8163c1702720..23dd55dc41a3 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,5 +1,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set From fcc43a7e294f877021c4fa71276920f543e8e298 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2025 10:48:53 +0200 Subject: [PATCH 0631/1307] s390/configs: Set HZ=1000 Similar to powerpc set HZ to 1000. See also commit a206d2334012 ("powerpc/defconfigs: Set HZ=1000 on ppc64 and powernv defconfigs"). Besides other this will reduce the latency seen with synchronize_rcu(). Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/defconfig | 2 +- arch/s390/configs/zfcpdump_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 28f025051cdf..094599cdaf4d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -47,7 +47,7 @@ CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 23dd55dc41a3..ed0b137353ad 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -12,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KEXEC=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 -CONFIG_HZ_100=y +CONFIG_HZ_1000=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set # CONFIG_AP is not set From 430fa71027b6ac9bb0ce5532b8d0676777d4219a Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 18 Aug 2025 12:21:52 +0200 Subject: [PATCH 0632/1307] s390/sclp: Fix SCCB present check Tracing code called by the SCLP interrupt handler contains early exits if the SCCB address associated with an interrupt is NULL. This check is performed after physical to virtual address translation. If the kernel identity mapping does not start at address zero, the resulting virtual address is never zero, so that the NULL checks won't work. Subsequently this may result in incorrect accesses to the first page of the identity mapping. Fix this by introducing a function that handles the NULL case before address translation. Fixes: ada1da31ce34 ("s390/sclp: sort out physical vs virtual pointers usage") Cc: stable@vger.kernel.org Reviewed-by: Alexander Gordeev Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index f2e42c1d51aa..98e334724a62 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -77,6 +77,13 @@ unsigned long sclp_console_full; /* The currently active SCLP command word. */ static sclp_cmdw_t active_cmd; +static inline struct sccb_header *sclpint_to_sccb(u32 sccb_int) +{ + if (sccb_int) + return __va(sccb_int); + return NULL; +} + static inline void sclp_trace(int prio, char *id, u32 a, u64 b, bool err) { struct sclp_trace_entry e; @@ -620,7 +627,7 @@ __sclp_find_req(u32 sccb) static bool ok_response(u32 sccb_int, sclp_cmdw_t cmd) { - struct sccb_header *sccb = (struct sccb_header *)__va(sccb_int); + struct sccb_header *sccb = sclpint_to_sccb(sccb_int); struct evbuf_header *evbuf; u16 response; @@ -659,7 +666,7 @@ static void sclp_interrupt_handler(struct ext_code ext_code, /* INT: Interrupt received (a=intparm, b=cmd) */ sclp_trace_sccb(0, "INT", param32, active_cmd, active_cmd, - (struct sccb_header *)__va(finished_sccb), + sclpint_to_sccb(finished_sccb), !ok_response(finished_sccb, active_cmd)); if (finished_sccb) { From 93f616ff870a1fb7e84d472cad0af651b18f9f87 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 7 Aug 2025 17:04:27 +0200 Subject: [PATCH 0633/1307] s390/mm: Do not map lowcore with identity mapping Since the identity mapping is pinned to address zero the lowcore is always also mapped to address zero, this happens regardless of the relocate_lowcore command line option. If the option is specified the lowcore is mapped twice, instead of only once. This means that NULL pointer accesses will succeed instead of causing an exception (low address protection still applies, but covers only parts). To fix this never map the first two pages of physical memory with the identity mapping. Fixes: 32db401965f1 ("s390/mm: Pin identity mapping base to zero") Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/vmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 1d073acd05a7..cea3de4dce8c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -530,6 +530,9 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l lowcore_address + sizeof(struct lowcore), POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { + /* Do not map lowcore with identity mapping */ + if (!start) + start = sizeof(struct lowcore); pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), POPULATE_IDENTITY); From ec879e1a0be8007aa232ffedcf6a6445dfc1a3d7 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 16 Aug 2025 23:10:51 +0900 Subject: [PATCH 0634/1307] tracing: fprobe-event: Sanitize wildcard for fprobe event name Fprobe event accepts wildcards for the target functions, but unless user specifies its event name, it makes an event with the wildcards. /sys/kernel/tracing # echo 'f mutex*' >> dynamic_events /sys/kernel/tracing # cat dynamic_events f:fprobes/mutex*__entry mutex* /sys/kernel/tracing # ls events/fprobes/ enable filter mutex*__entry To fix this, replace the wildcard ('*') with an underscore. Link: https://lore.kernel.org/all/175535345114.282990.12294108192847938710.stgit@devnote2/ Fixes: 334e5519c375 ("tracing/probes: Add fprobe events for tracing function entry and exit.") Signed-off-by: Masami Hiramatsu (Google) Cc: stable@vger.kernel.org --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1dbf1d3cf2f1..5a6688832da8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2204,7 +2204,7 @@ static inline bool is_good_system_name(const char *name) static inline void sanitize_event_name(char *name) { while (*name++ != '\0') - if (*name == ':' || *name == '.') + if (*name == ':' || *name == '.' || *name == '*') *name = '_'; } From 4be8cefc132606b4a6e851f37f8e8c40c406c910 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:14 +0800 Subject: [PATCH 0635/1307] LoongArch: KVM: Make function kvm_own_lbt() robust Add the flag KVM_LARCH_LBT checking in function kvm_own_lbt(), so that it can be called safely rather than duplicated enabling again. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index d1b8c50941ca..ce478151466c 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1283,9 +1283,11 @@ int kvm_own_lbt(struct kvm_vcpu *vcpu) return -EINVAL; preempt_disable(); - set_csr_euen(CSR_EUEN_LBTEN); - _restore_lbt(&vcpu->arch.lbt); - vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) { + set_csr_euen(CSR_EUEN_LBTEN); + _restore_lbt(&vcpu->arch.lbt); + vcpu->arch.aux_inuse |= KVM_LARCH_LBT; + } preempt_enable(); return 0; From 5c68549c81bcca70fc464e305ffeefd9af968287 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 0636/1307] LoongArch: KVM: Fix stack protector issue in send_ipi_data() Function kvm_io_bus_read() is called in function send_ipi_data(), buffer size of parameter *val should be at least 8 bytes. Since some emulation functions like loongarch_ipi_readl() and kvm_eiointc_read() will write the buffer *val with 8 bytes signed extension regardless parameter len. Otherwise there will be buffer overflow issue when CONFIG_STACKPROTECTOR is enabled. The bug report is shown as follows: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: send_ipi_data+0x194/0x1a0 [kvm] CPU: 11 UID: 107 PID: 2692 Comm: CPU 0/KVM Not tainted 6.17.0-rc1+ #102 PREEMPT(full) Stack : 9000000005901568 0000000000000000 9000000003af371c 900000013c68c000 900000013c68f850 900000013c68f858 0000000000000000 900000013c68f998 900000013c68f990 900000013c68f990 900000013c68f6c0 fffffffffffdb058 fffffffffffdb0e0 900000013c68f858 911e1d4d39cf0ec2 9000000105657a00 0000000000000001 fffffffffffffffe 0000000000000578 282049464555206e 6f73676e6f6f4c20 0000000000000001 00000000086b4000 0000000000000000 0000000000000000 0000000000000000 9000000005709968 90000000058f9000 900000013c68fa68 900000013c68fab4 90000000029279f0 900000010153f940 900000010001f360 0000000000000000 9000000003af3734 000000004390000c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<9000000003af3734>] show_stack+0x5c/0x180 [<9000000003aed168>] dump_stack_lvl+0x6c/0x9c [<9000000003ad0ab0>] vpanic+0x108/0x2c4 [<9000000003ad0ca8>] panic+0x3c/0x40 [<9000000004eb0a1c>] __stack_chk_fail+0x14/0x18 [] send_ipi_data+0x190/0x1a0 [kvm] [] __kvm_io_bus_write+0xa4/0xe8 [kvm] [] kvm_io_bus_write+0x54/0x90 [kvm] [] kvm_emu_iocsr+0x180/0x310 [kvm] [] kvm_handle_gspr+0x280/0x478 [kvm] [] kvm_handle_exit+0xc0/0x130 [kvm] Cc: stable@vger.kernel.org Fixes: daee2f9cae551 ("LoongArch: KVM: Add IPI read and write function") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/ipi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index e658d5b37c04..7925651d2ccf 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -99,7 +99,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { int i, idx, ret; - uint32_t val = 0, mask = 0; + uint64_t val = 0, mask = 0; /* * Bit 27-30 is mask for byte writing. @@ -108,7 +108,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) if ((data >> 27) & 0xf) { /* Read the old val */ idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); @@ -124,7 +124,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) } val |= ((uint32_t)(data >> 32) & ~mask); idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, 4, &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); From 0dfd9ea7bf80fabe11f5b775d762a5cd168cdf41 Mon Sep 17 00:00:00 2001 From: Song Gao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 0637/1307] LoongArch: KVM: Use kvm_get_vcpu_by_id() instead of kvm_get_vcpu() Since using kvm_get_vcpu() may fail to retrieve the vCPU context, kvm_get_vcpu_by_id() should be used instead. Fixes: 8e3054261bc3 ("LoongArch: KVM: Add IPI user mode read and write function") Fixes: 3956a52bc05b ("LoongArch: KVM: Add EIOINTC read and write functions") Reviewed-by: Yanteng Si Signed-off-by: Song Gao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/eiointc.c | 7 ++++++- arch/loongarch/kvm/intc/ipi.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index a3a12af9ecbf..026b139dcff2 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -45,7 +45,12 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } cpu = s->sw_coremap[irq]; - vcpu = kvm_get_vcpu(s->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(s->kvm, cpu); + if (unlikely(vcpu == NULL)) { + kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); + return; + } + if (level) { /* if not enable return false */ if (!test_bit(irq, (unsigned long *)s->enable.reg_u32)) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 7925651d2ccf..5a8481dda052 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -298,7 +298,7 @@ static int kvm_ipi_regs_access(struct kvm_device *dev, cpu = (attr->attr >> 16) & 0x3ff; addr = attr->attr & 0xff; - vcpu = kvm_get_vcpu(dev->kvm, cpu); + vcpu = kvm_get_vcpu_by_id(dev->kvm, cpu); if (unlikely(vcpu == NULL)) { kvm_err("%s: invalid target cpu: %d\n", __func__, cpu); return -EINVAL; From 538c06e3964a8e94b645686cc58ccc4a06fa6330 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Aug 2025 22:51:15 +0800 Subject: [PATCH 0638/1307] LoongArch: KVM: Add address alignment check in pch_pic register access With pch_pic device, its register is based on MMIO address space, different access size 1/2/4/8 is supported. And base address should be naturally aligned with its access size, here add alignment check in its register access emulation function. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/pch_pic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c index 6f00ffe05c54..119290bcea79 100644 --- a/arch/loongarch/kvm/intc/pch_pic.c +++ b/arch/loongarch/kvm/intc/pch_pic.c @@ -195,6 +195,11 @@ static int kvm_pch_pic_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic reading */ vcpu->stat.pch_pic_read_exits++; ret = loongarch_pch_pic_read(s, addr, len, val); @@ -302,6 +307,11 @@ static int kvm_pch_pic_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + /* statistics of pch pic writing */ vcpu->stat.pch_pic_write_exits++; ret = loongarch_pch_pic_write(s, addr, len, val); From dce1b33ed7430c7189b8cc1567498f9e6bf12731 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 24 Jun 2025 16:19:30 -0700 Subject: [PATCH 0639/1307] selftests: harness: Rename is_signed_type() to avoid collision with overflow.h Rename is_signed_type() to is_signed_var() to avoid colliding with a macro of the same name defined by tools' linux/overflow.h. This fixes warnings (and presumably potential test failures) in tests that utilize the selftests harness and happen to (indirectly) include overflow.h. In file included from tools/include/linux/bits.h:34, from tools/include/linux/bitops.h:14, from tools/include/linux/hashtable.h:13, from include/kvm_util.h:11, from x86/userspace_msr_exit_test.c:11: tools/include/linux/overflow.h:31:9: error: "is_signed_type" redefined [-Werror] 31 | #define is_signed_type(type) (((type)(-1)) < (type)1) | ^~~~~~~~~~~~~~ In file included from include/kvm_test_harness.h:11, from x86/userspace_msr_exit_test.c:9: ../kselftest_harness.h:754:9: note: this is the location of the previous definition 754 | #define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) | ^~~~~~~~~~~~~~ Use a separate definition, at least for now, as many selftests build without tools/include in their include path. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Cc: Vincent Mailhol Cc: Arnaldo Carvalho de Melo Cc: Mark Brown Link: https://lore.kernel.org/r/20250624231930.583689-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kselftest_harness.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db995..8516e8434bc4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -751,7 +751,7 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) +#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ @@ -759,7 +759,7 @@ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ - switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ + switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \ case 0: { \ uintmax_t __exp_print = (uintmax_t)__exp; \ uintmax_t __seen_print = (uintmax_t)__seen; \ From f41c538881eec4dcf5961a242097d447f848cda6 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Tue, 29 Jul 2025 23:03:12 +0800 Subject: [PATCH 0640/1307] dmaengine: idxd: Remove improper idxd_free The call to idxd_free() introduces a duplicate put_device() leading to a reference count underflow: refcount_t: underflow; use-after-free. WARNING: CPU: 15 PID: 4428 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110 ... Call Trace: idxd_remove+0xe4/0x120 [idxd] pci_device_remove+0x3f/0xb0 device_release_driver_internal+0x197/0x200 driver_detach+0x48/0x90 bus_remove_driver+0x74/0xf0 pci_unregister_driver+0x2e/0xb0 idxd_exit_module+0x34/0x7a0 [idxd] __do_sys_delete_module.constprop.0+0x183/0x280 do_syscall_64+0x54/0xd70 entry_SYSCALL_64_after_hwframe+0x76/0x7e The idxd_unregister_devices() which is invoked at the very beginning of idxd_remove(), already takes care of the necessary put_device() through the following call path: idxd_unregister_devices() -> device_unregister() -> put_device() In addition, when CONFIG_DEBUG_KOBJECT_RELEASE is enabled, put_device() may trigger asynchronous cleanup via schedule_delayed_work(). If idxd_free() is called immediately after, it can result in a use-after-free. Remove the improper idxd_free() to avoid both the refcount underflow and potential memory corruption during module unload. Fixes: d5449ff1b04d ("dmaengine: idxd: Add missing idxd cleanup to fix memory leak in remove call") Signed-off-by: Yi Sun Tested-by: Shuai Xue Reviewed-by: Dave Jiang Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20250729150313.1934101-2-yi.sun@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 35bdefd3728b..487268fc527b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1294,7 +1294,6 @@ static void idxd_remove(struct pci_dev *pdev) idxd_cleanup(idxd); pci_iounmap(pdev, idxd->reg_base); put_device(idxd_confdev(idxd)); - idxd_free(idxd); pci_disable_device(pdev); } From b7cb9a034305d52222433fad10c3de10204f29e7 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Tue, 29 Jul 2025 23:03:13 +0800 Subject: [PATCH 0641/1307] dmaengine: idxd: Fix refcount underflow on module unload A recent refactor introduced a misplaced put_device() call, resulting in a reference count underflow during module unload. There is no need to add additional put_device() calls for idxd groups, engines, or workqueues. Although the commit claims: "Note, this also fixes the missing put_device() for idxd groups, engines, and wqs." It appears no such omission actually existed. The required cleanup is already handled by the call chain: idxd_unregister_devices() -> device_unregister() -> put_device() Extend idxd_cleanup() to handle the remaining necessary cleanup and remove idxd_cleanup_internals(), which duplicates deallocation logic for idxd, engines, groups, and workqueues. Memory management is also properly handled through the Linux device model. Fixes: a409e919ca32 ("dmaengine: idxd: Refactor remove call with idxd_cleanup() helper") Signed-off-by: Yi Sun Tested-by: Shuai Xue Reviewed-by: Dave Jiang Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20250729150313.1934101-3-yi.sun@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 487268fc527b..efe614a528fa 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1291,7 +1291,10 @@ static void idxd_remove(struct pci_dev *pdev) device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); idxd_device_remove_debugfs(idxd); - idxd_cleanup(idxd); + perfmon_pmu_remove(idxd); + idxd_cleanup_interrupts(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); pci_iounmap(pdev, idxd->reg_base); put_device(idxd_confdev(idxd)); pci_disable_device(pdev); From 39aaa337449e71a41d4813be0226a722827ba606 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 11 Aug 2025 13:43:39 +0300 Subject: [PATCH 0642/1307] dmaengine: idxd: Fix double free in idxd_setup_wqs() The clean up in idxd_setup_wqs() has had a couple bugs because the error handling is a bit subtle. It's simpler to just re-write it in a cleaner way. The issues here are: 1) If "idxd->max_wqs" is <= 0 then we call put_device(conf_dev) when "conf_dev" hasn't been initialized. 2) If kzalloc_node() fails then again "conf_dev" is invalid. It's either uninitialized or it points to the "conf_dev" from the previous iteration so it leads to a double free. It's better to free partial loop iterations within the loop and then the unwinding at the end can handle whole loop iterations. I also renamed the labels to describe what the goto does and not where the goto was located. Fixes: 3fd2f4bc010c ("dmaengine: idxd: fix memory leak in error handling path of idxd_setup_wqs") Reported-by: Colin Ian King Closes: https://lore.kernel.org/all/20250811095836.1642093-1-colin.i.king@gmail.com/ Signed-off-by: Dan Carpenter Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/aJnJW3iYTDDCj9sk@stanley.mountain Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index efe614a528fa..8c4725ad1f64 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -189,27 +189,30 @@ static int idxd_setup_wqs(struct idxd_device *idxd) idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); if (!idxd->wq_enable_map) { rc = -ENOMEM; - goto err_bitmap; + goto err_free_wqs; } for (i = 0; i < idxd->max_wqs; i++) { wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev)); if (!wq) { rc = -ENOMEM; - goto err; + goto err_unwind; } idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ); conf_dev = wq_confdev(wq); wq->id = i; wq->idxd = idxd; - device_initialize(wq_confdev(wq)); + device_initialize(conf_dev); conf_dev->parent = idxd_confdev(idxd); conf_dev->bus = &dsa_bus_type; conf_dev->type = &idxd_wq_device_type; rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); - if (rc < 0) - goto err; + if (rc < 0) { + put_device(conf_dev); + kfree(wq); + goto err_unwind; + } mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); @@ -220,15 +223,20 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { + put_device(conf_dev); + kfree(wq); rc = -ENOMEM; - goto err; + goto err_unwind; } if (idxd->hw.wq_cap.op_config) { wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); if (!wq->opcap_bmap) { + kfree(wq->wqcfg); + put_device(conf_dev); + kfree(wq); rc = -ENOMEM; - goto err_opcap_bmap; + goto err_unwind; } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -239,13 +247,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return 0; -err_opcap_bmap: - kfree(wq->wqcfg); - -err: - put_device(conf_dev); - kfree(wq); - +err_unwind: while (--i >= 0) { wq = idxd->wqs[i]; if (idxd->hw.wq_cap.op_config) @@ -254,11 +256,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd) conf_dev = wq_confdev(wq); put_device(conf_dev); kfree(wq); - } bitmap_free(idxd->wq_enable_map); -err_bitmap: +err_free_wqs: kfree(idxd->wqs); return rc; From a82231b2a8712d0218fc286a9b0da328d419a3f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 14 Aug 2025 12:39:39 +0200 Subject: [PATCH 0643/1307] HID: input: rename hidinput_set_battery_charge_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for a patch fixing a bug affecting hidinput_set_battery_charge_status(), rename the function to hidinput_update_battery_charge_status() and move it up so it can be used by hidinput_update_battery(). Refactor, no functional changes. Tested-by: 卢国宏 Signed-off-by: José Expósito Signed-off-by: Jiri Kosina --- drivers/hid/hid-input-test.c | 10 ++++----- drivers/hid/hid-input.c | 42 ++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/hid/hid-input-test.c b/drivers/hid/hid-input-test.c index 77c2d45ac62a..6f5c71660d82 100644 --- a/drivers/hid/hid-input-test.c +++ b/drivers/hid/hid-input-test.c @@ -7,7 +7,7 @@ #include -static void hid_test_input_set_battery_charge_status(struct kunit *test) +static void hid_test_input_update_battery_charge_status(struct kunit *test) { struct hid_device *dev; bool handled; @@ -15,15 +15,15 @@ static void hid_test_input_set_battery_charge_status(struct kunit *test) dev = kunit_kzalloc(test, sizeof(*dev), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - handled = hidinput_set_battery_charge_status(dev, HID_DG_HEIGHT, 0); + handled = hidinput_update_battery_charge_status(dev, HID_DG_HEIGHT, 0); KUNIT_EXPECT_FALSE(test, handled); KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_UNKNOWN); - handled = hidinput_set_battery_charge_status(dev, HID_BAT_CHARGING, 0); + handled = hidinput_update_battery_charge_status(dev, HID_BAT_CHARGING, 0); KUNIT_EXPECT_TRUE(test, handled); KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_DISCHARGING); - handled = hidinput_set_battery_charge_status(dev, HID_BAT_CHARGING, 1); + handled = hidinput_update_battery_charge_status(dev, HID_BAT_CHARGING, 1); KUNIT_EXPECT_TRUE(test, handled); KUNIT_EXPECT_EQ(test, dev->battery_charge_status, POWER_SUPPLY_STATUS_CHARGING); } @@ -63,7 +63,7 @@ static void hid_test_input_get_battery_property(struct kunit *test) } static struct kunit_case hid_input_tests[] = { - KUNIT_CASE(hid_test_input_set_battery_charge_status), + KUNIT_CASE(hid_test_input_update_battery_charge_status), KUNIT_CASE(hid_test_input_get_battery_property), { } }; diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index ff1784b5c2a4..262787e6eb20 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -595,6 +595,20 @@ static void hidinput_cleanup_battery(struct hid_device *dev) dev->battery = NULL; } +static bool hidinput_update_battery_charge_status(struct hid_device *dev, + unsigned int usage, int value) +{ + switch (usage) { + case HID_BAT_CHARGING: + dev->battery_charge_status = value ? + POWER_SUPPLY_STATUS_CHARGING : + POWER_SUPPLY_STATUS_DISCHARGING; + return true; + } + + return false; +} + static void hidinput_update_battery(struct hid_device *dev, int value) { int capacity; @@ -617,20 +631,6 @@ static void hidinput_update_battery(struct hid_device *dev, int value) power_supply_changed(dev->battery); } } - -static bool hidinput_set_battery_charge_status(struct hid_device *dev, - unsigned int usage, int value) -{ - switch (usage) { - case HID_BAT_CHARGING: - dev->battery_charge_status = value ? - POWER_SUPPLY_STATUS_CHARGING : - POWER_SUPPLY_STATUS_DISCHARGING; - return true; - } - - return false; -} #else /* !CONFIG_HID_BATTERY_STRENGTH */ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) @@ -642,15 +642,15 @@ static void hidinput_cleanup_battery(struct hid_device *dev) { } -static void hidinput_update_battery(struct hid_device *dev, int value) -{ -} - -static bool hidinput_set_battery_charge_status(struct hid_device *dev, - unsigned int usage, int value) +static bool hidinput_update_battery_charge_status(struct hid_device *dev, + unsigned int usage, int value) { return false; } + +static void hidinput_update_battery(struct hid_device *dev, int value) +{ +} #endif /* CONFIG_HID_BATTERY_STRENGTH */ static bool hidinput_field_in_collection(struct hid_device *device, struct hid_field *field, @@ -1515,7 +1515,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct return; if (usage->type == EV_PWR) { - bool handled = hidinput_set_battery_charge_status(hid, usage->hid, value); + bool handled = hidinput_update_battery_charge_status(hid, usage->hid, value); if (!handled) hidinput_update_battery(hid, value); From e94536e1d1818b0989aa19b443b7089f50133c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 14 Aug 2025 12:39:40 +0200 Subject: [PATCH 0644/1307] HID: input: report battery status changes immediately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the battery status (charging/discharging) was not reported immediately to user-space.  For most input devices, this wasn't problematic because changing their battery status requires connecting them to a different bus. For example, a gamepad would report a discharging status while connected via Bluetooth and a charging status while connected via USB. However, certain devices are not connected or disconnected when their battery status changes. For example, a phone battery changes its status without connecting or disconnecting it. In these cases, the battery status was not reported immediately to user space. Report battery status changes immediately to user space to support these kinds of devices. Fixes: a608dc1c0639 ("HID: input: map battery system charging") Reported-by: 卢国宏 Closes: https://lore.kernel.org/linux-input/aI49Im0sGb6fpgc8@fedora/T/ Tested-by: 卢国宏 Signed-off-by: José Expósito Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 262787e6eb20..f45f856a127f 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -609,13 +609,19 @@ static bool hidinput_update_battery_charge_status(struct hid_device *dev, return false; } -static void hidinput_update_battery(struct hid_device *dev, int value) +static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, + int value) { int capacity; if (!dev->battery) return; + if (hidinput_update_battery_charge_status(dev, usage, value)) { + power_supply_changed(dev->battery); + return; + } + if (value == 0 || value < dev->battery_min || value > dev->battery_max) return; @@ -642,13 +648,8 @@ static void hidinput_cleanup_battery(struct hid_device *dev) { } -static bool hidinput_update_battery_charge_status(struct hid_device *dev, - unsigned int usage, int value) -{ - return false; -} - -static void hidinput_update_battery(struct hid_device *dev, int value) +static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, + int value) { } #endif /* CONFIG_HID_BATTERY_STRENGTH */ @@ -1515,11 +1516,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct return; if (usage->type == EV_PWR) { - bool handled = hidinput_update_battery_charge_status(hid, usage->hid, value); - - if (!handled) - hidinput_update_battery(hid, value); - + hidinput_update_battery(hid, usage->hid, value); return; } From ab1bb82f3db20e23eace06db52031b1164a110c2 Mon Sep 17 00:00:00 2001 From: Matt Coffin Date: Wed, 20 Aug 2025 01:49:51 -0600 Subject: [PATCH 0645/1307] HID: logitech: Add ids for G PRO 2 LIGHTSPEED Adds support for the G PRO 2 LIGHTSPEED Wireless via it's nano receiver or directly. This nano receiver appears to work identically to the 1_1 receiver for the case I've verified, which is the battery status through lg-hidpp. The same appears to be the case wired, sharing much with the Pro X Superlight 2; differences seemed to lie in userland configuration rather than in interfaces used by hid_logitech_hidpp on the kernel side. I verified the sysfs interface for battery charge/discharge status, and capacity read to be working on my 910-007290 device (white). Signed-off-by: Matt Coffin Reviewed-by: Bastien Nocera Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-logitech-dj.c | 4 ++++ drivers/hid/hid-logitech-hidpp.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5a1096283855..37dc42380373 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -907,6 +907,7 @@ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2 0xc534 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1 0xc539 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1 0xc53f +#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2 0xc543 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a #define USB_DEVICE_ID_LOGITECH_BOLT_RECEIVER 0xc548 #define USB_DEVICE_ID_SPACETRAVELLER 0xc623 diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 34fa71ceec2b..cce54dd9884a 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1983,6 +1983,10 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1), .driver_data = recvr_type_gaming_hidpp}, + { /* Logitech lightspeed receiver (0xc543) */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, + USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2), + .driver_data = recvr_type_gaming_hidpp}, { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER), diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 10a3bc5f931b..aaef405a717e 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4596,6 +4596,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, + { /* Logitech G PRO 2 LIGHTSPEED Wireless Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc09a) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), From 729dc340a4ed1267774fc8518284e976e2210bdc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 17 Aug 2025 16:21:46 +0200 Subject: [PATCH 0646/1307] bootconfig: Fix negative seeks on 32-bit with LFS enabled Commit 26dda5769509 "tools/bootconfig: Cleanup bootconfig footer size calculations" replaced some expressions of type int with the BOOTCONFIG_FOOTER_SIZE macro, which expands to an expression of type size_t, which is unsigned. On 32-bit architectures with LFS enabled (i.e. off_t is 64-bit), the seek offset of -BOOTCONFIG_FOOTER_SIZE now turns into a positive value. Fix this by casting the size to off_t before negating it. Just in case someone changes BOOTCONFIG_MAGIC_LEN to have type size_t later, do the same thing to the seek offset of -BOOTCONFIG_MAGIC_LEN. Link: https://lore.kernel.org/all/aKHlevxeg6Y7UQrz@decadent.org.uk/ Fixes: 26dda5769509 ("tools/bootconfig: Cleanup bootconfig footer size calculations") Signed-off-by: Ben Hutchings Signed-off-by: Masami Hiramatsu (Google) --- tools/bootconfig/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 57c669d2aa90..55d59ed507d5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < BOOTCONFIG_FOOTER_SIZE) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) return pr_errno("Failed to lseek for magic", -errno); if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) @@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(uint32_t)) < 0) From c81f6ce16785cc07ae81f53deb07b662ed0bb3a5 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 18 Aug 2025 08:22:21 -0700 Subject: [PATCH 0647/1307] of: dynamic: Fix memleak when of_pci_add_properties() failed When of_pci_add_properties() failed, of_changeset_destroy() is called to free the changeset. And of_changeset_destroy() puts device tree node in each entry but does not free property in the entry. This leads to memory leak in the failure case. In of_changeset_add_prop_helper(), add the property to the device tree node deadprops list. Thus, the property will also be freed along with device tree node. Fixes: b544fc2b8606 ("of: dynamic: Add interfaces for creating device node dynamically") Reported-by: Lorenzo Pieralisi Closes: https://lore.kernel.org/all/aJms+YT8TnpzpCY8@lpieralisi/ Tested-by: Lorenzo Pieralisi Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250818152221.3685724-1-lizhi.hou@amd.com Signed-off-by: Rob Herring (Arm) --- drivers/of/dynamic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 0aba760f7577..dd30b7d8b5e4 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -938,6 +938,9 @@ static int of_changeset_add_prop_helper(struct of_changeset *ocs, if (ret) __of_prop_free(new_pp); + new_pp->next = np->deadprops; + np->deadprops = new_pp; + return ret; } From 4d4d9ef9dfee877d494e5418f68a1016ef08cad6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 19 Aug 2025 15:19:57 -0700 Subject: [PATCH 0648/1307] ixgbe: xsk: resolve the negative overflow of budget in ixgbe_xmit_zc Resolve the budget negative overflow which leads to returning true in ixgbe_xmit_zc even when the budget of descs are thoroughly consumed. Before this patch, when the budget is decreased to zero and finishes sending the last allowed desc in ixgbe_xmit_zc, it will always turn back and enter into the while() statement to see if it should keep processing packets, but in the meantime it unexpectedly decreases the value again to 'unsigned int (0--)', namely, UINT_MAX. Finally, the ixgbe_xmit_zc returns true, showing 'we complete cleaning the budget'. That also means 'clean_complete = true' in ixgbe_poll. The true theory behind this is if that budget number of descs are consumed, it implies that we might have more descs to be done. So we should return false in ixgbe_xmit_zc to tell napi poll to find another chance to start polling to handle the rest of descs. On the contrary, returning true here means job done and we know we finish all the possible descs this time and we don't intend to start a new napi poll. It is apparently against our expectations. Please also see how ixgbe_clean_tx_irq() handles the problem: it uses do..while() statement to make sure the budget can be decreased to zero at most and the negative overflow never happens. The patch adds 'likely' because we rarely would not hit the loop condition since the standard budget is 256. Fixes: 8221c5eba8c1 ("ixgbe: add AF_XDP zero-copy Tx support") Signed-off-by: Jason Xing Reviewed-by: Larysa Zaremba Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Priya Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index ac58964b2f08..7b941505a9d0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -398,7 +398,7 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) dma_addr_t dma; u32 cmd_type; - while (budget-- > 0) { + while (likely(budget)) { if (unlikely(!ixgbe_desc_unused(xdp_ring))) { work_done = false; break; @@ -433,6 +433,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) xdp_ring->next_to_use = 0; + + budget--; } if (tx_desc) { From f3d9f7fa7f5dbfd4fdb1e69c25fc5627700d19dd Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 19 Aug 2025 15:19:58 -0700 Subject: [PATCH 0649/1307] ixgbe: fix ndo_xdp_xmit() workloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ixgbe driver checks periodically in its watchdog subtask if there is anything to be transmitted (considering both Tx and XDP rings) under state of carrier not being 'ok'. Such event is interpreted as Tx hang and therefore results in interface reset. This is currently problematic for ndo_xdp_xmit() as it is allowed to produce descriptors when interface is going through reset or its carrier is turned off. Furthermore, XDP rings should not really be objects of Tx hang detection. This mechanism is rather a matter of ndo_tx_timeout() being called from dev_watchdog against Tx rings exposed to networking stack. Taking into account issues described above, let us have a two fold fix - do not respect XDP rings in local ixgbe watchdog and do not produce Tx descriptors in ndo_xdp_xmit callback when there is some problem with carrier currently. For now, keep the Tx hang checks in clean Tx irq routine, but adjust it to not execute for XDP rings. Cc: Tobias Böhm Reported-by: Marcus Wichelmann Closes: https://lore.kernel.org/netdev/eca1880f-253a-4955-afe6-732d7c6926ee@hetzner-cloud.de/ Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect") Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action") Reviewed-by: Aleksandr Loktionov Tested-by: Marcus Wichelmann Signed-off-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6122a0abb41f..80e6a2ef1350 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -968,10 +968,6 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) clear_bit(__IXGBE_HANG_CHECK_ARMED, &adapter->tx_ring[i]->state); - - for (i = 0; i < adapter->num_xdp_queues; i++) - clear_bit(__IXGBE_HANG_CHECK_ARMED, - &adapter->xdp_ring[i]->state); } static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ -1214,7 +1210,7 @@ static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); struct ixgbe_hw *hw = &adapter->hw; - e_err(drv, "Detected Tx Unit Hang%s\n" + e_err(drv, "Detected Tx Unit Hang\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" " next_to_use <%x>\n" @@ -1222,16 +1218,14 @@ static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, "tx_buffer_info[next_to_clean]\n" " time_stamp <%lx>\n" " jiffies <%lx>\n", - ring_is_xdp(tx_ring) ? " (XDP)" : "", tx_ring->queue_index, IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), tx_ring->next_to_use, next, tx_ring->tx_buffer_info[next].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); } /** @@ -1451,6 +1445,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_bytes); adapter->tx_ipsec += total_ipsec; + if (ring_is_xdp(tx_ring)) + return !!budget; + if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { if (adapter->hw.mac.type == ixgbe_mac_e610) ixgbe_handle_mdd_event(adapter, tx_ring); @@ -1468,9 +1465,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); if (!__netif_txq_completed_wake(txq, total_packets, total_bytes, @@ -7974,12 +7968,9 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) return; /* Force detection of hung controller */ - if (netif_carrier_ok(adapter->netdev)) { + if (netif_carrier_ok(adapter->netdev)) for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); - for (i = 0; i < adapter->num_xdp_queues; i++) - set_check_for_tx_hang(adapter->xdp_ring[i]); - } if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { /* @@ -8199,13 +8190,6 @@ static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter) return true; } - for (i = 0; i < adapter->num_xdp_queues; i++) { - struct ixgbe_ring *ring = adapter->xdp_ring[i]; - - if (ring->next_to_use != ring->next_to_clean) - return true; - } - return false; } @@ -11005,6 +10989,10 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN; + if (!netif_carrier_ok(adapter->netdev) || + !netif_running(adapter->netdev)) + return -ENETDOWN; + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; From 1468c1f97cf32418e34dbb40b784ed9333b9e123 Mon Sep 17 00:00:00 2001 From: ValdikSS Date: Tue, 19 Aug 2025 15:19:59 -0700 Subject: [PATCH 0650/1307] igc: fix disabling L1.2 PCI-E link substate on I226 on init Device ID comparison in igc_is_device_id_i226 is performed before the ID is set, resulting in always failing check on init. Before the patch: * L1.2 is not disabled on init * L1.2 is properly disabled after suspend-resume cycle With the patch: * L1.2 is properly disabled both on init and after suspend-resume How to test: Connect to the 1G link with 300+ mbit/s Internet speed, and run the download speed test, such as: curl -o /dev/null http://speedtest.selectel.ru/1GB Without L1.2 disabled, the speed would be no more than ~200 mbit/s. With L1.2 disabled, the speed would reach 1 gbit/s. Note: it's required that the latency between your host and the remote be around 3-5 ms, the test inside LAN (<1 ms latency) won't trigger the issue. Link: https://lore.kernel.org/intel-wired-lan/15248b4f-3271-42dd-8e35-02bfc92b25e1@intel.com Fixes: 0325143b59c6 ("igc: disable L1.2 PCI-E link substate to avoid performance issue") Signed-off-by: ValdikSS Reviewed-by: Vitaly Lifshits Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250819222000.3504873-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 458e5eaa92e5..e79b14d50b24 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7149,6 +7149,13 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* PCI config space info */ + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->revision_id = pdev->revision; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ if (igc_is_device_id_i226(hw)) pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); @@ -7175,13 +7182,6 @@ static int igc_probe(struct pci_dev *pdev, netdev->mem_start = pci_resource_start(pdev, 0); netdev->mem_end = pci_resource_end(pdev, 0); - /* PCI config space info */ - hw->vendor_id = pdev->vendor; - hw->device_id = pdev->device; - hw->revision_id = pdev->revision; - hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; - /* Copy the default MAC and PHY function pointers */ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); From e318cd6714592fb762fcab59c5684a442243a12f Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Mon, 18 Aug 2025 18:04:57 -0700 Subject: [PATCH 0651/1307] net: dsa: microchip: Fix KSZ9477 HSR port setup issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ksz9477_hsr_join() is called once to setup the HSR port membership, but the port can be enabled later, or disabled and enabled back and the port membership is not set correctly inside ksz_update_port_member(). The added code always use the correct HSR port membership for HSR port that is enabled. Fixes: 2d61298fdd7b ("net: dsa: microchip: Enable HSR offloading for KSZ9477") Reported-by: Frieder Schrempf Signed-off-by: Tristram Ha Reviewed-by: Łukasz Majewski Tested-by: Frieder Schrempf Reviewed-by: Frieder Schrempf Link: https://patch.msgid.link/20250819010457.563286-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4cb14288ff0f..9568cc391fe3 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2457,6 +2457,12 @@ static void ksz_update_port_member(struct ksz_device *dev, int port) dev->dev_ops->cfg_port_member(dev, i, val | cpu_port); } + /* HSR ports are setup once so need to use the assigned membership + * when the port is enabled. + */ + if (!port_member && p->stp_state == BR_STATE_FORWARDING && + (dev->hsr_ports & BIT(port))) + port_member = dev->hsr_ports; dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port); } From 15de71d06a400f7fdc15bf377a2552b0ec437cf5 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 19 Aug 2025 03:36:28 +0000 Subject: [PATCH 0652/1307] net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following setup can trigger a WARNING in htb_activate due to the condition: !cl->leaf.q->q.qlen tc qdisc del dev lo root tc qdisc add dev lo root handle 1: htb default 1 tc class add dev lo parent 1: classid 1:1 \ htb rate 64bit tc qdisc add dev lo parent 1:1 handle f: \ cake memlimit 1b ping -I lo -f -c1 -s64 -W0.001 127.0.0.1 This is because the low memlimit leads to a low buffer_limit, which causes packet dropping. However, cake_enqueue still returns NET_XMIT_SUCCESS, causing htb_enqueue to call htb_activate with an empty child qdisc. We should return NET_XMIT_CN when packets are dropped from the same tin and flow. I do not believe return value of NET_XMIT_CN is necessary for packet drops in the case of ack filtering, as that is meant to optimize performance, not to signal congestion. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Acked-by: Toke Høiland-Jørgensen Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250819033601.579821-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index dbcfb948c867..32bacfc314c2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1750,7 +1750,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, ktime_t now = ktime_get(); struct cake_tin_data *b; struct cake_flow *flow; - u32 idx; + u32 idx, tin; /* choose flow to insert into */ idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); @@ -1760,6 +1760,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, __qdisc_drop(skb, to_free); return ret; } + tin = (u32)(b - q->tins); idx--; flow = &b->flows[idx]; @@ -1927,13 +1928,22 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->buffer_max_used = q->buffer_used; if (q->buffer_used > q->buffer_limit) { + bool same_flow = false; u32 dropped = 0; + u32 drop_id; while (q->buffer_used > q->buffer_limit) { dropped++; - cake_drop(sch, to_free); + drop_id = cake_drop(sch, to_free); + + if ((drop_id >> 16) == tin && + (drop_id & 0xFFFF) == idx) + same_flow = true; } b->drop_overlimit += dropped; + + if (same_flow) + return NET_XMIT_CN; } return NET_XMIT_SUCCESS; } From 2c2192e5f9c7c2892fe2363244d1387f62710d83 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 19 Aug 2025 03:36:59 +0000 Subject: [PATCH 0653/1307] net/sched: Remove unnecessary WARNING condition for empty child qdisc in htb_activate The WARN_ON trigger based on !cl->leaf.q->q.qlen is unnecessary in htb_activate. htb_dequeue_tree already accounts for that scenario. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250819033632.579854-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c968ea763774..b5e40c51655a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -592,7 +592,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen); + WARN_ON(cl->level || !cl->leaf.q); if (!cl->prio_activity) { cl->prio_activity = 1 << cl->prio; From 7af76e9d18a9fd6f8611b3313c86c190f9b6a5a7 Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Tue, 19 Aug 2025 08:28:42 +0000 Subject: [PATCH 0654/1307] net, hsr: reject HSR frame if skb can't hold tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Receiving HSR frame with insufficient space to hold HSR tag in the skb can result in a crash (kernel BUG): [ 45.390915] skbuff: skb_under_panic: text:ffffffff86f32cac len:26 put:14 head:ffff888042418000 data:ffff888042417ff4 tail:0xe end:0x180 dev:bridge_slave_1 [ 45.392559] ------------[ cut here ]------------ [ 45.392912] kernel BUG at net/core/skbuff.c:211! [ 45.393276] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 45.393809] CPU: 1 UID: 0 PID: 2496 Comm: reproducer Not tainted 6.15.0 #12 PREEMPT(undef) [ 45.394433] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 45.395273] RIP: 0010:skb_panic+0x15b/0x1d0 [ 45.402911] Call Trace: [ 45.403105] [ 45.404470] skb_push+0xcd/0xf0 [ 45.404726] br_dev_queue_push_xmit+0x7c/0x6c0 [ 45.406513] br_forward_finish+0x128/0x260 [ 45.408483] __br_forward+0x42d/0x590 [ 45.409464] maybe_deliver+0x2eb/0x420 [ 45.409763] br_flood+0x174/0x4a0 [ 45.410030] br_handle_frame_finish+0xc7c/0x1bc0 [ 45.411618] br_handle_frame+0xac3/0x1230 [ 45.413674] __netif_receive_skb_core.constprop.0+0x808/0x3df0 [ 45.422966] __netif_receive_skb_one_core+0xb4/0x1f0 [ 45.424478] __netif_receive_skb+0x22/0x170 [ 45.424806] process_backlog+0x242/0x6d0 [ 45.425116] __napi_poll+0xbb/0x630 [ 45.425394] net_rx_action+0x4d1/0xcc0 [ 45.427613] handle_softirqs+0x1a4/0x580 [ 45.427926] do_softirq+0x74/0x90 [ 45.428196] This issue was found by syzkaller. The panic happens in br_dev_queue_push_xmit() once it receives a corrupted skb with ETH header already pushed in linear data. When it attempts the skb_push() call, there's not enough headroom and skb_push() panics. The corrupted skb is put on the queue by HSR layer, which makes a sequence of unintended transformations when it receives a specific corrupted HSR frame (with incomplete TAG). Fix it by dropping and consuming frames that are not long enough to contain both ethernet and hsr headers. Alternative fix would be to check for enough headroom before skb_push() in br_dev_queue_push_xmit(). In the reproducer, this is injected via AF_PACKET, but I don't easily see why it couldn't be sent over the wire from adjacent network. Further Details: In the reproducer, the following network interface chain is set up: ┌────────────────┐ ┌────────────────┐ │ veth0_to_hsr ├───┤ hsr_slave0 ┼───┐ └────────────────┘ └────────────────┘ │ │ ┌──────┐ ├─┤ hsr0 ├───┐ │ └──────┘ │ ┌────────────────┐ ┌────────────────┐ │ │┌────────┐ │ veth1_to_hsr ┼───┤ hsr_slave1 ├───┘ └┤ │ └────────────────┘ └────────────────┘ ┌┼ bridge │ ││ │ │└────────┘ │ ┌───────┐ │ │ ... ├──────┘ └───────┘ To trigger the events leading up to crash, reproducer sends a corrupted HSR frame with incomplete TAG, via AF_PACKET socket on 'veth0_to_hsr'. The first HSR-layer function to process this frame is hsr_handle_frame(). It and then checks if the protocol is ETH_P_PRP or ETH_P_HSR. If it is, it calls skb_set_network_header(skb, ETH_HLEN + HSR_HLEN), without checking that the skb is long enough. For the crashing frame it is not, and hence the skb->network_header and skb->mac_len fields are set incorrectly, pointing after the end of the linear buffer. I will call this a BUG#1 and it is what is addressed by this patch. In the crashing scenario before the fix, the skb continues to go down the hsr path as follows. hsr_handle_frame() then calls this sequence hsr_forward_skb() fill_frame_info() hsr->proto_ops->fill_frame_info() hsr_fill_frame_info() hsr_fill_frame_info() contains a check that intends to check whether the skb actually contains the HSR header. But the check relies on the skb->mac_len field which was erroneously setup due to BUG#1, so the check passes and the execution continues back in the hsr_forward_skb(): hsr_forward_skb() hsr_forward_do() hsr->proto_ops->get_untagged_frame() hsr_get_untagged_frame() create_stripped_skb_hsr() In create_stripped_skb_hsr(), a copy of the skb is created and is further corrupted by operation that attempts to strip the HSR tag in a call to __pskb_copy(). The skb enters create_stripped_skb_hsr() with ethernet header pushed in linear buffer. The skb_pull(skb_in, HSR_HLEN) thus pulls 6 bytes of ethernet header into the headroom, creating skb_in with a headroom of size 8. The subsequent __pskb_copy() then creates an skb with headroom of just 2 and skb->len of just 12, this is how it looks after the copy: gdb) p skb->len $10 = 12 (gdb) p skb->data $11 = (unsigned char *) 0xffff888041e45382 "\252\252\252\252\252!\210\373", (gdb) p skb->head $12 = (unsigned char *) 0xffff888041e45380 "" It seems create_stripped_skb_hsr() assumes that ETH header is pulled in the headroom when it's entered, because it just pulls HSR header on top. But that is not the case in our code-path and we end up with the corrupted skb instead. I will call this BUG#2 *I got confused here because it seems that under no conditions can create_stripped_skb_hsr() work well, the assumption it makes is not true during the processing of hsr frames - since the skb_push() in hsr_handle_frame to skb_pull in hsr_deliver_master(). I wonder whether I missed something here.* Next, the execution arrives in hsr_deliver_master(). It calls skb_pull(ETH_HLEN), which just returns NULL - the SKB does not have enough space for the pull (as it only has 12 bytes in total at this point). *The skb_pull() here further suggests that ethernet header is meant to be pushed through the whole hsr processing and create_stripped_skb_hsr() should pull it before doing the HSR header pull.* hsr_deliver_master() then puts the corrupted skb on the queue, it is then picked up from there by bridge frame handling layer and finally lands in br_dev_queue_push_xmit where it panics. Cc: stable@kernel.org Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") Reported-by: syzbot+a81f2759d022496b40ab@syzkaller.appspotmail.com Signed-off-by: Jakub Acs Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250819082842.94378-1-acsjakub@amazon.de Signed-off-by: Jakub Kicinski --- net/hsr/hsr_slave.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index b87b6a6fe070..102eccf5ead7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -63,8 +63,14 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) || - protocol == htons(ETH_P_HSR)) + protocol == htons(ETH_P_HSR)) { + if (!pskb_may_pull(skb, ETH_HLEN + HSR_HLEN)) { + kfree_skb(skb); + goto finish_consume; + } + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + } skb_reset_mac_len(skb); /* Only the frames received over the interlink port will assign a From a458b2902115b26a25d67393b12ddd57d1216aaa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2025 13:27:24 -0700 Subject: [PATCH 0655/1307] ipv6: sr: Fix MAC comparison to be constant-time To prevent timing attacks, MACs need to be compared in constant time. Use the appropriate helper function for this. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20250818202724.15713-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_hmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d77b52523b6a..fd58426f222b 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -280,7 +281,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) return false; - if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN)) return false; return true; From c42be534547d6e45c155c347dd792b6ad9c24def Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Tue, 19 Aug 2025 09:32:30 -0700 Subject: [PATCH 0656/1307] Revert "net: cadence: macb: sama7g5_emac: Remove USARIO CLKEN flag" This reverts commit db400061b5e7cc55f9b4dd15443e9838964119ea. This commit can cause a Devicetree ABI break for older DTS files that rely this flag for RMII configuration. Adding this back in ensures that the older DTBs will not break. Fixes: db400061b5e7 ("net: cadence: macb: sama7g5_emac: Remove USARIO CLKEN flag") Signed-off-by: Ryan Wanner Link: https://patch.msgid.link/20250819163236.100680-1-Ryan.Wanner@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ce95fad8cedd..9693f0289435 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5113,7 +5113,8 @@ static const struct macb_config sama7g5_gem_config = { static const struct macb_config sama7g5_emac_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | - MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | + MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, From 5003a65790ed66be882d1987cc2ca86af0de3db1 Mon Sep 17 00:00:00 2001 From: Dewei Meng Date: Thu, 21 Aug 2025 09:43:17 +0800 Subject: [PATCH 0657/1307] ALSA: timer: fix ida_free call while not allocated In the snd_utimer_create() function, if the kasprintf() function return NULL, snd_utimer_put_id() will be called, finally use ida_free() to free the unallocated id 0. the syzkaller reported the following information: ------------[ cut here ]------------ ida_free called for id=0 which is not allocated. WARNING: CPU: 1 PID: 1286 at lib/idr.c:592 ida_free+0x1fd/0x2f0 lib/idr.c:592 Modules linked in: CPU: 1 UID: 0 PID: 1286 Comm: syz-executor164 Not tainted 6.15.8 #3 PREEMPT(lazy) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.fc42 04/01/2014 RIP: 0010:ida_free+0x1fd/0x2f0 lib/idr.c:592 Code: f8 fc 41 83 fc 3e 76 69 e8 70 b2 f8 (...) RSP: 0018:ffffc900007f79c8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 1ffff920000fef3b RCX: ffffffff872176a5 RDX: ffff88800369d200 RSI: 0000000000000000 RDI: ffff88800369d200 RBP: 0000000000000000 R08: ffffffff87ba60a5 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f6f1abc1740(0000) GS:ffff8880d76a0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6f1ad7a784 CR3: 000000007a6e2000 CR4: 00000000000006f0 Call Trace: snd_utimer_put_id sound/core/timer.c:2043 [inline] [snd_timer] snd_utimer_create+0x59b/0x6a0 sound/core/timer.c:2184 [snd_timer] snd_utimer_ioctl_create sound/core/timer.c:2202 [inline] [snd_timer] __snd_timer_user_ioctl.isra.0+0x724/0x1340 sound/core/timer.c:2287 [snd_timer] snd_timer_user_ioctl+0x75/0xc0 sound/core/timer.c:2298 [snd_timer] vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __x64_sys_ioctl+0x198/0x200 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x7b/0x160 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] The utimer->id should be set properly before the kasprintf() function, ensures the snd_utimer_put_id() function will free the allocated id. Fixes: 37745918e0e75 ("ALSA: timer: Introduce virtual userspace-driven timers") Signed-off-by: Dewei Meng Link: https://patch.msgid.link/20250821014317.40786-1-mengdewei@cqsoftware.com.cn Signed-off-by: Takashi Iwai --- sound/core/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 3ce12264eed8..d9fff5c87613 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -2139,14 +2139,14 @@ static int snd_utimer_create(struct snd_timer_uinfo *utimer_info, goto err_take_id; } + utimer->id = utimer_id; + utimer->name = kasprintf(GFP_KERNEL, "snd-utimer%d", utimer_id); if (!utimer->name) { err = -ENOMEM; goto err_get_name; } - utimer->id = utimer_id; - tid.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION; tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL; tid.card = -1; From b64d035f77b1f02ab449393342264b44950a75ae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:19:58 +0000 Subject: [PATCH 0658/1307] bonding: update LACP activity flag after setting lacp_active The port's actor_oper_port_state activity flag should be updated immediately after changing the lacp_active option to reflect the current mode correctly. Fixes: 3a755cd8b7c6 ("bonding: add new option lacp_active") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_3ad.c | 25 +++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 1 + include/net/bond_3ad.h | 1 + 3 files changed, 27 insertions(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 2fca8e84ab10..414fecfd2a0e 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2883,6 +2883,31 @@ void bond_3ad_update_lacp_rate(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } +/** + * bond_3ad_update_lacp_active - change the lacp active + * @bond: bonding struct + * + * Update actor_oper_port_state when lacp_active is modified. + */ +void bond_3ad_update_lacp_active(struct bonding *bond) +{ + struct port *port = NULL; + struct list_head *iter; + struct slave *slave; + int lacp_active; + + lacp_active = bond->params.lacp_active; + spin_lock_bh(&bond->mode_lock); + bond_for_each_slave(bond, slave, iter) { + port = &(SLAVE_AD_INFO(slave)->port); + if (lacp_active) + port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY; + else + port->actor_oper_port_state &= ~LACP_STATE_LACP_ACTIVITY; + } + spin_unlock_bh(&bond->mode_lock); +} + size_t bond_3ad_stats_size(void) { return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 1d639a3be6ba..3b6f815c55ff 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1660,6 +1660,7 @@ static int bond_option_lacp_active_set(struct bonding *bond, netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", newval->string, newval->value); bond->params.lacp_active = newval->value; + bond_3ad_update_lacp_active(bond); return 0; } diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 2053cd8e788a..dba369a2cf27 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -307,6 +307,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); size_t bond_3ad_stats_size(void); From 0599640a21e98f0d6a3e9ff85c0a687c90a8103b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:19:59 +0000 Subject: [PATCH 0659/1307] bonding: send LACPDUs periodically in passive mode after receiving partner's LACPDU When `lacp_active` is set to `off`, the bond operates in passive mode, meaning it only "speaks when spoken to." However, the current kernel implementation only sends an LACPDU in response when the partner's state changes. As a result, once LACP negotiation succeeds, the actor stops sending LACPDUs until the partner times out and sends an "expired" LACPDU. This causes continuous LACP state flapping. According to IEEE 802.1AX-2014, 6.4.13 Periodic Transmission machine. The values of Partner_Oper_Port_State.LACP_Activity and Actor_Oper_Port_State.LACP_Activity determine whether periodic transmissions take place. If either or both parameters are set to Active LACP, then periodic transmissions occur; if both are set to Passive LACP, then periodic transmissions do not occur. To comply with this, we remove the `!bond->params.lacp_active` check in `ad_periodic_machine()`. Instead, we initialize the actor's port's `LACP_STATE_LACP_ACTIVITY` state based on `lacp_active` setting. Additionally, we avoid setting the partner's state to `LACP_STATE_LACP_ACTIVITY` in the EXPIRED state, since we should not assume the partner is active by default. This ensures that in passive mode, the bond starts sending periodic LACPDUs after receiving one from the partner, and avoids flapping due to inactivity. Fixes: 3a755cd8b7c6 ("bonding: add new option lacp_active") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_3ad.c | 42 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 414fecfd2a0e..4edc8e6b6b64 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -95,13 +95,13 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port, struct bond_params *bond_params); +static void ad_periodic_machine(struct port *port); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); static void ad_clear_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator); -static void ad_initialize_port(struct port *port, int lacp_fast); +static void ad_initialize_port(struct port *port, const struct bond_params *bond_params); static void ad_enable_collecting(struct port *port); static void ad_disable_distributing(struct port *port, bool *update_slave_arr); @@ -1307,10 +1307,16 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. */ - port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; + /* Based on IEEE 8021AX-2014, Figure 6-18 - Receive + * machine state diagram, the statue should be + * Partner_Oper_Port_State.Synchronization = FALSE; + * Partner_Oper_Port_State.LACP_Timeout = Short Timeout; + * start current_while_timer(Short Timeout); + * Actor_Oper_Port_State.Expired = TRUE; + */ + port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; - port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; @@ -1417,11 +1423,10 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at - * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ -static void ad_periodic_machine(struct port *port, struct bond_params *bond_params) +static void ad_periodic_machine(struct port *port) { periodic_states_t last_state; @@ -1430,8 +1435,7 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || - !bond_params->lacp_active) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -1955,16 +1959,16 @@ static void ad_initialize_agg(struct aggregator *aggregator) /** * ad_initialize_port - initialize a given port's parameters * @port: the port we're looking at - * @lacp_fast: boolean. whether fast periodic should be used + * @bond_params: bond parameters we will use */ -static void ad_initialize_port(struct port *port, int lacp_fast) +static void ad_initialize_port(struct port *port, const struct bond_params *bond_params) { static const struct port_params tmpl = { .system_priority = 0xffff, .key = 1, .port_number = 1, .port_priority = 0xff, - .port_state = 1, + .port_state = 0, }; static const struct lacpdu lacpdu = { .subtype = 0x01, @@ -1982,12 +1986,14 @@ static void ad_initialize_port(struct port *port, int lacp_fast) port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; - port->actor_admin_port_state = LACP_STATE_AGGREGATION | - LACP_STATE_LACP_ACTIVITY; - port->actor_oper_port_state = LACP_STATE_AGGREGATION | - LACP_STATE_LACP_ACTIVITY; + port->actor_admin_port_state = LACP_STATE_AGGREGATION; + port->actor_oper_port_state = LACP_STATE_AGGREGATION; + if (bond_params->lacp_active) { + port->actor_admin_port_state |= LACP_STATE_LACP_ACTIVITY; + port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY; + } - if (lacp_fast) + if (bond_params->lacp_fast) port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); @@ -2201,7 +2207,7 @@ void bond_3ad_bind_slave(struct slave *slave) /* port initialization */ port = &(SLAVE_AD_INFO(slave)->port); - ad_initialize_port(port, bond->params.lacp_fast); + ad_initialize_port(port, &bond->params); port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; @@ -2513,7 +2519,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port, &bond->params); + ad_periodic_machine(port); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); From 87951b566446da04eed1fe8100f99a512ef02756 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:20:00 +0000 Subject: [PATCH 0660/1307] selftests: bonding: add test for passive LACP mode Add a selftest to verify bonding behavior when `lacp_active` is set to `off`. The test checks the following: - The passive LACP bond should not send LACPDUs before receiving a partner's LACPDU. - The transmitted LACPDUs must not include the active flag. - After transitioning to EXPIRED and DEFAULTED states, the passive side should still not initiate LACPDUs. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-4-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_passive_lacp.sh | 105 ++++++++++++++++++ .../selftests/drivers/net/bonding/config | 1 + 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..44b98f17f8ff 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_passive_lacp.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. + +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index dad4e5fda4db..4d16a69ffc65 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -6,6 +6,7 @@ CONFIG_MACVLAN=y CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y From 41a86f62424ac436cb51e3de612ef1e1ddb0c873 Mon Sep 17 00:00:00 2001 From: Guopeng Zhang Date: Wed, 20 Aug 2025 21:34:24 +0800 Subject: [PATCH 0661/1307] fs: fix indentation style Replace 8 leading spaces with a tab to follow kernel coding style. Signed-off-by: Guopeng Zhang Link: https://lore.kernel.org/20250820133424.1667467-1-zhangguopeng@kylinos.cn Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index ae6d1312b184..51f77c65c0c6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2455,7 +2455,7 @@ struct vfsmount *clone_private_mount(const struct path *path) return ERR_PTR(-EINVAL); } - if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); if (__has_locked_children(old_mnt, path->dentry)) From d2d7a96b29ea6ab093973a1a37d26126db70c79f Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Wed, 20 Aug 2025 14:30:47 -0500 Subject: [PATCH 0662/1307] mmc: sdhci_am654: Disable HS400 for AM62P SR1.0 and SR1.1 This adds SDHCI_AM654_QUIRK_DISABLE_HS400 quirk which shall be used to disable HS400 support. AM62P SR1.0 and SR1.1 do not support HS400 due to errata i2458 [0] so disable HS400 for these SoC revisions. [0] https://www.ti.com/lit/er/sprz574a/sprz574a.pdf Fixes: 37f28165518f ("arm64: dts: ti: k3-am62p: Add ITAP/OTAP values for MMC") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Reviewed-by: Andrew Davis Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250820193047.4064142-1-jm@ti.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index e4fc345be7e5..17e62c61b6e6 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -156,6 +156,7 @@ struct sdhci_am654_data { #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) #define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) +#define SDHCI_AM654_QUIRK_DISABLE_HS400 BIT(2) }; struct window { @@ -765,6 +766,7 @@ static int sdhci_am654_init(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + struct device *dev = mmc_dev(host->mmc); u32 ctl_cfg_2 = 0; u32 mask; u32 val; @@ -820,6 +822,12 @@ static int sdhci_am654_init(struct sdhci_host *host) if (ret) goto err_cleanup_host; + if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_DISABLE_HS400 && + host->mmc->caps2 & (MMC_CAP2_HS400 | MMC_CAP2_HS400_ES)) { + dev_info(dev, "HS400 mode not supported on this silicon revision, disabling it\n"); + host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES); + } + ret = __sdhci_add_host(host); if (ret) goto err_cleanup_host; @@ -883,6 +891,12 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, return 0; } +static const struct soc_device_attribute sdhci_am654_descope_hs400[] = { + { .family = "AM62PX", .revision = "SR1.0" }, + { .family = "AM62PX", .revision = "SR1.1" }, + { /* sentinel */ } +}; + static const struct of_device_id sdhci_am654_of_match[] = { { .compatible = "ti,am654-sdhci-5.1", @@ -970,6 +984,10 @@ static int sdhci_am654_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "parsing dt failed\n"); + soc = soc_device_match(sdhci_am654_descope_hs400); + if (soc) + sdhci_am654->quirks |= SDHCI_AM654_QUIRK_DISABLE_HS400; + host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; From 9f6b606b6b37e61427412708411e8e04b1a858e8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Aug 2025 11:58:25 +0200 Subject: [PATCH 0663/1307] net: airoha: ppe: Do not invalid PPE entries in case of SW hash collision SW hash computed by airoha_ppe_foe_get_entry_hash routine (used for foe_flow hlist) can theoretically produce collisions between two different HW PPE entries. In airoha_ppe_foe_insert_entry() if the collision occurs we will mark the second PPE entry in the list as stale (setting the hw hash to 0xffff). Stale entries are no more updated in airoha_ppe_foe_flow_entry_update routine and so they are removed by Netfilter. Fix the problem not marking the second entry as stale in airoha_ppe_foe_insert_entry routine if we have already inserted the brand new entry in the PPE table and let Netfilter remove real stale entries according to their timestamp. Please note this is just a theoretical issue spotted reviewing the code and not faced running the system. Fixes: cd53f622611f9 ("net: airoha: Add L2 hw acceleration support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250818-airoha-en7581-hash-collision-fix-v1-1-d190c4b53d1c@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 47411d2cbd28..88694b08afa1 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -736,10 +736,8 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, continue; } - if (commit_done || !airoha_ppe_foe_compare_entry(e, hwe)) { - e->hash = 0xffff; + if (!airoha_ppe_foe_compare_entry(e, hwe)) continue; - } airoha_ppe_foe_commit_entry(ppe, &e->data, hash); commit_done = true; From 2d82f3bd8910eb65e30bb2a3c9b945bfb3b6d661 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Aug 2025 21:17:37 +0800 Subject: [PATCH 0664/1307] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Commit 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") reintroduced a lockdep warning by calling blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler. The function blk_mq_elv_switch_none() calls elevator_change_done(). Running this while the queue is frozen causes a lockdep warning. Fix this by reordering the operations: first, switch the I/O scheduler to 'none', and then freeze the queue. This ensures that elevator_change_done() is not called on an already frozen queue. And this way is safe because elevator_set_none() does freeze queue before switching to none. Also we still have to rely on blk_mq_elv_switch_back() for switching back, and it has to cover unfrozen queue case. Cc: Nilay Shroff Cc: Yu Kuai Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update") Signed-off-by: Ming Lei Reviewed-by: Yu Kuai Reviewed-by: Nilay Shroff Link: https://lore.kernel.org/r/20250815131737.331692-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b67d6c02eceb..ba3a4b77f578 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl, et_tbl; + bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister_hctxs(q); } - list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue_nomemsave(q); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_freeze_queue_nomemsave(q); + queues_frozen = true; if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto switch_back; @@ -5091,8 +5092,12 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, } switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. */ - list_for_each_entry(q, &set->tag_list, tag_set_list) + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* switch_back expects queue to be frozen */ + if (!queues_frozen) + blk_mq_freeze_queue_nomemsave(q); blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); + } list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); From be1e0283021ec73c2eb92839db9a471a068709d9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Aug 2025 13:50:47 +0200 Subject: [PATCH 0665/1307] coredump: don't pointlessly check and spew warnings When a write happens it doesn't make sense to check perform checks on the input. Skip them. Whether a fixes tag is licensed is a bit of a gray area here but I'll add one for the socket validation part I added recently. Link: https://lore.kernel.org/20250821-moosbedeckt-denunziant-7908663f3563@brauner Fixes: 16195d2c7dd2 ("coredump: validate socket name as it is written") Reported-by: Brad Spengler Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++++ fs/exec.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index 5dce257c67fc..60bc9685e149 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1466,11 +1466,15 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write, ssize_t retval; char old_core_pattern[CORENAME_MAX_SIZE]; + if (write) + return proc_dostring(table, write, buffer, lenp, ppos); + retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); error = proc_dostring(table, write, buffer, lenp, ppos); if (error) return error; + if (!check_coredump_socket()) { strscpy(core_pattern, old_core_pattern, retval + 1); return -EINVAL; diff --git a/fs/exec.c b/fs/exec.c index 2a1e5e4042a1..e861a4b7ffda 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!error) + if (!error && !write) validate_coredump_safety(); return error; } From 7c7cda81159b1abe7d50bcef2ccc6f662e225c8b Mon Sep 17 00:00:00 2001 From: Raphael Gallais-Pou Date: Wed, 20 Aug 2025 20:03:10 +0200 Subject: [PATCH 0666/1307] spi: st: fix PM macros to use CONFIG_PM instead of CONFIG_PM_SLEEP pm_sleep_ptr() depends on CONFIG_PM_SLEEP while pm_ptr() depends on CONFIG_PM. Since ST SSC4 implements runtime PM it makes sense using pm_ptr() here. For the same reason replace PM macros that use CONFIG_PM. Doing so prevents from using __maybe_unused attribute of runtime PM functions. Link: https://lore.kernel.org/lkml/CAMuHMdX9nkROkAJJ5odv4qOWe0bFTmaFs=Rfxsfuc9+DT-bsEQ@mail.gmail.com Fixes: 6f8584a4826f ("spi: st: Switch from CONFIG_PM_SLEEP guards to pm_sleep_ptr()") Signed-off-by: Raphael Gallais-Pou Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250820180310.9605-1-rgallaispou@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-st-ssc4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index 49ab4c515156..c07c61dc4938 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -378,7 +378,7 @@ static void spi_st_remove(struct platform_device *pdev) pinctrl_pm_select_sleep_state(&pdev->dev); } -static int __maybe_unused spi_st_runtime_suspend(struct device *dev) +static int spi_st_runtime_suspend(struct device *dev) { struct spi_controller *host = dev_get_drvdata(dev); struct spi_st *spi_st = spi_controller_get_devdata(host); @@ -391,7 +391,7 @@ static int __maybe_unused spi_st_runtime_suspend(struct device *dev) return 0; } -static int __maybe_unused spi_st_runtime_resume(struct device *dev) +static int spi_st_runtime_resume(struct device *dev) { struct spi_controller *host = dev_get_drvdata(dev); struct spi_st *spi_st = spi_controller_get_devdata(host); @@ -428,8 +428,8 @@ static int __maybe_unused spi_st_resume(struct device *dev) } static const struct dev_pm_ops spi_st_pm = { - SET_SYSTEM_SLEEP_PM_OPS(spi_st_suspend, spi_st_resume) - SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL) + SYSTEM_SLEEP_PM_OPS(spi_st_suspend, spi_st_resume) + RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL) }; static const struct of_device_id stm_spi_match[] = { @@ -441,7 +441,7 @@ MODULE_DEVICE_TABLE(of, stm_spi_match); static struct platform_driver spi_st_driver = { .driver = { .name = "spi-st", - .pm = pm_sleep_ptr(&spi_st_pm), + .pm = pm_ptr(&spi_st_pm), .of_match_table = of_match_ptr(stm_spi_match), }, .probe = spi_st_probe, From 275332877e2fa9d6efa7402b1e897f6c6ee695bb Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:57 +0530 Subject: [PATCH 0667/1307] block: skip q->rq_qos check in rq_qos_done_bio() If a bio has BIO_QOS_THROTTLED or BIO_QOS_MERGED set, it implicitly guarantees that q->rq_qos is present. Avoid re-checking q->rq_qos in this case and call __rq_qos_done_bio() directly as a minor optimization. Suggested-by : Yu Kuai Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-2-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 39749f4066fb..28125fc49eff 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -142,8 +142,14 @@ static inline void rq_qos_done_bio(struct bio *bio) bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || bio_flagged(bio, BIO_QOS_MERGED))) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). + */ + __rq_qos_done_bio(q->rq_qos, bio); } } From ade1beea1c27657712aa8f594226d461639382ff Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:58 +0530 Subject: [PATCH 0668/1307] block: decrement block_rq_qos static key in rq_qos_del() rq_qos_add() increments the block_rq_qos static key when a QoS policy is attached. When a QoS policy is removed via rq_qos_del(), we must symmetrically decrement the static key. If this removal drops the last QoS policy from the queue (q->rq_qos becomes NULL), the static branch can be disabled and the jump label patched to a NOP, avoiding overhead on the hot path. This change ensures rq_qos_add()/rq_qos_del() keep the block_rq_qos static key balanced and prevents leaving the branch permanently enabled after the last policy is removed. Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-3-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 848591fb3c57..b1e24bb85ad2 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -374,6 +374,7 @@ void rq_qos_del(struct rq_qos *rqos) for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; + static_branch_dec(&block_rq_qos); break; } } From 370ac285f23aecae40600851fb4a1a9e75e50973 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:59 +0530 Subject: [PATCH 0669/1307] block: avoid cpu_hotplug_lock depedency on freeze_lock A recent lockdep[1] splat observed while running blktest block/005 reveals a potential deadlock caused by the cpu_hotplug_lock dependency on ->freeze_lock. This dependency was introduced by commit 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key"). That change added a static key to avoid fetching q->rq_qos when neither blk-wbt nor blk-iolatency is configured. The static key dynamically patches kernel text to a NOP when disabled, eliminating overhead of fetching q->rq_qos in the I/O hot path. However, enabling a static key at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex. When this happens after the queue has already been frozen (i.e., while holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a potential deadlock reported by lockdep [1]. To resolve this, replace the static key mechanism with q->queue_flags: QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos; otherwise, the access is skipped. Since q->queue_flags is commonly accessed in IO hotpath and resides in the first cacheline of struct request_queue, checking it imposes minimal overhead while eliminating the deadlock risk. This change avoids the lockdep splat without introducing performance regressions. [1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Tested-by: Shin'ichiro Kawasaki Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + block/blk-rq-qos.c | 9 ++++---- block/blk-rq-qos.h | 52 ++++++++++++++++++++++++------------------ include/linux/blkdev.h | 1 + 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7ed3e71f2fc0..32c65efdda46 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -95,6 +95,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SQ_SCHED), QUEUE_FLAG_NAME(DISABLE_WBT_DEF), QUEUE_FLAG_NAME(NO_ELV_SWITCH), + QUEUE_FLAG_NAME(QOS_ENABLED), }; #undef QUEUE_FLAG_NAME diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index b1e24bb85ad2..654478dfbc20 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -2,8 +2,6 @@ #include "blk-rq-qos.h" -__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); - /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. @@ -319,8 +317,8 @@ void rq_qos_exit(struct request_queue *q) struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); - static_branch_dec(&block_rq_qos); } + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); mutex_unlock(&q->rq_qos_mutex); } @@ -346,7 +344,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - static_branch_inc(&block_rq_qos); + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); @@ -374,10 +372,11 @@ void rq_qos_del(struct rq_qos *rqos) for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; - static_branch_dec(&block_rq_qos); break; } } + if (!q->rq_qos) + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 28125fc49eff..1fe22000a379 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -12,7 +12,6 @@ #include "blk-mq-debugfs.h" struct blk_mq_debugfs_attr; -extern struct static_key_false block_rq_qos; enum rq_qos_id { RQ_QOS_WBT, @@ -113,49 +112,55 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos); static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_cleanup(q->rq_qos, bio); } static inline void rq_qos_done(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos && - !blk_rq_is_passthrough(rq)) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos && !blk_rq_is_passthrough(rq)) __rq_qos_done(q->rq_qos, rq); } static inline void rq_qos_issue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_issue(q->rq_qos, rq); } static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_requeue(q->rq_qos, rq); } static inline void rq_qos_done_bio(struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && - bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || - bio_flagged(bio, BIO_QOS_MERGED))) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct request_queue *q; - /* - * If a bio has BIO_QOS_xxx set, it implicitly implies that - * q->rq_qos is present. So, we skip re-checking q->rq_qos - * here as an extra optimization and directly call - * __rq_qos_done_bio(). - */ - __rq_qos_done_bio(q->rq_qos, bio); - } + if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) && + !bio_flagged(bio, BIO_QOS_MERGED))) + return; + + q = bdev_get_queue(bio->bi_bdev); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). + */ + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } @@ -164,14 +169,16 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) static inline void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_track(q->rq_qos, rq, bio); } static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); } @@ -179,7 +186,8 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq, static inline void rq_qos_queue_depth_changed(struct request_queue *q) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_queue_depth_changed(q->rq_qos); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; From 220abf77e7c2835cc63ea8cd7158cf83952640af Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 21 Aug 2025 09:56:38 +0530 Subject: [PATCH 0670/1307] cpufreq/amd-pstate: Fix setting of CPPC.min_perf in active mode for performance governor In the "active" mode of the amd-pstate driver with performance governor, the CPPC.min_perf is expected to be the nominal_perf. However after commit a9b9b4c2a4cd ("cpufreq/amd-pstate: Drop min and max cached frequencies"), this is not the case when the governor is switched from performance to powersave and back to performance, and the CPPC.min_perf will be equal to the scaling_min_freq that was set for the powersave governor. This is because prior to commit a9b9b4c2a4cd ("cpufreq/amd-pstate: Drop min and max cached frequencies"), amd_pstate_epp_update_limit() would unconditionally call amd_pstate_update_min_max_limit() and the latter function would enforce the CPPC.min_perf constraint when the governor is performance. However, after the aforementioned commit, amd_pstate_update_min_max_limit() is called by amd_pstate_epp_update_limit() only when either the scaling_{min/max}_freq is different from the cached value of cpudata->{min/max}_limit_freq, which wouldn't have changed on a governor transition from powersave to performance, thus missing out on enforcing the CPPC.min_perf constraint for the performance governor. Fix this by invoking amd_pstate_epp_udpate_limit() not only when the {min/max} limits have changed from the cached values, but also when the policy itself has changed. Fixes: a9b9b4c2a4cd ("cpufreq/amd-pstate: Drop min and max cached frequencies") Signed-off-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250821042638.356-1-gautham.shenoy@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bbc27ef9edf7..5cd91489fcbe 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1554,13 +1554,15 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) pr_debug("CPU %d exiting\n", policy->cpu); } -static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) +static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy, bool policy_change) { struct amd_cpudata *cpudata = policy->driver_data; union perf_cached perf; u8 epp; - if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + if (policy_change || + policy->min != cpudata->min_limit_freq || + policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) @@ -1584,7 +1586,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) cpudata->policy = policy->policy; - ret = amd_pstate_epp_update_limit(policy); + ret = amd_pstate_epp_update_limit(policy, true); if (ret) return ret; @@ -1658,7 +1660,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) int ret; /* enable amd pstate from suspend state*/ - ret = amd_pstate_epp_update_limit(policy); + ret = amd_pstate_epp_update_limit(policy, false); if (ret) return ret; From 62708b9452f8eb77513115b17c4f8d1a22ebf843 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:19:51 -0700 Subject: [PATCH 0671/1307] tls: fix handling of zero-length records on the rx_list Each recvmsg() call must process either - only contiguous DATA records (any number of them) - one non-DATA record If the next record has different type than what has already been processed we break out of the main processing loop. If the record has already been decrypted (which may be the case for TLS 1.3 where we don't know type until decryption) we queue the pending record to the rx_list. Next recvmsg() will pick it up from there. Queuing the skb to rx_list after zero-copy decrypt is not possible, since in that case we decrypted directly to the user space buffer, and we don't have an skb to queue (darg.skb points to the ciphertext skb for access to metadata like length). Only data records are allowed zero-copy, and we break the processing loop after each non-data record. So we should never zero-copy and then find out that the record type has changed. The corner case we missed is when the initial record comes from rx_list, and it's zero length. Reported-by: Muhammad Alifa Ramdhan Reported-by: Billy Jheng Bing-Jhong Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250820021952.143068-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 51c98a007dda..bac65d0d4e3e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1808,6 +1808,9 @@ int decrypt_skb(struct sock *sk, struct scatterlist *sgout) return tls_decrypt_sg(sk, NULL, sgout, &darg); } +/* All records returned from a recvmsg() call must have the same type. + * 0 is not a valid content type. Use it as "no type reported, yet". + */ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { @@ -2051,8 +2054,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err < 0) goto end; + /* process_rx_list() will set @control if it processed any records */ copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) + if (len <= copied || rx_more || + (control && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); From a61a3e961baff65b0a49f862fe21ce304f279b24 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:19:52 -0700 Subject: [PATCH 0672/1307] selftests: tls: add tests for zero-length records Test various combinations of zero-length records. Unfortunately, kernel cannot be coerced into producing those, so hardcode the ciphertext messages in the test. Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250820021952.143068-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 300 +++++++++++++++++++++++++++++- 1 file changed, 295 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2b8387a83bc7..0f5640d8dc7f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -2164,6 +2176,284 @@ TEST_F(tls, rekey_poll_delay) } } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; From 8c5d95988c34f0aeba1f34cd5e4ba69494c90c5f Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 20 Aug 2025 12:09:18 +0530 Subject: [PATCH 0673/1307] Octeontx2-af: Skip overlap check for SPI field Octeontx2/CN10K silicon supports generating a 256-bit key per packet. The specific fields to be extracted from a packet for key generation are configurable via a Key Extraction (MKEX) Profile. The AF driver scans the configured extraction profile to ensure that fields from upper layers do not overwrite fields from lower layers in the key. Example Packet Field Layout: LA: DMAC + SMAC LB: VLAN LC: IPv4/IPv6 LD: TCP/UDP Valid MKEX Profile Configuration: LA -> DMAC -> key_offset[0-5] LC -> SIP -> key_offset[20-23] LD -> SPORT -> key_offset[30-31] Invalid MKEX profile configuration: LA -> DMAC -> key_offset[0-5] LC -> SIP -> key_offset[20-23] LD -> SPORT -> key_offset[2-3] // Overlaps with DMAC field In another scenario, if the MKEX profile is configured to extract the SPI field from both AH and ESP headers at the same key offset, the driver rejecting this configuration. In a regular traffic, ipsec packet will be having either AH(LD) or ESP (LE). This patch relaxes the check for the same. Fixes: 12aa0a3b93f3 ("octeontx2-af: Harden rule validation.") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250820063919.1463518-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 1b765045aa63..b56395ac5a74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -606,8 +606,8 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) if (!npc_check_field(rvu, blkaddr, NPC_LB, intf)) *features &= ~BIT_ULL(NPC_OUTER_VID); - /* Set SPI flag only if AH/ESP and IPSEC_SPI are in the key */ - if (npc_check_field(rvu, blkaddr, NPC_IPSEC_SPI, intf) && + /* Allow extracting SPI field from AH and ESP headers at same offset */ + if (npc_is_field_present(rvu, NPC_IPSEC_SPI, intf) && (*features & (BIT_ULL(NPC_IPPROTO_ESP) | BIT_ULL(NPC_IPPROTO_AH)))) *features |= BIT_ULL(NPC_IPSEC_SPI); From 1c67f9c54cdc70627e3f6472b89cd3d895df974c Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 20 Aug 2025 15:27:07 +0200 Subject: [PATCH 0674/1307] net: pse-pd: pd692x0: Fix power budget leak in manager setup error path Fix a resource leak where manager power budgets were freed on both success and error paths during manager setup. Power budgets should only be freed on error paths after regulator registration or during driver removal. Refactor cleanup logic by extracting OF node cleanup and power budget freeing into separate helper functions for better maintainability. Fixes: 359754013e6a ("net: pse-pd: pd692x0: Add support for PSE PI priority feature") Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250820132708.837255-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 59 +++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 399ce9febda4..395f6c662175 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1162,12 +1162,44 @@ pd692x0_write_ports_matrix(struct pd692x0_priv *priv, return 0; } +static void pd692x0_of_put_managers(struct pd692x0_priv *priv, + struct pd692x0_manager *manager, + int nmanagers) +{ + int i, j; + + for (i = 0; i < nmanagers; i++) { + for (j = 0; j < manager[i].nports; j++) + of_node_put(manager[i].port_node[j]); + of_node_put(manager[i].node); + } +} + +static void pd692x0_managers_free_pw_budget(struct pd692x0_priv *priv) +{ + int i; + + for (i = 0; i < PD692X0_MAX_MANAGERS; i++) { + struct regulator *supply; + + if (!priv->manager_reg[i] || !priv->manager_pw_budget[i]) + continue; + + supply = priv->manager_reg[i]->supply; + if (!supply) + continue; + + regulator_free_power_budget(supply, + priv->manager_pw_budget[i]); + } +} + static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) { struct pd692x0_manager *manager __free(kfree) = NULL; struct pd692x0_priv *priv = to_pd692x0_priv(pcdev); struct pd692x0_matrix port_matrix[PD692X0_MAX_PIS]; - int ret, i, j, nmanagers; + int ret, nmanagers; /* Should we flash the port matrix */ if (priv->fw_state != PD692X0_FW_OK && @@ -1185,31 +1217,27 @@ static int pd692x0_setup_pi_matrix(struct pse_controller_dev *pcdev) nmanagers = ret; ret = pd692x0_register_managers_regulator(priv, manager, nmanagers); if (ret) - goto out; + goto err_of_managers; ret = pd692x0_configure_managers(priv, nmanagers); if (ret) - goto out; + goto err_of_managers; ret = pd692x0_set_ports_matrix(priv, manager, nmanagers, port_matrix); if (ret) - goto out; + goto err_managers_req_pw; ret = pd692x0_write_ports_matrix(priv, port_matrix); if (ret) - goto out; + goto err_managers_req_pw; -out: - for (i = 0; i < nmanagers; i++) { - struct regulator *supply = priv->manager_reg[i]->supply; + pd692x0_of_put_managers(priv, manager, nmanagers); + return 0; - regulator_free_power_budget(supply, - priv->manager_pw_budget[i]); - - for (j = 0; j < manager[i].nports; j++) - of_node_put(manager[i].port_node[j]); - of_node_put(manager[i].node); - } +err_managers_req_pw: + pd692x0_managers_free_pw_budget(priv); +err_of_managers: + pd692x0_of_put_managers(priv, manager, nmanagers); return ret; } @@ -1748,6 +1776,7 @@ static void pd692x0_i2c_remove(struct i2c_client *client) { struct pd692x0_priv *priv = i2c_get_clientdata(client); + pd692x0_managers_free_pw_budget(priv); firmware_upload_unregister(priv->fwl); } From 7ef353879f714602b43f98662069f4fb86536761 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 20 Aug 2025 15:33:21 +0200 Subject: [PATCH 0675/1307] net: pse-pd: pd692x0: Skip power budget configuration when undefined If the power supply's power budget is not defined in the device tree, the current code still requests power and configures the PSE manager with a 0W power limit, which is undesirable behavior. Skip power budget configuration entirely when the budget is zero, avoiding unnecessary power requests and preventing invalid 0W limits from being set on the PSE manager. Fixes: 359754013e6a ("net: pse-pd: pd692x0: Add support for PSE PI priority feature") Signed-off-by: Kory Maincent Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20250820133321.841054-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pd692x0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index 395f6c662175..f4e91ba64a66 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1041,6 +1041,10 @@ pd692x0_configure_managers(struct pd692x0_priv *priv, int nmanagers) int pw_budget; pw_budget = regulator_get_unclaimed_power_budget(supply); + if (!pw_budget) + /* Do nothing if no power budget */ + continue; + /* Max power budget per manager */ if (pw_budget > 6000000) pw_budget = 6000000; From bc17455bc843b2f4b206e0bb8139013eb3d3c08b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 20 Aug 2025 16:32:02 +0300 Subject: [PATCH 0676/1307] net/mlx5: Base ECVF devlink port attrs from 0 Adjust the vport number by the base ECVF vport number so the port attributes start at 0. Previously the port attributes would start 1 after the maximum number of host VFs. Fixes: dc13180824b7 ("net/mlx5: Enable devlink port for embedded cpu VF vports") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index b7102e14d23d..c33accadae0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -47,10 +47,12 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + u16 base_vport = mlx5_core_ec_vf_vport_base(dev); + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, - vport_num - 1, false); + vport_num - base_vport, false); } } From 330f0f6713a39581936decac72331e6ab7f13529 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:03 +0300 Subject: [PATCH 0677/1307] net/mlx5: Remove default QoS group and attach vports directly to root TSAR Currently, the driver creates a default group (`node0`) and attaches all vports to it unless the user explicitly sets a parent group. As a result, when a user configures tx_share on a group and tx_share on a VF, the expectation is for the group and the VF to share bandwidth relatively. However, since the VF is not connected to the same parent (but to the default node), the proportional share logic is not applied correctly. To fix this, remove the default group (`node0`) and instead connect vports directly to the root TSAR when no parent is specified. This ensures that vports and groups share the same root scheduler and their tx_share values are compared directly under the same hierarchy. Fixes: 0fe132eac38c ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 97 +++++++------------ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 - 2 files changed, 33 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 91d863c8c152..cd58d3934596 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -462,6 +462,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *parent = vport_node->parent; u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_node->esw->dev; void *attr; @@ -477,7 +478,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, - vport_node->parent->ix); + parent ? parent->ix : vport_node->esw->qos.root_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate); @@ -786,48 +787,15 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta return err; } - if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack); - } else { - /* The eswitch doesn't support scheduling nodes. - * Create a software-only node0 using the root TSAR to attach vport QoS to. - */ - if (!__esw_qos_alloc_node(esw, - esw->qos.root_tsar_ix, - SCHED_NODE_TYPE_VPORTS_TSAR, - NULL)) - esw->qos.node0 = ERR_PTR(-ENOMEM); - else - list_add_tail(&esw->qos.node0->entry, - &esw->qos.domain->nodes); - } - if (IS_ERR(esw->qos.node0)) { - err = PTR_ERR(esw->qos.node0); - esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err); - goto err_node0; - } refcount_set(&esw->qos.refcnt, 1); return 0; - -err_node0: - if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_ix)) - esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); - - return err; } static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; - if (esw->qos.node0->ix != esw->qos.root_tsar_ix) - __esw_qos_destroy_node(esw->qos.node0, NULL); - else - __esw_qos_free_node(esw->qos.node0); - esw->qos.node0 = NULL; - err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); @@ -990,13 +958,16 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; - int err, new_level, max_level; + struct mlx5_esw_sched_node *parent = vport_node->parent; + int err; if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + int new_level, max_level; + /* Increase the parent's level by 2 to account for both the * TC arbiter and the vports TC scheduling element. */ - new_level = vport_node->parent->level + 2; + new_level = (parent ? parent->level : 2) + 2; max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { @@ -1033,9 +1004,7 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, err_sched_nodes: if (type == SCHED_NODE_TYPE_RATE_LIMITER) { esw_qos_node_destroy_sched_element(vport_node, NULL); - list_add_tail(&vport_node->entry, - &vport_node->parent->children); - vport_node->level = vport_node->parent->level + 1; + esw_qos_node_attach_to_parent(vport_node); } else { esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL); } @@ -1083,7 +1052,6 @@ static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport, static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; - struct mlx5_esw_sched_node *parent = vport_node->parent; enum sched_node_type curr_type = vport_node->type; if (curr_type == SCHED_NODE_TYPE_VPORT) @@ -1093,7 +1061,7 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a vport_node->bw_share = 0; list_del_init(&vport_node->entry); - esw_qos_normalize_min_rate(parent->esw, parent, extack); + esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport); } @@ -1103,25 +1071,23 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; int err; esw_assert_qos_lock_held(vport->dev->priv.eswitch); - esw_qos_node_set_parent(vport->qos.sched_node, parent); - if (type == SCHED_NODE_TYPE_VPORT) { - err = esw_qos_vport_create_sched_element(vport->qos.sched_node, - extack); - } else { + esw_qos_node_set_parent(vport_node, parent); + if (type == SCHED_NODE_TYPE_VPORT) + err = esw_qos_vport_create_sched_element(vport_node, extack); + else err = esw_qos_vport_tc_enable(vport, type, extack); - } if (err) return err; - vport->qos.sched_node->type = type; - esw_qos_normalize_min_rate(parent->esw, parent, extack); - trace_mlx5_esw_vport_qos_create(vport->dev, vport, - vport->qos.sched_node->max_rate, - vport->qos.sched_node->bw_share); + vport_node->type = type; + esw_qos_normalize_min_rate(vport_node->esw, parent, extack); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate, + vport_node->bw_share); return 0; } @@ -1132,6 +1098,7 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_sched_node *sched_node; + struct mlx5_eswitch *parent_esw; int err; esw_assert_qos_lock_held(esw); @@ -1139,10 +1106,12 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t if (err) return err; - parent = parent ?: esw->qos.node0; - sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent); + parent_esw = parent ? parent->esw : esw; + sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); if (!sched_node) return -ENOMEM; + if (!parent) + list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); sched_node->max_rate = max_rate; sched_node->min_rate = min_rate; @@ -1168,7 +1137,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) goto unlock; parent = vport->qos.sched_node->parent; - WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node"); + WARN(parent, "Disabling QoS on port before detaching it from node"); esw_qos_vport_disable(vport, NULL); mlx5_esw_qos_vport_qos_free(vport); @@ -1268,7 +1237,6 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, int err; esw_assert_qos_lock_held(vport->dev->priv.eswitch); - parent = parent ?: curr_parent; if (curr_type == type && curr_parent == parent) return 0; @@ -1306,16 +1274,16 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw esw_assert_qos_lock_held(esw); curr_parent = vport->qos.sched_node->parent; - parent = parent ?: esw->qos.node0; if (curr_parent == parent) return 0; /* Set vport QoS type based on parent node type if different from * default QoS; otherwise, use the vport's current QoS type. */ - if (parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) type = SCHED_NODE_TYPE_RATE_LIMITER; - else if (curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + else if (curr_parent && + curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) type = SCHED_NODE_TYPE_VPORT; else type = vport->qos.sched_node->type; @@ -1654,9 +1622,10 @@ static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, u32 *tc_bw) { - struct mlx5_eswitch *esw = vport->qos.sched_node ? - vport->qos.sched_node->parent->esw : - vport->dev->priv.eswitch; + struct mlx5_esw_sched_node *node = vport->qos.sched_node; + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw = (node && node->parent) ? node->parent->esw : esw; return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); } @@ -1763,7 +1732,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, if (disable) { if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, - NULL, extack); + vport_node->parent, extack); goto unlock; } @@ -1775,7 +1744,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, } else { err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_TC_ARBITER_TSAR, - NULL, extack); + vport_node->parent, extack); } if (!err) esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b0b8ef3ec3c4..45506ad56847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -373,11 +373,6 @@ struct mlx5_eswitch { refcount_t refcnt; u32 root_tsar_ix; struct mlx5_qos_domain *domain; - /* Contains all vports with QoS enabled but no explicit node. - * Cannot be NULL if QoS is enabled, but may be a fake node - * referencing the root TSAR if the esw doesn't support nodes. - */ - struct mlx5_esw_sched_node *node0; } qos; struct mlx5_esw_bridge_offloads *br_offloads; From e8f973576ca5387ffd2917b8ae661d3f9acde526 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:04 +0300 Subject: [PATCH 0678/1307] net/mlx5e: Preserve tc-bw during parent changes When changing parent of a node/leaf with tc-bw configured, the code saves and restores tc-bw values. However, it was reading the converted hardware bw_share values (where 0 becomes 1) instead of the original user values, causing incorrect tc-bw calculations after parent change. Store original tc-bw values in the node structure and use them directly for save/restore operations. Fixes: cf7e73770d1b ("net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index cd58d3934596..4ed5968f1638 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -102,6 +102,8 @@ struct mlx5_esw_sched_node { u8 level; /* Valid only when this node represents a traffic class. */ u8 tc; + /* Valid only for a TC arbiter node or vport TC arbiter. */ + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; }; static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) @@ -609,10 +611,7 @@ static void esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, u32 *tc_bw) { - struct mlx5_esw_sched_node *vports_tc_node; - - list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) - tc_bw[vports_tc_node->tc] = vports_tc_node->bw_share; + memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw)); } static void @@ -629,6 +628,7 @@ esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, u8 tc = vports_tc_node->tc; u32 bw_share; + tc_arbiter_node->tc_bw[tc] = tc_bw[tc]; bw_share = tc_bw[tc] * fw_max_bw_share; bw_share = esw_qos_calc_bw_share(bw_share, divider, fw_max_bw_share); @@ -1060,6 +1060,7 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a esw_qos_vport_tc_disable(vport, extack); vport_node->bw_share = 0; + memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw)); list_del_init(&vport_node->entry); esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); @@ -1231,8 +1232,9 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { - struct mlx5_esw_sched_node *curr_parent = vport->qos.sched_node->parent; - enum sched_node_type curr_type = vport->qos.sched_node->type; + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *curr_parent = vport_node->parent; + enum sched_node_type curr_type = vport_node->type; u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; int err; @@ -1244,10 +1246,8 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, if (err) return err; - if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { - esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node, - curr_tc_bw); - } + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) + esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw); esw_qos_vport_disable(vport, extack); @@ -1258,8 +1258,8 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, } if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { - esw_qos_set_tc_arbiter_bw_shares(vport->qos.sched_node, - curr_tc_bw, extack); + esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw, + extack); } return err; From b697ef4d1d136948d282384e6cc3d1af469ea123 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:05 +0300 Subject: [PATCH 0679/1307] net/mlx5: Destroy vport QoS element when no configuration remains If a VF has been configured and the user later clears all QoS settings, the vport element remains in the firmware QoS tree. This leads to inconsistent behavior compared to VFs that were never configured, since the FW assumes that unconfigured VFs are outside the QoS hierarchy. As a result, the bandwidth share across VFs may differ, even though none of them appear to have any configuration. Align the driver behavior with the FW expectation by destroying the vport QoS element when all configurations are removed. Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Fixes: cf7e73770d1b ("net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20250820133209.389065-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 57 ++++++++++++++++--- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 4ed5968f1638..452a948a3e6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1127,6 +1127,19 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t return err; } +static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport) +{ + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw_assert_qos_lock_held(esw); + if (!vport->qos.sched_node) + return; + + esw_qos_vport_disable(vport, NULL); + mlx5_esw_qos_vport_qos_free(vport); + esw_qos_put(esw); +} + void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; @@ -1140,9 +1153,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) parent = vport->qos.sched_node->parent; WARN(parent, "Disabling QoS on port before detaching it from node"); - esw_qos_vport_disable(vport, NULL); - mlx5_esw_qos_vport_qos_free(vport); - esw_qos_put(esw); + mlx5_esw_qos_vport_disable_locked(vport); unlock: esw_qos_unlock(esw); } @@ -1642,6 +1653,21 @@ static bool esw_qos_tc_bw_disabled(u32 *tc_bw) return true; } +static void esw_vport_qos_prune_empty(struct mlx5_vport *vport) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + if (!vport_node) + return; + + if (vport_node->parent || vport_node->max_rate || + vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw)) + return; + + mlx5_esw_qos_vport_disable_locked(vport); +} + int mlx5_esw_qos_init(struct mlx5_eswitch *esw) { if (esw->qos.domain) @@ -1675,6 +1701,10 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: esw_qos_unlock(esw); return err; } @@ -1696,6 +1726,10 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: esw_qos_unlock(esw); return err; } @@ -1733,6 +1767,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, vport_node->parent, extack); + esw_vport_qos_prune_empty(vport); goto unlock; } @@ -1893,14 +1928,20 @@ int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, void *priv, void *parent_priv, struct netlink_ext_ack *extack) { - struct mlx5_esw_sched_node *node; + struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL; struct mlx5_vport *vport = priv; + int err; - if (!parent) - return mlx5_esw_qos_vport_update_parent(vport, NULL, extack); + err = mlx5_esw_qos_vport_update_parent(vport, node, extack); + if (!err) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; - node = parent_priv; - return mlx5_esw_qos_vport_update_parent(vport, node, extack); + esw_qos_lock(esw); + esw_vport_qos_prune_empty(vport); + esw_qos_unlock(esw); + } + + return err; } static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node) From 3c114fb2afe493066df5b9e560ef37216b153c90 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:06 +0300 Subject: [PATCH 0680/1307] net/mlx5: Fix QoS reference leak in vport enable error path Add missing esw_qos_put() call when __esw_qos_alloc_node() fails in mlx5_esw_qos_vport_enable(). Fixes: be034baba83e ("net/mlx5: Make vport QoS enablement more flexible for future extensions") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 452a948a3e6d..41aec07bb6c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1109,8 +1109,10 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t parent_esw = parent ? parent->esw : esw; sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); - if (!sched_node) + if (!sched_node) { + esw_qos_put(esw); return -ENOMEM; + } if (!parent) list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); From 51b17c98e3dbb2093a81b0490050a0eaa919ebee Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 20 Aug 2025 16:32:07 +0300 Subject: [PATCH 0681/1307] net/mlx5: Restore missing scheduling node cleanup on vport enable failure Restore the __esw_qos_free_node() call removed by the offending commit. Fixes: 97733d1e00a0 ("net/mlx5: Add traffic class scheduling support for vport QoS") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 41aec07bb6c2..8b4977650183 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1122,6 +1122,7 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t vport->qos.sched_node = sched_node; err = esw_qos_vport_enable(vport, type, parent, extack); if (err) { + __esw_qos_free_node(sched_node); esw_qos_put(esw); vport->qos.sched_node = NULL; } From 451d2849ea66659040b59ae3cb7e50cc97404733 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 20 Aug 2025 16:32:08 +0300 Subject: [PATCH 0682/1307] net/mlx5e: Query FW for buffer ownership The SW currently saves local buffer ownership when setting the buffer. This means that the SW assumes it has ownership of the buffer after the command is set. If setting the buffer fails and we remain in FW ownership, the local buffer ownership state incorrectly remains as SW-owned. This leads to incorrect behavior in subsequent PFC commands, causing failures. Instead of saving local buffer ownership in SW, query the FW for buffer ownership when setting the buffer. This ensures that the buffer ownership state is accurately reflected, avoiding the issues caused by incorrect ownership states. Fixes: ecdf2dadee8e ("net/mlx5e: Receive buffer support for DCBX") Signed-off-by: Alexei Lazar Reviewed-by: Shahar Shitrit Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250820133209.389065-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/dcbnl.h | 1 - .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 12 ++++++++--- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/port.c | 20 +++++++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index b59aee75de94..2c98a5299df3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -26,7 +26,6 @@ struct mlx5e_dcbx { u8 cap; /* Buffer configuration */ - bool manual_buffer; u32 cable_len; u32 xoff; u16 port_buff_cell_sz; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 5fe016e477b3..d166c0d5189e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -362,6 +362,7 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { + u8 buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u32 old_cable_len = priv->dcbx.cable_len; @@ -389,7 +390,14 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if (MLX5_BUFFER_SUPPORTED(mdev)) { pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; - if (priv->dcbx.manual_buffer) + ret = mlx5_query_port_buffer_ownership(mdev, + &buffer_ownership); + if (ret) + netdev_err(dev, + "%s, Failed to get buffer ownership: %d\n", + __func__, ret); + + if (buffer_ownership == MLX5_BUF_OWNERSHIP_SW_OWNED) ret = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, &pfc_new, NULL, NULL); @@ -982,7 +990,6 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, if (!changed) return 0; - priv->dcbx.manual_buffer = true; err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, buffer_size, prio2buffer); return err; @@ -1252,7 +1259,6 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) priv->dcbx.cap |= DCB_CAP_DCBX_HOST; priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); - priv->dcbx.manual_buffer = false; priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; mlx5e_ets_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b6d53db27cd5..9d3504f5abfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -367,6 +367,8 @@ int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership); int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 549f1066d2a5..2d7adf7444ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -968,6 +968,26 @@ int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) return err; } +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)] = {}; + int err; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, buffer_ownership)) { + *buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; + return 0; + } + + err = mlx5_query_pfcc_reg(mdev, out, sizeof(out)); + if (err) + return err; + + *buffer_ownership = MLX5_GET(pfcc_reg, out, buf_ownership); + + return 0; +} + int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) { int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); From 8b0587a885fdb34fd6090a3f8625cb7ac1444826 Mon Sep 17 00:00:00 2001 From: Armen Ratner Date: Wed, 20 Aug 2025 16:32:09 +0300 Subject: [PATCH 0683/1307] net/mlx5e: Preserve shared buffer capacity during headroom updates When port buffer headroom changes, port_update_shared_buffer() recalculates the shared buffer size and splits it in a 3:1 ratio (lossy:lossless) - Currently, the calculation is: lossless = shared / 4; lossy = (shared / 4) * 3; Meaning, the calculation dropped the remainder of shared % 4 due to integer division, unintentionally reducing the total shared buffer by up to three cells on each update. Over time, this could shrink the buffer below usable size. Fix it by changing the calculation to: lossless = shared / 4; lossy = shared - lossless; This retains all buffer cells while still approximating the intended 3:1 split, preventing capacity loss over time. While at it, perform headroom calculations in units of cells rather than in bytes for more accurate calculations avoiding extra divisions. Fixes: a440030d8946 ("net/mlx5e: Update shared buffer along with device buffer changes") Signed-off-by: Armen Ratner Signed-off-by: Maher Sanalla Reviewed-by: Tariq Toukan Signed-off-by: Alexei Lazar Signed-off-by: Mark Bloch Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20250820133209.389065-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/port_buffer.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 5ae787656a7c..3efa8bf1d14e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -272,8 +272,8 @@ static int port_update_shared_buffer(struct mlx5_core_dev *mdev, /* Total shared buffer size is split in a ratio of 3:1 between * lossy and lossless pools respectively. */ - lossy_epool_size = (shared_buffer_size / 4) * 3; lossless_ipool_size = shared_buffer_size / 4; + lossy_epool_size = shared_buffer_size - lossless_ipool_size; mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, lossy_epool_size); @@ -288,14 +288,12 @@ static int port_set_buffer(struct mlx5e_priv *priv, u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); - u32 new_headroom_size = 0; - u32 current_headroom_size; + u32 current_headroom_cells = 0; + u32 new_headroom_cells = 0; void *in; int err; int i; - current_headroom_size = port_buffer->headroom_size; - in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -306,12 +304,14 @@ static int port_set_buffer(struct mlx5e_priv *priv, for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + current_headroom_cells += MLX5_GET(bufferx_reg, buffer, size); + u64 size = port_buffer->buffer[i].size; u64 xoff = port_buffer->buffer[i].xoff; u64 xon = port_buffer->buffer[i].xon; - new_headroom_size += size; do_div(size, port_buff_cell_sz); + new_headroom_cells += size; do_div(xoff, port_buff_cell_sz); do_div(xon, port_buff_cell_sz); MLX5_SET(bufferx_reg, buffer, size, size); @@ -320,10 +320,8 @@ static int port_set_buffer(struct mlx5e_priv *priv, MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } - new_headroom_size /= port_buff_cell_sz; - current_headroom_size /= port_buff_cell_sz; - err = port_update_shared_buffer(priv->mdev, current_headroom_size, - new_headroom_size); + err = port_update_shared_buffer(priv->mdev, current_headroom_cells, + new_headroom_cells); if (err) goto out; From 8410fe81093ff231e964891e215b624dabb734b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Aug 2025 17:08:34 +0200 Subject: [PATCH 0684/1307] ALSA: usb-audio: Use correct sub-type for UAC3 feature unit validation The entry of the validators table for UAC3 feature unit is defined with a wrong sub-type UAC_FEATURE (= 0x06) while it should have been UAC3_FEATURE (= 0x07). This patch corrects the entry value. Fixes: 57f8770620e9 ("ALSA: usb-audio: More validations of descriptor units") Link: https://patch.msgid.link/20250821150835.8894-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 4f4e8e87a14c..a0d55b77c994 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -285,7 +285,7 @@ static const struct usb_desc_validator audio_validators[] = { /* UAC_VERSION_3, UAC3_EXTENDED_TERMINAL: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_MIXER_UNIT, validate_mixer_unit), FUNC(UAC_VERSION_3, UAC3_SELECTOR_UNIT, validate_selector_unit), - FUNC(UAC_VERSION_3, UAC_FEATURE_UNIT, validate_uac3_feature_unit), + FUNC(UAC_VERSION_3, UAC3_FEATURE_UNIT, validate_uac3_feature_unit), /* UAC_VERSION_3, UAC3_EFFECT_UNIT: not implemented yet */ FUNC(UAC_VERSION_3, UAC3_PROCESSING_UNIT, validate_processing_unit), FUNC(UAC_VERSION_3, UAC3_EXTENSION_UNIT, validate_processing_unit), From fec7bdfe7f8694a0c39e6c3ec026ff61ca1058b9 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 21 Aug 2025 14:35:40 +0200 Subject: [PATCH 0685/1307] s390/hypfs: Avoid unnecessary ioctl registration in debugfs Currently, hypfs registers ioctl callbacks for all debugfs files, despite only one file requiring them. This leads to unintended exposure of unused interfaces to user space and can trigger side effects such as restricted access when kernel lockdown is enabled. Restrict ioctl registration to only those files that implement ioctl functionality to avoid interface clutter and unnecessary access restrictions. Tested-by: Mete Durlu Reviewed-by: Vasily Gorbik Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down") Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- arch/s390/hypfs/hypfs_dbfs.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 5d9effb0867c..e74eb8f9b23a 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -66,23 +66,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long rc; mutex_lock(&df->lock); - if (df->unlocked_ioctl) - rc = df->unlocked_ioctl(file, cmd, arg); - else - rc = -ENOTTY; + rc = df->unlocked_ioctl(file, cmd, arg); mutex_unlock(&df->lock); return rc; } -static const struct file_operations dbfs_ops = { +static const struct file_operations dbfs_ops_ioctl = { .read = dbfs_read, .unlocked_ioctl = dbfs_ioctl, }; +static const struct file_operations dbfs_ops = { + .read = dbfs_read, +}; + void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { - df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, - &dbfs_ops); + const struct file_operations *fops = &dbfs_ops; + + if (df->unlocked_ioctl) + fops = &dbfs_ops_ioctl; + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); } From 3868f910440c47cd5d158776be4ba4e2186beda7 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 21 Aug 2025 15:12:37 +0200 Subject: [PATCH 0686/1307] s390/hypfs: Enable limited access during lockdown When kernel lockdown is active, debugfs_locked_down() blocks access to hypfs files that register ioctl callbacks, even if the ioctl interface is not required for a function. This unnecessarily breaks userspace tools that only rely on read operations. Resolve this by registering a minimal set of file operations during lockdown, avoiding ioctl registration and preserving access for affected tooling. Note that this change restores hypfs functionality when lockdown is active from early boot (e.g. via lockdown=integrity kernel parameter), but does not apply to scenarios where lockdown is enabled dynamically while Linux is running. Tested-by: Mete Durlu Reviewed-by: Vasily Gorbik Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down") Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- arch/s390/hypfs/hypfs_dbfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index e74eb8f9b23a..41a0d2066fa0 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -6,6 +6,7 @@ * Author(s): Michael Holzheu */ +#include #include #include "hypfs.h" @@ -84,7 +85,7 @@ void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { const struct file_operations *fops = &dbfs_ops; - if (df->unlocked_ioctl) + if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS)) fops = &dbfs_ops_ioctl; df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); From cb83b559bea39f207ee214ee2972657e8576ed18 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:18 -0700 Subject: [PATCH 0687/1307] idpf: add support for Tx refillqs in flow scheduling mode In certain production environments, it is possible for completion tags to collide, meaning N packets with the same completion tag are in flight at the same time. In this environment, any given Tx queue is effectively used to send both slower traffic and higher throughput traffic simultaneously. This is the result of a customer's specific configuration in the device pipeline, the details of which Intel cannot provide. This configuration results in a small number of out-of-order completions, i.e., a small number of packets in flight. The existing guardrails in the driver only protect against a large number of packets in flight. The slower flow completions are delayed which causes the out-of-order completions. The fast flow will continue sending traffic and generating tags. Because tags are generated on the fly, the fast flow eventually uses the same tag for a packet that is still in flight from the slower flow. The driver has no idea which packet it should clean when it processes the completion with that tag, but it will look for the packet on the buffer ring before the hash table. If the slower flow packet completion is processed first, it will end up cleaning the fast flow packet on the ring prematurely. This leaves the descriptor ring in a bad state resulting in a crash or Tx timeout. In summary, generating a tag when a packet is sent can lead to the same tag being associated with multiple packets. This can lead to resource leaks, crashes, and/or Tx timeouts. Before we can replace the tag generation, we need a new mechanism for the send path to know what tag to use next. The driver will allocate and initialize a refillq for each TxQ with all of the possible free tag values. During send, the driver grabs the next free tag from the refillq from next_to_clean. While cleaning the packet, the clean routine posts the tag back to the refillq's next_to_use to indicate that it is now free to use. This mechanism works exactly the same way as the existing Rx refill queues, which post the cleaned buffer IDs back to the buffer queue to be reposted to HW. Since we're using the refillqs for both Rx and Tx now, genericize some of the existing refillq support. Note: the refillqs will not be used yet. This is only demonstrating how they will be used to pass free tags back to the send path. Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 93 +++++++++++++++++++-- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 8 +- 2 files changed, 91 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 66a1b040639d..9b63944235fb 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -139,6 +139,9 @@ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) if (!txq->desc_ring) return; + if (txq->refillq) + kfree(txq->refillq->ring); + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); txq->desc_ring = NULL; txq->next_to_use = 0; @@ -244,6 +247,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, struct idpf_tx_queue *tx_q) { struct device *dev = tx_q->dev; + struct idpf_sw_queue *refillq; int err; err = idpf_tx_buf_alloc_all(tx_q); @@ -267,6 +271,29 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, tx_q->next_to_clean = 0; idpf_queue_set(GEN_CHK, tx_q); + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) + return 0; + + refillq = tx_q->refillq; + refillq->desc_count = tx_q->desc_count; + refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + + for (unsigned int i = 0; i < refillq->desc_count; i++) + refillq->ring[i] = + FIELD_PREP(IDPF_RFL_BI_BUFID_M, i) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, + idpf_queue_has(GEN_CHK, refillq)); + + /* Go ahead and flip the GEN bit since this counts as filling + * up the ring, i.e. we already ring wrapped. + */ + idpf_queue_change(GEN_CHK, refillq); + return 0; err_alloc: @@ -603,18 +630,18 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) } /** - * idpf_rx_post_buf_refill - Post buffer id to refill queue + * idpf_post_buf_refill - Post buffer id to refill queue * @refillq: refill queue to post to * @buf_id: buffer id to post */ -static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +static void idpf_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) { u32 nta = refillq->next_to_use; /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = - FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | - FIELD_PREP(IDPF_RX_BI_GEN_M, + FIELD_PREP(IDPF_RFL_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, idpf_queue_has(GEN_CHK, refillq)); if (unlikely(++nta == refillq->desc_count)) { @@ -995,6 +1022,11 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; for (j = 0; j < txq_grp->num_txq; j++) { + if (flow_sch_en) { + kfree(txq_grp->txqs[j]->refillq); + txq_grp->txqs[j]->refillq = NULL; + } + kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } @@ -1414,6 +1446,13 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) } idpf_queue_set(FLOW_SCH_EN, q); + + q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); + if (!q->refillq) + goto err_alloc; + + idpf_queue_set(GEN_CHK, q->refillq); + idpf_queue_set(RFL_GEN_CHK, q->refillq); } if (!split) @@ -2005,6 +2044,8 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); + idpf_post_buf_refill(txq->refillq, compl_tag); + /* If we didn't clean anything on the ring, this packet must be * in the hash table. Go clean it there. */ @@ -2364,6 +2405,37 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) return ntu; } +/** + * idpf_tx_get_free_buf_id - get a free buffer ID from the refill queue + * @refillq: refill queue to get buffer ID from + * @buf_id: return buffer ID + * + * Return: true if a buffer ID was found, false if not + */ +static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, + u16 *buf_id) +{ + u32 ntc = refillq->next_to_clean; + u32 refill_desc; + + refill_desc = refillq->ring[ntc]; + + if (unlikely(idpf_queue_has(RFL_GEN_CHK, refillq) != + !!(refill_desc & IDPF_RFL_BI_GEN_M))) + return false; + + *buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); + + if (unlikely(++ntc == refillq->desc_count)) { + idpf_queue_change(RFL_GEN_CHK, refillq); + ntc = 0; + } + + refillq->next_to_clean = ntc; + + return true; +} + /** * idpf_tx_splitq_map - Build the Tx flex descriptor * @tx_q: queue to send buffer on @@ -2912,6 +2984,13 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, } if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &tx_params.compl_tag))) { + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + } + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; /* Set the RE bit to catch any packets that may have not been @@ -3472,7 +3551,7 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) skip_data: rx_buf->netmem = 0; - idpf_rx_post_buf_refill(refillq, buf_id); + idpf_post_buf_refill(refillq, buf_id); IDPF_RX_BUMP_NTC(rxq, ntc); /* skip if it is non EOP desc */ @@ -3580,10 +3659,10 @@ static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, bool failure; if (idpf_queue_has(RFL_GEN_CHK, refillq) != - !!(refill_desc & IDPF_RX_BI_GEN_M)) + !!(refill_desc & IDPF_RFL_BI_GEN_M)) break; - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); if (failure) break; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 281de655a813..58232a1bd0a9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -108,8 +108,8 @@ do { \ */ #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_GEN_M BIT(16) -#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) +#define IDPF_RFL_BI_GEN_M BIT(16) +#define IDPF_RFL_BI_BUFID_M GENMASK(15, 0) #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -622,6 +622,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @cleaned_pkts: Number of packets cleaned for the above said case * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @stash: Tx buffer stash for Flow-based scheduling mode + * @refillq: Pointer to refill queue * @compl_tag_bufid_m: Completion tag buffer id mask * @compl_tag_cur_gen: Used to keep track of current completion tag generation * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset @@ -671,6 +672,7 @@ struct idpf_tx_queue { u16 tx_max_bufs; struct idpf_txq_stash *stash; + struct idpf_sw_queue *refillq; u16 compl_tag_bufid_m; u16 compl_tag_cur_gen; @@ -692,7 +694,7 @@ struct idpf_tx_queue { __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 112 + sizeof(struct u64_stats_sync), + 120 + sizeof(struct u64_stats_sync), 24); /** From f2d18e16479cac7a708d77cbfb4220a9114a71fc Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:19 -0700 Subject: [PATCH 0688/1307] idpf: improve when to set RE bit logic Track the gap between next_to_use and the last RE index. Set RE again if the gap is large enough to ensure RE bit is set frequently. This is critical before removing the stashing mechanisms because the opportunistic descriptor ring cleaning from the out-of-order completions will go away. Previously the descriptors would be "cleaned" by both the descriptor (RE) completion and the out-of-order completions. Without the latter, we must ensure the RE bit is set more frequently. Otherwise, it's theoretically possible for the descriptor ring next_to_clean to never advance. The previous implementation was dependent on the start of a packet falling on a 64th index in the descriptor ring, which is not guaranteed with large packets. Signed-off-by: Luigi Rizzo Signed-off-by: Brian Vazquez Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 20 +++++++++++++++++++- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 6 ++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 9b63944235fb..ee59153508af 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -294,6 +294,8 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, */ idpf_queue_change(GEN_CHK, refillq); + tx_q->last_re = tx_q->desc_count - IDPF_TX_SPLITQ_RE_MIN_GAP; + return 0; err_alloc: @@ -2912,6 +2914,21 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, { } #endif /* CONFIG_PTP_1588_CLOCK */ +/** + * idpf_tx_splitq_need_re - check whether RE bit needs to be set + * @tx_q: pointer to Tx queue + * + * Return: true if RE bit needs to be set, false otherwise + */ +static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) +{ + int gap = tx_q->next_to_use - tx_q->last_re; + + gap += (gap < 0) ? tx_q->desc_count : 0; + + return gap >= IDPF_TX_SPLITQ_RE_MIN_GAP; +} + /** * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors * @skb: send buffer @@ -2998,9 +3015,10 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, * MIN_RING size to ensure it will be set at least once each * time around the ring. */ - if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; tx_q->txq_grp->num_completions_pending++; + tx_q->last_re = tx_q->next_to_use; } if (skb->ip_summed == CHECKSUM_PARTIAL) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 58232a1bd0a9..c75ca5d3e57c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -610,6 +610,8 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @netdev: &net_device corresponding to this queue * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean + * @last_re: last descriptor index that RE bit was set + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on * the TX completion queue, it can be for any TXQ associated * with that completion queue. This means we can clean up to @@ -620,7 +622,6 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. * @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @stash: Tx buffer stash for Flow-based scheduling mode * @refillq: Pointer to refill queue * @compl_tag_bufid_m: Completion tag buffer id mask @@ -663,6 +664,8 @@ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_write); u16 next_to_use; u16 next_to_clean; + u16 last_re; + u16 tx_max_bufs; union { u32 cleaned_bytes; @@ -670,7 +673,6 @@ struct idpf_tx_queue { }; u16 cleaned_pkts; - u16 tx_max_bufs; struct idpf_txq_stash *stash; struct idpf_sw_queue *refillq; From b61dfa9bc4430ad82b96d3a7c1c485350f91b467 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:20 -0700 Subject: [PATCH 0689/1307] idpf: simplify and fix splitq Tx packet rollback error path Move (and rename) the existing rollback logic to singleq.c since that will be the only consumer. Create a simplified splitq specific rollback function to loop through and unmap tx_bufs based on the completion tag. This is critical before replacing the Tx buffer ring with the buffer pool since the previous rollback indexing will not work to unmap the chained buffers from the pool. Cache the next_to_use index before any portion of the packet is put on the descriptor ring. In case of an error, the rollback will bump tail to the correct next_to_use value. Because the splitq path now supports different types of context descriptors (and potentially multiple in the future), this will take care of rolling back any and all context descriptors encoded on the ring for the erroneous packet. The previous rollback logic was broken for PTP packets since it would not account for the PTP context descriptor. Fixes: 1a49cf814fe1 ("idpf: add Tx timestamp flows") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 57 +++++++++++- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 91 ++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 5 +- 3 files changed, 95 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 555879b1248d..57c0f5ab8f9e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -179,6 +179,58 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, return 1; } +/** + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, + struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + libeth_tx_complete(tx_buf, &cp); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(*tx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + /** * idpf_tx_singleq_map - Build the Tx base descriptor * @tx_q: queue to send buffer on @@ -219,8 +271,9 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_singleq_dma_map_error(tx_q, skb, + first, i); /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index ee59153508af..527d56bcbbef 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2339,57 +2339,6 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; } -/** - * idpf_tx_dma_map_error - handle TX DMA map errors - * @txq: queue to send buffer on - * @skb: send buffer - * @first: original first buffer info buffer for packet - * @idx: starting point on ring to unwind - */ -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 idx) -{ - struct libeth_sq_napi_stats ss = { }; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = &ss, - }; - - u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.dma_map_errs); - u64_stats_update_end(&txq->stats_sync); - - /* clear dma mappings for failed tx_buf map */ - for (;;) { - struct idpf_tx_buf *tx_buf; - - tx_buf = &txq->tx_buf[idx]; - libeth_tx_complete(tx_buf, &cp); - if (tx_buf == first) - break; - if (idx == 0) - idx = txq->desc_count; - idx--; - } - - if (skb_is_gso(skb)) { - union idpf_tx_flex_desc *tx_desc; - - /* If we failed a DMA mapping for a TSO packet, we will have - * used one additional descriptor for a context - * descriptor. Reset that here. - */ - tx_desc = &txq->flex_tx[idx]; - memset(tx_desc, 0, sizeof(*tx_desc)); - if (idx == 0) - idx = txq->desc_count; - idx--; - } - - /* Update tail in case netdev_xmit_more was previously true */ - idpf_tx_buf_hw_update(txq, idx, false); -} - /** * idpf_tx_splitq_bump_ntu - adjust NTU and generation * @txq: the tx ring to wrap @@ -2438,6 +2387,37 @@ static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, return true; } +/** + * idpf_tx_splitq_pkt_err_unmap - Unmap buffers and bump tail in case of error + * @txq: Tx queue to unwind + * @params: pointer to splitq params struct + * @first: starting buffer for packet to unmap + */ +static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) +{ + struct libeth_sq_napi_stats ss = { }; + struct idpf_tx_buf *tx_buf = first; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + u32 idx = 0; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + do { + libeth_tx_complete(tx_buf, &cp); + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + } while (idpf_tx_buf_compl_tag(tx_buf) == params->compl_tag); + + /* Update tail in case netdev_xmit_more was previously true. */ + idpf_tx_buf_hw_update(txq, params->prev_ntu, false); +} + /** * idpf_tx_splitq_map - Build the Tx flex descriptor * @tx_q: queue to send buffer on @@ -2482,8 +2462,9 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); first->nr_frags++; idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; @@ -2939,7 +2920,9 @@ static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q) { - struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_splitq_params tx_params = { + .prev_ntu = tx_q->next_to_use, + }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; unsigned int count; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index c75ca5d3e57c..a7632d845a2a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -196,6 +196,7 @@ struct idpf_tx_offload_params { * @compl_tag: Associated tag for completion * @td_tag: Descriptor tunneling tag * @offload: Offload parameters + * @prev_ntu: stored TxQ next_to_use in case of rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -206,6 +207,8 @@ struct idpf_tx_splitq_params { }; struct idpf_tx_offload_params offload; + + u16 prev_ntu; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -1042,8 +1045,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 ring_idx); unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, struct sk_buff *skb); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); From 5f417d551324d2894168b362f2429d120ab06243 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:21 -0700 Subject: [PATCH 0690/1307] idpf: replace flow scheduling buffer ring with buffer pool Replace the TxQ buffer ring with one large pool/array of buffers (only for flow scheduling). This eliminates the tag generation and makes it impossible for a tag to be associated with more than one packet. The completion tag passed to HW through the descriptor is the index into the array. That same completion tag is posted back to the driver in the completion descriptor, and used to index into the array to quickly retrieve the buffer during cleaning. In this way, the tags are treated as a fix sized resource. If all tags are in use, no more packets can be sent on that particular queue (until some are freed up). The tag pool size is 64K since the completion tag width is 16 bits. For each packet, the driver pulls a free tag from the refillq to get the next free buffer index. When cleaning is complete, the tag is posted back to the refillq. A multi-frag packet spans multiple buffers in the driver, therefore it uses multiple buffer indexes/tags from the pool. Each frag pulls from the refillq to get the next free buffer index. These are tracked in a next_buf field that replaces the completion tag field in the buffer struct. This chains the buffers together so that the packet can be cleaned from the starting completion tag taken from the completion descriptor, then from the next_buf field for each subsequent buffer. In case of a dma_mapping_error occurs or the refillq runs out of free buf_ids, the packet will execute the rollback error path. This unmaps any buffers previously mapped for the packet. Since several free buf_ids could have already been pulled from the refillq, we need to restore its original state as well. Otherwise, the buf_ids/tags will be leaked and not used again until the queue is reallocated. Descriptor completions only advance the descriptor ring index to "clean" the descriptors. The packet completions only clean the buffers associated with the given packet completion tag and do not update the descriptor ring index. When operating in queue based scheduling mode, the array still acts as a ring and will only have TxQ descriptor count entries. The tx_bufs are still associated 1:1 with the descriptor ring entries and we can use the conventional indexing mechanisms. Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Signed-off-by: Luigi Rizzo Signed-off-by: Brian Vazquez Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 206 +++++++++----------- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 10 +- 2 files changed, 104 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 527d56bcbbef..5fe329a7c944 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -13,6 +13,7 @@ struct idpf_tx_stash { struct libeth_sqe buf; }; +#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) #define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); @@ -91,7 +92,7 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) return; /* Free all the Tx buffer sk_buffs */ - for (i = 0; i < txq->desc_count; i++) + for (i = 0; i < txq->buf_pool_size; i++) libeth_tx_complete(&txq->tx_buf[i], &cp); kfree(txq->tx_buf); @@ -199,14 +200,17 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { struct idpf_buf_lifo *buf_stack; - int buf_size; int i; /* Allocate book keeping buffers only. Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ - buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; - tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) + tx_q->buf_pool_size = U16_MAX; + else + tx_q->buf_pool_size = tx_q->desc_count; + tx_q->tx_buf = kcalloc(tx_q->buf_pool_size, sizeof(*tx_q->tx_buf), + GFP_KERNEL); if (!tx_q->tx_buf) return -ENOMEM; @@ -275,7 +279,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, return 0; refillq = tx_q->refillq; - refillq->desc_count = tx_q->desc_count; + refillq->desc_count = tx_q->buf_pool_size; refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), GFP_KERNEL); if (!refillq->ring) { @@ -1869,6 +1873,12 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, struct idpf_tx_buf *tx_buf; bool clean_complete = true; + if (descs_only) { + /* Bump ring index to mark as cleaned. */ + tx_q->next_to_clean = end; + return true; + } + tx_desc = &tx_q->flex_tx[ntc]; next_pending_desc = &tx_q->flex_tx[end]; tx_buf = &tx_q->tx_buf[ntc]; @@ -1935,87 +1945,43 @@ do { \ } while (0) /** - * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @buf_id: packet's starting buffer ID, from completion descriptor * @cleaned: pointer to stats struct to track cleaned packets/bytes * @budget: Used to determine if we are in netpoll * - * Cleans all buffers associated with the input completion tag either from the - * TX buffer ring or from the hash table if the buffers were previously - * stashed. Returns the byte/segment count for the cleaned packet associated - * this completion tag. + * Clean all buffers associated with the packet starting at buf_id. Returns the + * byte/segment count for the cleaned packet. */ -static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) +static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, + struct libeth_sq_napi_stats *cleaned, + int budget) { - u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = cleaned, .napi = budget, }; - u16 ntc, orig_idx = idx; - - tx_buf = &txq->tx_buf[idx]; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || - idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) - return false; + tx_buf = &txq->tx_buf[buf_id]; if (tx_buf->type == LIBETH_SQE_SKB) { if (skb_shinfo(tx_buf->skb)->tx_flags & SKBTX_IN_PROGRESS) idpf_tx_read_tstamp(txq, tx_buf->skb); libeth_tx_complete(tx_buf, &cp); + idpf_post_buf_refill(txq->refillq, buf_id); } - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + buf_id = idpf_tx_buf_next(tx_buf); - while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { + tx_buf = &txq->tx_buf[buf_id]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + idpf_post_buf_refill(txq->refillq, buf_id); } - /* - * It's possible the packet we just cleaned was an out of order - * completion, which means we can stash the buffers starting from - * the original next_to_clean and reuse the descriptors. We need - * to compare the descriptor ring next_to_clean packet's "first" buffer - * to the "first" buffer of the packet we just cleaned to determine if - * this is the case. Howevever, next_to_clean can point to either a - * reserved buffer that corresponds to a context descriptor used for the - * next_to_clean packet (TSO packet) or the "first" buffer (single - * packet). The orig_idx from the packet we just cleaned will always - * point to the "first" buffer. If next_to_clean points to a reserved - * buffer, let's bump ntc once and start the comparison from there. - */ - ntc = txq->next_to_clean; - tx_buf = &txq->tx_buf[ntc]; - - if (tx_buf->type == LIBETH_SQE_CTX) - idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - - /* - * If ntc still points to a different "first" buffer, clean the - * descriptor ring and stash all of the buffers for later cleaning. If - * we cannot stash all of the buffers, next_to_clean will point to the - * "first" buffer of the packet that could not be stashed and cleaning - * will start there next time. - */ - if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && - !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, - true))) - return true; - - /* - * Otherwise, update next_to_clean to reflect the cleaning that was - * done above. - */ - txq->next_to_clean = idx; - return true; } @@ -2046,12 +2012,10 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - idpf_post_buf_refill(txq->refillq, compl_tag); - /* If we didn't clean anything on the ring, this packet must be * in the hash table. Go clean it there. */ - if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) + if (!idpf_tx_clean_bufs(txq, compl_tag, cleaned, budget)) idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); } @@ -2364,7 +2328,7 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) * Return: true if a buffer ID was found, false if not */ static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, - u16 *buf_id) + u32 *buf_id) { u32 ntc = refillq->next_to_clean; u32 refill_desc; @@ -2397,25 +2361,34 @@ static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, struct idpf_tx_splitq_params *params, struct idpf_tx_buf *first) { + struct idpf_sw_queue *refillq = txq->refillq; struct libeth_sq_napi_stats ss = { }; struct idpf_tx_buf *tx_buf = first; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = &ss, }; - u32 idx = 0; u64_stats_update_begin(&txq->stats_sync); u64_stats_inc(&txq->q_stats.dma_map_errs); u64_stats_update_end(&txq->stats_sync); - do { + libeth_tx_complete(tx_buf, &cp); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + tx_buf = &txq->tx_buf[idpf_tx_buf_next(tx_buf)]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); - } while (idpf_tx_buf_compl_tag(tx_buf) == params->compl_tag); + } /* Update tail in case netdev_xmit_more was previously true. */ idpf_tx_buf_hw_update(txq, params->prev_ntu, false); + + if (!refillq) + return; + + /* Restore refillq state to avoid leaking tags. */ + if (params->prev_refill_gen != idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = params->prev_refill_ntc; } /** @@ -2439,6 +2412,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, struct netdev_queue *nq; struct sk_buff *skb; skb_frag_t *frag; + u32 next_buf_id; u16 td_cmd = 0; dma_addr_t dma; @@ -2456,18 +2430,16 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, tx_buf = first; first->nr_frags = 0; - params->compl_tag = - (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (unlikely(dma_mapping_error(tx_q->dev, dma))) + if (unlikely(dma_mapping_error(tx_q->dev, dma))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; return idpf_tx_splitq_pkt_err_unmap(tx_q, params, first); + } first->nr_frags++; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; tx_buf->type = LIBETH_SQE_FRAG; /* record length, and DMA address */ @@ -2523,29 +2495,14 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, max_data); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } - /* Since this packet has a buffer that is going to span - * multiple descriptors, it's going to leave holes in - * to the TX buffer ring. To ensure these holes do not - * cause issues in the cleaning routines, we will clear - * them of any stale data and assign them the same - * completion tag as the current packet. Then when the - * packet is being cleaned, the cleaning routines will - * simply pass over these holes and finish cleaning the - * rest of the packet. - */ - tx_buf->type = LIBETH_SQE_EMPTY; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; - /* Adjust the DMA offset and the remaining size of the * fragment. On the first iteration of this loop, * max_data will be >= 12K and <= 16K-1. On any @@ -2570,15 +2527,26 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &next_buf_id))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } + } else { + next_buf_id = i; + } + idpf_tx_buf_next(tx_buf) = next_buf_id; + tx_buf = &tx_q->tx_buf[next_buf_id]; + size = skb_frag_size(frag); data_len -= size; @@ -2593,6 +2561,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, /* write last descriptor with RS and EOP bits */ first->rs_idx = i; + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); @@ -2801,8 +2770,6 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) union idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - txq->tx_buf[i].type = LIBETH_SQE_CTX; - /* grab the next descriptor */ desc = &txq->flex_ctx[i]; txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); @@ -2927,6 +2894,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_buf *first; unsigned int count; int tso, idx; + u32 buf_id; count = idpf_tx_desc_count_required(tx_q, skb); if (unlikely(!count)) @@ -2970,26 +2938,28 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, idpf_tx_set_tstamp_desc(ctx_desc, idx); } - /* record the location of the first descriptor for this packet */ - first = &tx_q->tx_buf[tx_q->next_to_use]; - first->skb = skb; - - if (tso) { - first->packets = tx_params.offload.tso_segs; - first->bytes = skb->len + - ((first->packets - 1) * tx_params.offload.tso_hdr_len); - } else { - first->packets = 1; - first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } - if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + struct idpf_sw_queue *refillq = tx_q->refillq; + + /* Save refillq state in case of a packet rollback. Otherwise, + * the tags will be leaked since they will be popped from the + * refillq but never reposted during cleaning. + */ + tx_params.prev_refill_gen = + idpf_queue_has(RFL_GEN_CHK, refillq); + tx_params.prev_refill_ntc = refillq->next_to_clean; + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, - &tx_params.compl_tag))) { - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - u64_stats_update_end(&tx_q->stats_sync); + &buf_id))) { + if (tx_params.prev_refill_gen != + idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = tx_params.prev_refill_ntc; + + tx_q->next_to_use = tx_params.prev_ntu; + return idpf_tx_drop_skb(tx_q, skb); } + tx_params.compl_tag = buf_id; tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; @@ -3008,6 +2978,8 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; } else { + buf_id = tx_q->next_to_use; + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; @@ -3015,6 +2987,18 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; } + first = &tx_q->tx_buf[buf_id]; + first->skb = skb; + + if (tso) { + first->packets = tx_params.offload.tso_segs; + first->bytes = skb->len + + ((first->packets - 1) * tx_params.offload.tso_hdr_len); + } else { + first->packets = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + } + idpf_tx_splitq_map(tx_q, &tx_params, first); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index a7632d845a2a..d86246c320c8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -137,6 +137,8 @@ do { \ ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ 0 : (txq)->compl_tag_cur_gen) +#define IDPF_TXBUF_NULL U32_MAX + #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) #define IDPF_TX_FLAGS_TSO BIT(0) @@ -197,6 +199,8 @@ struct idpf_tx_offload_params { * @td_tag: Descriptor tunneling tag * @offload: Offload parameters * @prev_ntu: stored TxQ next_to_use in case of rollback + * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback + * @prev_refill_gen: stored refillq generation bit in case of packet rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -209,6 +213,8 @@ struct idpf_tx_splitq_params { struct idpf_tx_offload_params offload; u16 prev_ntu; + u16 prev_refill_ntc; + bool prev_refill_gen; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -638,6 +644,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @size: Length of descriptor ring in bytes * @dma: Physical address of ring * @q_vector: Backreference to associated vector + * @buf_pool_size: Total number of idpf_tx_buf */ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_mostly); @@ -696,11 +703,12 @@ struct idpf_tx_queue { dma_addr_t dma; struct idpf_q_vector *q_vector; + u32 buf_pool_size; __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, 120 + sizeof(struct u64_stats_sync), - 24); + 32); /** * struct idpf_buf_queue - software structure representing a buffer queue From 0c3f135e840d4a2ba4253e15d530ec61bc30718e Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:22 -0700 Subject: [PATCH 0691/1307] idpf: stop Tx if there are insufficient buffer resources The Tx refillq logic will cause packets to be silently dropped if there are not enough buffer resources available to send a packet in flow scheduling mode. Instead, determine how many buffers are needed along with number of descriptors. Make sure there are enough of both resources to send the packet, and stop the queue if not. Fixes: 7292af042bcf ("idpf: fix a race in txq wakeup") Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 4 +- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 47 +++++++++++++------ drivers/net/ethernet/intel/idpf/idpf_txrx.h | 15 +++++- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 57c0f5ab8f9e..b19b462e0bb6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -415,11 +415,11 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + u32 count, buf_count = 1; int csum, tso, needed; - unsigned int count; __be16 protocol; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 5fe329a7c944..fa5432a0566a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2191,15 +2191,22 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/* Global conditions to tell whether the txq (and related resources) - * has room to allow the use of "size" descriptors. +/** + * idpf_tx_splitq_has_room - check if enough Tx splitq resources are available + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of Tx buffers required for this packet + * + * Return: 0 if no room available, 1 otherwise */ -static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, + u32 bufs_needed) { - if (IDPF_DESC_UNUSED(tx_q) < size || + if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q)) + IDPF_TX_BUF_RSV_LOW(tx_q) || + idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; } @@ -2208,14 +2215,21 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of buffers needed for this packet * - * Returns 0 if stop is not needed + * Return: 0 if stop is not needed */ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, - unsigned int descs_needed) + u32 descs_needed, + u32 bufs_needed) { + /* Since we have multiple resources to check for splitq, our + * start,stop_thrs becomes a boolean check instead of a count + * threshold. + */ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - idpf_txq_has_room(tx_q, descs_needed), + idpf_txq_has_room(tx_q, descs_needed, + bufs_needed), 1, 1)) return 0; @@ -2257,14 +2271,16 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, } /** - * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * idpf_tx_res_count_required - get number of Tx resources needed for this pkt * @txq: queue to send buffer on * @skb: send buffer + * @bufs_needed: (output) number of buffers needed for this skb. * - * Returns number of data descriptors needed for this skb. + * Return: number of data descriptors and buffers needed for this skb. */ -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb) +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, + u32 *bufs_needed) { const struct skb_shared_info *shinfo; unsigned int count = 0, i; @@ -2275,6 +2291,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; shinfo = skb_shinfo(skb); + *bufs_needed += shinfo->nr_frags; for (i = 0; i < shinfo->nr_frags; i++) { unsigned int size; @@ -2892,11 +2909,11 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; - unsigned int count; + u32 count, buf_count = 1; int tso, idx; u32 buf_id; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -2906,7 +2923,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, /* Check for splitq specific TX resources */ count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); - if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + if (idpf_tx_maybe_stop_splitq(tx_q, count, buf_count)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index d86246c320c8..9565e4dc3514 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1026,6 +1026,17 @@ static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector) reg->dyn_ctl); } +/** + * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq + * @refillq: pointer to refillq containing buf_ids + */ +static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq) +{ + return (refillq->next_to_use > refillq->next_to_clean ? + 0 : refillq->desc_count) + + refillq->next_to_use - refillq->next_to_clean - 1; +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -1053,8 +1064,8 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb); +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, u32 *buf_count); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); From 6c4e68480238274f84aa50d54da0d9e262df6284 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Fri, 25 Jul 2025 11:42:23 -0700 Subject: [PATCH 0692/1307] idpf: remove obsolete stashing code With the new Tx buffer management scheme, there is no need for all of the stashing mechanisms, the hash table, the reserve buffer stack, etc. Remove all of that. Signed-off-by: Joshua Hay Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 314 ++------------------ drivers/net/ethernet/intel/idpf/idpf_txrx.h | 47 +-- 2 files changed, 22 insertions(+), 339 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index fa5432a0566a..eaad52a83b04 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -8,48 +8,12 @@ #include "idpf_ptp.h" #include "idpf_virtchnl.h" -struct idpf_tx_stash { - struct hlist_node hlist; - struct libeth_sqe buf; -}; - #define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) -#define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, unsigned int count); -/** - * idpf_buf_lifo_push - push a buffer pointer onto stack - * @stack: pointer to stack struct - * @buf: pointer to buf to push - * - * Returns 0 on success, negative on failure - **/ -static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, - struct idpf_tx_stash *buf) -{ - if (unlikely(stack->top == stack->size)) - return -ENOSPC; - - stack->bufs[stack->top++] = buf; - - return 0; -} - -/** - * idpf_buf_lifo_pop - pop a buffer pointer from stack - * @stack: pointer to stack struct - **/ -static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) -{ - if (unlikely(!stack->top)) - return NULL; - - return stack->bufs[--stack->top]; -} - /** * idpf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure @@ -78,14 +42,11 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) { struct libeth_sq_napi_stats ss = { }; - struct idpf_buf_lifo *buf_stack; - struct idpf_tx_stash *stash; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = &ss, }; - struct hlist_node *tmp; - u32 i, tag; + u32 i; /* Buffers already cleared, nothing to do */ if (!txq->tx_buf) @@ -97,33 +58,6 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) kfree(txq->tx_buf); txq->tx_buf = NULL; - - if (!idpf_queue_has(FLOW_SCH_EN, txq)) - return; - - buf_stack = &txq->stash->buf_stack; - if (!buf_stack->bufs) - return; - - /* - * If a Tx timeout occurred, there are potentially still bufs in the - * hash table, free them here. - */ - hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, - hlist) { - if (!stash) - continue; - - libeth_tx_complete(&stash->buf, &cp); - hash_del(&stash->hlist); - idpf_buf_lifo_push(buf_stack, stash); - } - - for (i = 0; i < buf_stack->size; i++) - kfree(buf_stack->bufs[i]); - - kfree(buf_stack->bufs); - buf_stack->bufs = NULL; } /** @@ -199,9 +133,6 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) */ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { - struct idpf_buf_lifo *buf_stack; - int i; - /* Allocate book keeping buffers only. Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ @@ -214,29 +145,6 @@ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) if (!tx_q->tx_buf) return -ENOMEM; - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) - return 0; - - buf_stack = &tx_q->stash->buf_stack; - - /* Initialize tx buf stack for out-of-order completions if - * flow scheduling offload is enabled - */ - buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), - GFP_KERNEL); - if (!buf_stack->bufs) - return -ENOMEM; - - buf_stack->size = tx_q->desc_count; - buf_stack->top = tx_q->desc_count; - - for (i = 0; i < tx_q->desc_count; i++) { - buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), - GFP_KERNEL); - if (!buf_stack->bufs[i]) - return -ENOMEM; - } - return 0; } @@ -350,8 +258,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) for (i = 0; i < vport->num_txq_grp; i++) { for (j = 0; j < vport->txq_grps[i].num_txq; j++) { struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; - u8 gen_bits = 0; - u16 bufidx_mask; err = idpf_tx_desc_alloc(vport, txq); if (err) { @@ -360,34 +266,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) i); goto err_out; } - - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; - - txq->compl_tag_cur_gen = 0; - - /* Determine the number of bits in the bufid - * mask and add one to get the start of the - * generation bits - */ - bufidx_mask = txq->desc_count - 1; - while (bufidx_mask >> 1) { - txq->compl_tag_gen_s++; - bufidx_mask = bufidx_mask >> 1; - } - txq->compl_tag_gen_s++; - - gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - - txq->compl_tag_gen_s; - txq->compl_tag_gen_max = GETMAXVAL(gen_bits); - - /* Set bufid mask based on location of first - * gen bit; it cannot simply be the descriptor - * ring size-1 since we can have size values - * where not all of those bits are set. - */ - txq->compl_tag_bufid_m = - GETMAXVAL(txq->compl_tag_gen_s); } if (!idpf_is_queue_model_split(vport->txq_model)) @@ -1042,9 +920,6 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) kfree(txq_grp->complq); txq_grp->complq = NULL; - - if (flow_sch_en) - kfree(txq_grp->stashes); } kfree(vport->txq_grps); vport->txq_grps = NULL; @@ -1405,7 +1280,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; struct idpf_adapter *adapter = vport->adapter; - struct idpf_txq_stash *stashes; int j; tx_qgrp->vport = vport; @@ -1418,15 +1292,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) goto err_alloc; } - if (split && flow_sch_en) { - stashes = kcalloc(num_txq, sizeof(*stashes), - GFP_KERNEL); - if (!stashes) - goto err_alloc; - - tx_qgrp->stashes = stashes; - } - for (j = 0; j < tx_qgrp->num_txq; j++) { struct idpf_tx_queue *q = tx_qgrp->txqs[j]; @@ -1446,11 +1311,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) if (!flow_sch_en) continue; - if (split) { - q->stash = &stashes[j]; - hash_init(q->stash->sched_buf_hash); - } - idpf_queue_set(FLOW_SCH_EN, q); q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); @@ -1742,87 +1602,6 @@ static void idpf_tx_read_tstamp(struct idpf_tx_queue *txq, struct sk_buff *skb) spin_unlock_bh(&tx_tstamp_caps->status_lock); } -/** - * idpf_tx_clean_stashed_bufs - clean bufs that were stored for - * out of order completions - * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) - * @cleaned: pointer to stats struct to track cleaned packets/bytes - * @budget: Used to determine if we are in netpoll - */ -static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, - u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) -{ - struct idpf_tx_stash *stash; - struct hlist_node *tmp_buf; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = cleaned, - .napi = budget, - }; - - /* Buffer completion */ - hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, - hlist, compl_tag) { - if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) - continue; - - hash_del(&stash->hlist); - - if (stash->buf.type == LIBETH_SQE_SKB && - (skb_shinfo(stash->buf.skb)->tx_flags & SKBTX_IN_PROGRESS)) - idpf_tx_read_tstamp(txq, stash->buf.skb); - - libeth_tx_complete(&stash->buf, &cp); - - /* Push shadow buf back onto stack */ - idpf_buf_lifo_push(&txq->stash->buf_stack, stash); - } -} - -/** - * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a - * later time (only relevant for flow scheduling mode) - * @txq: Tx queue to clean - * @tx_buf: buffer to store - */ -static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, - struct idpf_tx_buf *tx_buf) -{ - struct idpf_tx_stash *stash; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) - return 0; - - stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); - if (unlikely(!stash)) { - net_err_ratelimited("%s: No out-of-order TX buffers left!\n", - netdev_name(txq->netdev)); - - return -ENOMEM; - } - - /* Store buffer params in shadow buffer */ - stash->buf.skb = tx_buf->skb; - stash->buf.bytes = tx_buf->bytes; - stash->buf.packets = tx_buf->packets; - stash->buf.type = tx_buf->type; - stash->buf.nr_frags = tx_buf->nr_frags; - dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); - dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); - idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); - - /* Add buffer to buf_hash table to be freed later */ - hash_add(txq->stash->sched_buf_hash, &stash->hlist, - idpf_tx_buf_compl_tag(&stash->buf)); - - tx_buf->type = LIBETH_SQE_EMPTY; - - return 0; -} - #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ if (unlikely(++(ntc) == (txq)->desc_count)) { \ @@ -1850,14 +1629,8 @@ do { \ * Separate packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. - * - * Furthermore, in flow scheduling mode, check to make sure there are enough - * reserve buffers to stash the packet. If there are not, return early, which - * will leave next_to_clean pointing to the packet that failed to be stashed. - * - * Return: false in the scenario above, true otherwise. */ -static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, +static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, struct libeth_sq_napi_stats *cleaned, bool descs_only) @@ -1871,12 +1644,11 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, .napi = napi_budget, }; struct idpf_tx_buf *tx_buf; - bool clean_complete = true; if (descs_only) { /* Bump ring index to mark as cleaned. */ tx_q->next_to_clean = end; - return true; + return; } tx_desc = &tx_q->flex_tx[ntc]; @@ -1897,53 +1669,24 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, break; eop_idx = tx_buf->rs_idx; + libeth_tx_complete(tx_buf, &cp); - if (descs_only) { - if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { - clean_complete = false; - goto tx_splitq_clean_out; - } + /* unmap remaining buffers */ + while (ntc != eop_idx) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - } - } else { + /* unmap any remaining paged data */ libeth_tx_complete(tx_buf, &cp); - - /* unmap remaining buffers */ - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - - /* unmap any remaining paged data */ - libeth_tx_complete(tx_buf, &cp); - } } fetch_next_txq_desc: idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); } -tx_splitq_clean_out: tx_q->next_to_clean = ntc; - - return clean_complete; } -#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ -do { \ - (buf)++; \ - (ntc)++; \ - if (unlikely((ntc) == (txq)->desc_count)) { \ - buf = (txq)->tx_buf; \ - ntc = 0; \ - } \ -} while (0) - /** * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean @@ -1954,7 +1697,7 @@ do { \ * Clean all buffers associated with the packet starting at buf_id. Returns the * byte/segment count for the cleaned packet. */ -static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, +static void idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, struct libeth_sq_napi_stats *cleaned, int budget) { @@ -1981,8 +1724,6 @@ static bool idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, libeth_tx_complete(tx_buf, &cp); idpf_post_buf_refill(txq->refillq, buf_id); } - - return true; } /** @@ -2001,22 +1742,17 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct libeth_sq_napi_stats *cleaned, int budget) { - u16 compl_tag; + /* RS completion contains queue head for queue based scheduling or + * completion tag for flow based scheduling. + */ + u16 rs_compl_val = le16_to_cpu(desc->q_head_compl_tag.q_head); if (!idpf_queue_has(FLOW_SCH_EN, txq)) { - u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - - idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false); return; } - compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - - /* If we didn't clean anything on the ring, this packet must be - * in the hash table. Go clean it there. - */ - if (!idpf_tx_clean_bufs(txq, compl_tag, cleaned, budget)) - idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); + idpf_tx_clean_bufs(txq, rs_compl_val, cleaned, budget); } /** @@ -2133,8 +1869,7 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, /* Update BQL */ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || - np->state != __IDPF_VPORT_UP || + dont_wake = !complq_ok || np->state != __IDPF_VPORT_UP || !netif_carrier_ok(tx_q->netdev); /* Check if the TXQ needs to and can be restarted */ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, @@ -2205,7 +1940,6 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q) || idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; @@ -2329,10 +2063,8 @@ static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) { ntu++; - if (ntu == txq->desc_count) { + if (ntu == txq->desc_count) ntu = 0; - txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); - } return ntu; } @@ -2514,8 +2246,6 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, if (unlikely(++i == tx_q->desc_count)) { tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = - IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { tx_desc++; } @@ -2546,7 +2276,6 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, if (unlikely(++i == tx_q->desc_count)) { tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { tx_desc++; } @@ -2980,10 +2709,9 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; - /* Set the RE bit to catch any packets that may have not been - * stashed during RS completion cleaning. MIN_GAP is set to - * MIN_RING size to ensure it will be set at least once each - * time around the ring. + /* Set the RE bit to periodically "clean" the descriptor ring. + * MIN_GAP is set to MIN_RING size to ensure it will be set at + * least once each time around the ring. */ if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 9565e4dc3514..52753dff381c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -118,10 +118,6 @@ do { \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) -#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ - (txq)->desc_count >> 2) - #define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) /* Determine the absolute number of completions pending, i.e. the number of * completions that are expected to arrive on the TX completion queue. @@ -131,12 +127,6 @@ do { \ 0 : U32_MAX) + \ (txq)->num_completions_pending - (txq)->complq->num_completions) -#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 -/* Adjust the generation for the completion tag and wrap if necessary */ -#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ - ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ - 0 : (txq)->compl_tag_cur_gen) - #define IDPF_TXBUF_NULL U32_MAX #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) @@ -154,18 +144,6 @@ union idpf_tx_flex_desc { #define idpf_tx_buf libeth_sqe -/** - * struct idpf_buf_lifo - LIFO for managing OOO completions - * @top: Used to know how many buffers are left - * @size: Total size of LIFO - * @bufs: Backing array - */ -struct idpf_buf_lifo { - u16 top; - u16 size; - struct idpf_tx_stash **bufs; -}; - /** * struct idpf_tx_offload_params - Offload parameters for a given packet * @tx_flags: Feature flags enabled for this packet @@ -476,17 +454,6 @@ struct idpf_tx_queue_stats { #define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt) #define IDPF_DIM_DEFAULT_PROFILE_IX 1 -/** - * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode - * @buf_stack: Stack of empty buffers to store buffer info for out of order - * buffer completions. See struct idpf_buf_lifo - * @sched_buf_hash: Hash table to store buffers - */ -struct idpf_txq_stash { - struct idpf_buf_lifo buf_stack; - DECLARE_HASHTABLE(sched_buf_hash, 12); -} ____cacheline_aligned; - /** * struct idpf_rx_queue - software structure representing a receive queue * @rx: universal receive descriptor array @@ -631,11 +598,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. * @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @stash: Tx buffer stash for Flow-based scheduling mode * @refillq: Pointer to refill queue - * @compl_tag_bufid_m: Completion tag buffer id mask - * @compl_tag_cur_gen: Used to keep track of current completion tag generation - * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP * @tstamp_task: Work that handles Tx timestamp read * @stats_sync: See struct u64_stats_sync @@ -666,7 +629,6 @@ struct idpf_tx_queue { u16 desc_count; u16 tx_min_pkt_len; - u16 compl_tag_gen_s; struct net_device *netdev; __cacheline_group_end_aligned(read_mostly); @@ -683,13 +645,8 @@ struct idpf_tx_queue { }; u16 cleaned_pkts; - struct idpf_txq_stash *stash; struct idpf_sw_queue *refillq; - u16 compl_tag_bufid_m; - u16 compl_tag_cur_gen; - u16 compl_tag_gen_max; - struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps; struct work_struct *tstamp_task; @@ -707,7 +664,7 @@ struct idpf_tx_queue { __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 120 + sizeof(struct u64_stats_sync), + 104 + sizeof(struct u64_stats_sync), 32); /** @@ -918,7 +875,6 @@ struct idpf_rxq_group { * @vport: Vport back pointer * @num_txq: Number of TX queues associated * @txqs: Array of TX queue pointers - * @stashes: array of OOO stashes for the queues * @complq: Associated completion queue pointer, split queue only * @num_completions_pending: Total number of completions pending for the * completion queue, acculumated for all TX queues @@ -933,7 +889,6 @@ struct idpf_txq_group { u16 num_txq; struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; - struct idpf_txq_stash *stashes; struct idpf_compl_queue *complq; From 91a79b792204313153e1bdbbe5acbfc28903b3a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Aug 2025 14:37:07 +0200 Subject: [PATCH 0693/1307] netfilter: nf_reject: don't leak dst refcount for loopback packets recent patches to add a WARN() when replacing skb dst entry found an old bug: WARNING: include/linux/skbuff.h:1165 skb_dst_check_unset include/linux/skbuff.h:1164 [inline] WARNING: include/linux/skbuff.h:1165 skb_dst_set include/linux/skbuff.h:1210 [inline] WARNING: include/linux/skbuff.h:1165 nf_reject_fill_skb_dst+0x2a4/0x330 net/ipv4/netfilter/nf_reject_ipv4.c:234 [..] Call Trace: nf_send_unreach+0x17b/0x6e0 net/ipv4/netfilter/nf_reject_ipv4.c:325 nft_reject_inet_eval+0x4bc/0x690 net/netfilter/nft_reject_inet.c:27 expr_call_ops_eval net/netfilter/nf_tables_core.c:237 [inline] .. This is because blamed commit forgot about loopback packets. Such packets already have a dst_entry attached, even at PRE_ROUTING stage. Instead of checking hook just check if the skb already has a route attached to it. Fixes: f53b9b0bdc59 ("netfilter: introduce support for reject at prerouting stage") Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250820123707.10671-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/ipv4/netfilter/nf_reject_ipv4.c | 6 ++---- net/ipv6/netfilter/nf_reject_ipv6.c | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 87fd945a0d27..0d3cb2ba6fc8 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -247,8 +247,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, if (!oth) return; - if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && - nf_reject_fill_skb_dst(oldskb) < 0) + if (!skb_dst(oldskb) && nf_reject_fill_skb_dst(oldskb) < 0) return; if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -321,8 +320,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) if (iph->frag_off & htons(IP_OFFSET)) return; - if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && - nf_reject_fill_skb_dst(skb_in) < 0) + if (!skb_dst(skb_in) && nf_reject_fill_skb_dst(skb_in) < 0) return; if (skb_csum_unnecessary(skb_in) || diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 838295fa32e3..cb2d38e80de9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -293,7 +293,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; - if (hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) { + if (!skb_dst(oldskb)) { nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (!dst) return; @@ -397,8 +397,7 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) && - nf_reject6_fill_skb_dst(skb_in) < 0) + if (!skb_dst(skb_in) && nf_reject6_fill_skb_dst(skb_in) < 0) return; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); From aea70964b5a7ca491a3701f2dde6c9d05d51878d Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 20 Aug 2025 14:28:04 -0500 Subject: [PATCH 0694/1307] of: reserved_mem: Add missing IORESOURCE_MEM flag on resources Commit f4fcfdda2fd8 ('of: reserved_mem: Add functions to parse "memory-region"') failed to set IORESOURCE_MEM flag on the resources. The result is functions such as devm_ioremap_resource_wc() will fail. Add the missing flag. Fixes: f4fcfdda2fd8 ('of: reserved_mem: Add functions to parse "memory-region"') Reported-by: Iuliana Prodan Reported-by: Daniel Baluta Tested-by: Iuliana Prodan Reviewed-by: Iuliana Prodan Reviewed-by: Saravana Kannan Link: https://lore.kernel.org/r/20250820192805.565568-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_reserved_mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 77016c0cc296..d3b7c4ae429c 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -771,6 +771,7 @@ int of_reserved_mem_region_to_resource(const struct device_node *np, return -EINVAL; resource_set_range(res, rmem->base, rmem->size); + res->flags = IORESOURCE_MEM; res->name = rmem->name; return 0; } From 8a30114073639fd97f2c7390abbc34fb8711327a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 11 Aug 2025 12:43:58 +0200 Subject: [PATCH 0695/1307] drm/xe: Move ASID allocation and user PT BO tracking into xe_vm_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, ASID assignment for user VMs and page-table BO accounting for client memory tracking are performed in xe_vm_create_ioctl. To consolidate VM object initialization, move this logic to xe_vm_create. v2: - removed unnecessary duplicate BO tracking code - using the local variable xef to verify whether the VM is being created by userspace Fixes: 658a1c8e0a66 ("drm/xe: Assign ioctl xe file handler to vm in xe_vm_create") Suggested-by: Matthew Auld Signed-off-by: Piotr Piórkowski Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250811104358.2064150-3-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski (cherry picked from commit 30e0c3f43a414616e0b6ca76cf7f7b2cd387e1d4) Signed-off-by: Rodrigo Vivi [Rodrigo: Added fixes tag] --- drivers/gpu/drm/xe/xe_vm.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5bff317e335a..1bb73bb63406 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1795,6 +1795,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) + goto err_unlock_close; + + vm->usm.asid = asid; + } + trace_xe_vm_create(vm); return vm; @@ -2062,9 +2076,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; @@ -2104,23 +2117,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto err_close_and_put; - - vm->usm.asid = asid; - } - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); From 111fb43a557726079a67ce3ab51f602ddbf7097e Mon Sep 17 00:00:00 2001 From: Christoph Manszewski Date: Wed, 13 Aug 2025 12:12:30 +0200 Subject: [PATCH 0696/1307] drm/xe: Fix vm_bind_ioctl double free bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the argument check during an array bind fails, the bind_ops are freed twice as seen below. Fix this by setting bind_ops to NULL after freeing. ================================================================== BUG: KASAN: double-free in xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] Free of addr ffff88813bb9b800 by task xe_vm/14198 CPU: 5 UID: 0 PID: 14198 Comm: xe_vm Not tainted 6.16.0-xe-eudebug-cmanszew+ #520 PREEMPT(full) Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P DDR5 RVP, BIOS ADLPFWI1.R00.2411.A02.2110081023 10/08/2021 Call Trace: dump_stack_lvl+0x82/0xd0 print_report+0xcb/0x610 ? __virt_addr_valid+0x19a/0x300 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] kasan_report_invalid_free+0xc8/0xf0 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] check_slab_allocation+0x102/0x130 kfree+0x10d/0x440 ? should_fail_ex+0x57/0x2f0 ? xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] xe_vm_bind_ioctl+0x1b2/0x21f0 [xe] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] ? __lock_acquire+0xab9/0x27f0 ? lock_acquire+0x165/0x300 ? drm_dev_enter+0x53/0xe0 [drm] ? find_held_lock+0x2b/0x80 ? drm_dev_exit+0x30/0x50 [drm] ? drm_ioctl_kernel+0x128/0x1c0 [drm] drm_ioctl_kernel+0x128/0x1c0 [drm] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] ? find_held_lock+0x2b/0x80 ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] ? should_fail_ex+0x57/0x2f0 ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] drm_ioctl+0x352/0x620 [drm] ? __pfx_drm_ioctl+0x10/0x10 [drm] ? __pfx_rpm_resume+0x10/0x10 ? do_raw_spin_lock+0x11a/0x1b0 ? find_held_lock+0x2b/0x80 ? __pm_runtime_resume+0x61/0xc0 ? rcu_is_watching+0x20/0x50 ? trace_irq_enable.constprop.0+0xac/0xe0 xe_drm_ioctl+0x91/0xc0 [xe] __x64_sys_ioctl+0xb2/0x100 ? rcu_is_watching+0x20/0x50 do_syscall_64+0x68/0x2e0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fa9acb24ded Fixes: b43e864af0d4 ("drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: Thomas Hellström Signed-off-by: Christoph Manszewski Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250813101231.196632-2-christoph.manszewski@intel.com (cherry picked from commit a01b704527c28a2fd43a17a85f8996b75ec8492a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1bb73bb63406..ec04bef8ae40 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3418,6 +3418,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3524,7 +3525,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct drm_xe_vm_bind_op *bind_ops = NULL; struct xe_vma_ops vops; struct dma_fence *fence; int err; From ac29e4487aa20a21b7c3facbd1f14f5093835dc9 Mon Sep 17 00:00:00 2001 From: Pritesh Patel Date: Mon, 16 Jun 2025 16:53:12 +0530 Subject: [PATCH 0697/1307] dt-bindings: vendor-prefixes: add eswin Add new vendor string to dt bindings. This new vendor string is used by - ESWIN EIC770X SoC - HiFive Premier P550 board which uses EIC7700 SoC. Link: https://www.eswin.com/en/ Signed-off-by: Pritesh Patel Reviewed-by: Samuel Holland Signed-off-by: Pinkesh Vaghela Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250616112316.3833343-4-pinkesh.vaghela@einfochips.com Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 741b545e3ab0..982ef10eed76 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -499,6 +499,8 @@ patternProperties: description: Espressif Systems Co. Ltd. "^est,.*": description: ESTeem Wireless Modems + "^eswin,.*": + description: Beijing ESWIN Technology Group Co. Ltd. "^ettus,.*": description: NI Ettus Research "^eukrea,.*": From 508c1314b342b78591f51c4b5dadee31a88335df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Aug 2025 13:23:21 -0600 Subject: [PATCH 0698/1307] io_uring/futex: ensure io_futex_wait() cleans up properly on failure The io_futex_data is allocated upfront and assigned to the io_kiocb async_data field, but the request isn't marked with REQ_F_ASYNC_DATA at that point. Those two should always go together, as the flag tells io_uring whether the field is valid or not. Additionally, on failure cleanup, the futex handler frees the data but does not clear ->async_data. Clear the data and the flag in the error path as well. Thanks to Trend Micro Zero Day Initiative and particularly ReDress for reporting this. Cc: stable@vger.kernel.org Fixes: 194bb58c6090 ("io_uring: add support for futex wake and wait") Signed-off-by: Jens Axboe --- io_uring/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/futex.c b/io_uring/futex.c index 692462d50c8c..9113a44984f3 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -288,6 +288,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) goto done_unlock; } + req->flags |= REQ_F_ASYNC_DATA; req->async_data = ifd; ifd->q = futex_q_init; ifd->q.bitset = iof->futex_mask; @@ -309,6 +310,8 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); + req->async_data = NULL; + req->flags &= ~REQ_F_ASYNC_DATA; kfree(ifd); return IOU_COMPLETE; } From e4e6aaea46b7be818eba0510ba68d30df8689ea3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Aug 2025 13:24:57 -0600 Subject: [PATCH 0699/1307] io_uring: clear ->async_data as part of normal init Opcode handlers like POLL_ADD will use ->async_data as the pointer for double poll handling, which is a bit different than the usual case where it's strictly gated by the REQ_F_ASYNC_DATA flag. Be a bit more proactive in handling ->async_data, and clear it to NULL as part of regular init. Init is touching that cacheline anyway, so might as well clear it. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4ef69dd58734..93633613a165 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2119,6 +2119,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->file = NULL; req->tctx = current->io_uring; req->cancel_seq_set = false; + req->async_data = NULL; if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; From 2b3979624c3e34dcdd77d910c6490939727d91b2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 25 Jul 2025 16:51:49 +0100 Subject: [PATCH 0700/1307] btrfs: abort transaction on failure to add link to inode If we fail to update the inode or delete the orphan item, we must abort the transaction to prevent persisting an inconsistent state. For example if we fail to update the inode item, we have the inconsistency of having a persisted inode item with a link count of N but we have N + 1 inode ref items and N + 1 directory entries pointing to our inode in case the transaction gets committed. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9e4aec7330cb..af2f9b2c8c85 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6852,16 +6852,20 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *parent = dentry->d_parent; ret = btrfs_update_inode(trans, BTRFS_I(inode)); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; + } if (inode->i_nlink == 1) { /* * If new hard link count is 1, it's a file created * with open(2) O_TMPFILE flag. */ ret = btrfs_orphan_del(trans, BTRFS_I(inode)); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; + } } d_instantiate(dentry, inode); btrfs_log_new_name(trans, old_dentry, NULL, 0, parent); From e87e953bb20629ca1f008f8146c38e313e5ed319 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 25 Jul 2025 16:54:49 +0100 Subject: [PATCH 0701/1307] btrfs: fix inode leak on failure to add link to inode If we fail to update the inode or delete the orphan item we leak the inode since we update its refcount with the ihold() call to account for the d_instantiate() call which never happens in case we fail those steps. Fix this by setting 'drop_inode' to true in case we fail those steps. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index af2f9b2c8c85..4ed5ab5d3ac1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6854,6 +6854,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ret = btrfs_update_inode(trans, BTRFS_I(inode)); if (ret) { btrfs_abort_transaction(trans, ret); + drop_inode = 1; goto fail; } if (inode->i_nlink == 1) { @@ -6864,6 +6865,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ret = btrfs_orphan_del(trans, BTRFS_I(inode)); if (ret) { btrfs_abort_transaction(trans, ret); + drop_inode = 1; goto fail; } } From 5bb00879cb23db7e5e2fc0aa47b5ce3b1c713d8a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 25 Jul 2025 17:08:13 +0100 Subject: [PATCH 0702/1307] btrfs: simplify error handling logic for btrfs_link() Instead of incrementing the inode's link count and refcount early before adding the link, updating the inode and deleting orphan item, do it after all those steps succeeded right before calling d_instantiate(). This makes the error handling logic simpler by avoiding the need for the 'drop_inode' variable to signal if we need to undo the link count increment and the inode refcount increase under the 'fail' label. This also reduces the level of indentation by one, making the code easier to read. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4ed5ab5d3ac1..321a46be0377 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6805,7 +6805,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct fscrypt_name fname; u64 index; int ret; - int drop_inode = 0; /* do not allow sys_link's with other subvols of the same device */ if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root)) @@ -6837,50 +6836,44 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, /* There are several dir indexes for this inode, clear the cache. */ BTRFS_I(inode)->dir_index = 0ULL; - inc_nlink(inode); inode_inc_iversion(inode); inode_set_ctime_current(inode); - ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); + if (ret) + goto fail; + /* Link added now we update the inode item with the new link count. */ + inc_nlink(inode); + ret = btrfs_update_inode(trans, BTRFS_I(inode)); if (ret) { - drop_inode = 1; - } else { - struct dentry *parent = dentry->d_parent; + btrfs_abort_transaction(trans, ret); + goto fail; + } - ret = btrfs_update_inode(trans, BTRFS_I(inode)); + if (inode->i_nlink == 1) { + /* + * If the new hard link count is 1, it's a file created with the + * open(2) O_TMPFILE flag. + */ + ret = btrfs_orphan_del(trans, BTRFS_I(inode)); if (ret) { btrfs_abort_transaction(trans, ret); - drop_inode = 1; goto fail; } - if (inode->i_nlink == 1) { - /* - * If new hard link count is 1, it's a file created - * with open(2) O_TMPFILE flag. - */ - ret = btrfs_orphan_del(trans, BTRFS_I(inode)); - if (ret) { - btrfs_abort_transaction(trans, ret); - drop_inode = 1; - goto fail; - } - } - d_instantiate(dentry, inode); - btrfs_log_new_name(trans, old_dentry, NULL, 0, parent); } + /* Grab reference for the new dentry passed to d_instantiate(). */ + ihold(inode); + d_instantiate(dentry, inode); + btrfs_log_new_name(trans, old_dentry, NULL, 0, dentry->d_parent); + fail: fscrypt_free_filename(&fname); if (trans) btrfs_end_transaction(trans); - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } btrfs_btree_balance_dirty(fs_info); return ret; } From ef07b74e1be56f9eafda6aadebb9ebba0743c9f0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 6 Aug 2025 12:11:30 +0100 Subject: [PATCH 0703/1307] btrfs: fix race between logging inode and checking if it was logged before There's a race between checking if an inode was logged before and logging an inode that can cause us to mark an inode as not logged just after it was logged by a concurrent task: 1) We have inode X which was not logged before neither in the current transaction not in past transaction since the inode was loaded into memory, so it's ->logged_trans value is 0; 2) We are at transaction N; 3) Task A calls inode_logged() against inode X, sees that ->logged_trans is 0 and there is a log tree and so it proceeds to search in the log tree for an inode item for inode X. It doesn't see any, but before it sets ->logged_trans to N - 1... 3) Task B calls btrfs_log_inode() against inode X, logs the inode and sets ->logged_trans to N; 4) Task A now sets ->logged_trans to N - 1; 5) At this point anyone calling inode_logged() gets 0 (inode not logged) since ->logged_trans is greater than 0 and less than N, but our inode was really logged. As a consequence operations like rename, unlink and link that happen afterwards in the current transaction end up not updating the log when they should. Fix this by ensuring inode_logged() only updates ->logged_trans in case the inode item is not found in the log tree if after tacking the inode's lock (spinlock struct btrfs_inode::lock) the ->logged_trans value is still zero, since the inode lock is what protects setting ->logged_trans at btrfs_log_inode(). Fixes: 0f8ce49821de ("btrfs: avoid inode logging during rename and link when possible") Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 69e11557fd13..839dad701290 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } +static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) +{ + bool ret = false; + + /* + * Do this only if ->logged_trans is still 0 to prevent races with + * concurrent logging as we may see the inode not logged when + * inode_logged() is called but it gets logged after inode_logged() did + * not find it in the log tree and we end up setting ->logged_trans to a + * value less than trans->transid after the concurrent logging task has + * set it to trans->transid. As a consequence, subsequent rename, unlink + * and link operations may end up not logging new names and removing old + * names from the log. + */ + spin_lock(&inode->lock); + if (inode->logged_trans == 0) + inode->logged_trans = trans->transid - 1; + else if (inode->logged_trans == trans->transid) + ret = true; + spin_unlock(&inode->lock); + + return ret; +} + /* * Check if an inode was logged in the current transaction. This correctly deals * with the case where the inode was logged but has a logged_trans of 0, which @@ -3374,10 +3399,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * transaction's ID, to avoid the search below in a future call in case * a log tree gets created after this. */ - if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) { - inode->logged_trans = trans->transid - 1; - return 0; - } + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) + return mark_inode_as_not_logged(trans, inode); /* * We have a log tree and the inode's logged_trans is 0. We can't tell @@ -3431,8 +3454,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * Set logged_trans to a value greater than 0 and less then the * current transaction to avoid doing the search in future calls. */ - inode->logged_trans = trans->transid - 1; - return 0; + return mark_inode_as_not_logged(trans, inode); } /* @@ -3440,7 +3462,9 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * the current transacion's ID, to avoid future tree searches as long as * the inode is not evicted again. */ + spin_lock(&inode->lock); inode->logged_trans = trans->transid; + spin_unlock(&inode->lock); /* * If it's a directory, then we must set last_dir_index_offset to the From 59a0dd4ab98970086fd096281b1606c506ff2698 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 6 Aug 2025 12:11:31 +0100 Subject: [PATCH 0704/1307] btrfs: fix race between setting last_dir_index_offset and inode logging At inode_logged() if we find that the inode was not logged before we update its ->last_dir_index_offset to (u64)-1 with the goal that the next directory log operation will see the (u64)-1 and then figure out it must check what was the index of the last logged dir index key and update ->last_dir_index_offset to that key's offset (this is done in update_last_dir_index_offset()). This however has a possibility for a time window where a race can happen and lead to directory logging skipping dir index keys that should be logged. The race happens like this: 1) Task A calls inode_logged(), sees ->logged_trans as 0 and then checks that the inode item was logged before, but before it sets the inode's ->last_dir_index_offset to (u64)-1... 2) Task B is at btrfs_log_inode() which calls inode_logged() early, and that has set ->last_dir_index_offset to (u64)-1; 3) Task B then enters log_directory_changes() which calls update_last_dir_index_offset(). There it sees ->last_dir_index_offset is (u64)-1 and that the inode was logged before (ctx->logged_before is true), and so it searches for the last logged dir index key in the log tree and it finds that it has an offset (index) value of N, so it sets ->last_dir_index_offset to N, so that we can skip index keys that are less than or equal to N (later at process_dir_items_leaf()); 4) Task A now sets ->last_dir_index_offset to (u64)-1, undoing the update that task B just did; 5) Task B will now skip every index key when it enters process_dir_items_leaf(), since ->last_dir_index_offset is (u64)-1. Fix this by making inode_logged() not touch ->last_dir_index_offset and initializing it to 0 when an inode is loaded (at btrfs_alloc_inode()) and then having update_last_dir_index_offset() treat a value of 0 as meaning we must check the log tree and update with the index of the last logged index key. This is fine since the minimum possible value for ->last_dir_index_offset is 1 (BTRFS_DIR_START_INDEX - 1 = 2 - 1 = 1). This also simplifies the management of ->last_dir_index_offset and now all accesses to it are done under the inode's log_mutex. Fixes: 0f8ce49821de ("btrfs: avoid inode logging during rename and link when possible") Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 2 +- fs/btrfs/inode.c | 1 + fs/btrfs/tree-log.c | 17 ++--------------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b99fb0273292..0387b9f43a52 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -248,7 +248,7 @@ struct btrfs_inode { u64 new_delalloc_bytes; /* * The offset of the last dir index key that was logged. - * This is used only for directories. + * This is used only for directories. Protected by 'log_mutex'. */ u64 last_dir_index_offset; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 321a46be0377..dd82dcc7b2b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7829,6 +7829,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; + /* new_delalloc_bytes and last_dir_index_offset are in a union. */ ei->new_delalloc_bytes = 0; ei->defrag_bytes = 0; ei->disk_i_size = 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 839dad701290..2a3d9e996e72 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3466,19 +3466,6 @@ static int inode_logged(const struct btrfs_trans_handle *trans, inode->logged_trans = trans->transid; spin_unlock(&inode->lock); - /* - * If it's a directory, then we must set last_dir_index_offset to the - * maximum possible value, so that the next attempt to log the inode does - * not skip checking if dir index keys found in modified subvolume tree - * leaves have been logged before, otherwise it would result in attempts - * to insert duplicate dir index keys in the log tree. This must be done - * because last_dir_index_offset is an in-memory only field, not persisted - * in the inode item or any other on-disk structure, so its value is lost - * once the inode is evicted. - */ - if (S_ISDIR(inode->vfs_inode.i_mode)) - inode->last_dir_index_offset = (u64)-1; - return 1; } @@ -4069,7 +4056,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, /* * If the inode was logged before and it was evicted, then its - * last_dir_index_offset is (u64)-1, so we don't the value of the last index + * last_dir_index_offset is 0, so we don't know the value of the last index * key offset. If that's the case, search for it and update the inode. This * is to avoid lookups in the log tree every time we try to insert a dir index * key from a leaf changed in the current transaction, and to allow us to always @@ -4085,7 +4072,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode, lockdep_assert_held(&inode->log_mutex); - if (inode->last_dir_index_offset != (u64)-1) + if (inode->last_dir_index_offset != 0) return 0; if (!ctx->logged_before) { From 986bf6ed44dff7fbae7b43a0882757ee7f5ba21b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 6 Aug 2025 12:11:32 +0100 Subject: [PATCH 0705/1307] btrfs: avoid load/store tearing races when checking if an inode was logged At inode_logged() we do a couple lockless checks for ->logged_trans, and these are generally safe except the second one in case we get a load or store tearing due to a concurrent call updating ->logged_trans (either at btrfs_log_inode() or later at inode_logged()). In the first case it's safe to compare to the current transaction ID since once ->logged_trans is set the current transaction, we never set it to a lower value. In the second case, where we check if it's greater than zero, we are prone to load/store tearing races, since we can have a concurrent task updating to the current transaction ID with store tearing for example, instead of updating with a single 64 bits write, to update with two 32 bits writes or four 16 bits writes. In that case the reading side at inode_logged() could see a positive value that does not match the current transaction and then return a false negative. Fix this by doing the second check while holding the inode's spinlock, add some comments about it too. Also add the data_race() annotation to the first check to avoid any reports from KCSAN (or similar tools) and comment about it. Fixes: 0f8ce49821de ("btrfs: avoid inode logging during rename and link when possible") Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2a3d9e996e72..7d5d90845ca9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3382,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; - if (inode->logged_trans == trans->transid) + /* + * Quick lockless call, since once ->logged_trans is set to the current + * transaction, we never set it to a lower value anywhere else. + */ + if (data_race(inode->logged_trans) == trans->transid) return 1; /* - * If logged_trans is not 0, then we know the inode logged was not logged - * in this transaction, so we can return false right away. + * If logged_trans is not 0 and not trans->transid, then we know the + * inode was not logged in this transaction, so we can return false + * right away. We take the lock to avoid a race caused by load/store + * tearing with a concurrent btrfs_log_inode() call or a concurrent task + * in this function further below - an update to trans->transid can be + * teared into two 32 bits updates for example, in which case we could + * see a positive value that is not trans->transid and assume the inode + * was not logged when it was. */ - if (inode->logged_trans > 0) + spin_lock(&inode->lock); + if (inode->logged_trans == trans->transid) { + spin_unlock(&inode->lock); + return 1; + } else if (inode->logged_trans > 0) { + spin_unlock(&inode->lock); return 0; + } + spin_unlock(&inode->lock); /* * If no log tree was created for this root in this transaction, then From 0e89ca13ee5ff41b437bb2a003c0eaf34ea43555 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 20 Aug 2025 16:22:41 +0000 Subject: [PATCH 0706/1307] KVM: arm64: Split kvm_pgtable_stage2_destroy() Split kvm_pgtable_stage2_destroy() into two: - kvm_pgtable_stage2_destroy_range(), that performs the page-table walk and free the entries over a range of addresses. - kvm_pgtable_stage2_destroy_pgd(), that frees the PGD. This refactoring enables subsequent patches to free large page-tables in chunks, calling cond_resched() between each chunk, to yield the CPU as necessary. Existing callers of kvm_pgtable_stage2_destroy(), that probably cannot take advantage of this (such as nVMHE), will continue to function as is. Signed-off-by: Raghavendra Rao Ananta Suggested-by: Oliver Upton Link: https://lore.kernel.org/r/20250820162242.2624752-2-rananta@google.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_pgtable.h | 30 ++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_pkvm.h | 4 +++- arch/arm64/kvm/hyp/pgtable.c | 25 +++++++++++++++++++---- arch/arm64/kvm/mmu.c | 12 +++++++++-- arch/arm64/kvm/pkvm.c | 11 ++++++++-- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 2888b5d03757..1246216616b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return pteref; +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { /* @@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return rcu_dereference_raw(pteref); +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) @@ -551,6 +561,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2 */ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +/** + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * + * It is assumed that the rest of the page-table is freed before this operation. + */ +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); + /** * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. * @mm_ops: Memory management callbacks. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..35f9d9478004 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -179,7 +179,9 @@ struct pkvm_mapping { int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c351b4abd5db..c36f282a175d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); +} + +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); + + /* + * Since the pgtable is unlinked at this point, and not shared with + * other walkers, safely deference pgd with kvm_dereference_pteref_raw() + */ + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); pgt->pgd = NULL; } +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); + kvm_pgtable_stage2_destroy_pgd(pgt); +} + void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 9a45daf817bf..6330a02c8418 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -904,6 +904,14 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } +static void kvm_stage2_destroy(struct kvm_pgtable *pgt) +{ + unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); + + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, 0, BIT(ia_bits)); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); +} + /** * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure @@ -980,7 +988,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); out_free_pgtable: kfree(pgt); return err; @@ -1077,7 +1085,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); kfree(pgt); } } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..61827cf6fea4 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e return 0; } -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); + __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); +} + +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + /* Expected to be called after all pKVM mappings have been released. */ + WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); } int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, From e9abe311f35631a999fe38c86f26f0e48ffe46d5 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Wed, 20 Aug 2025 16:22:42 +0000 Subject: [PATCH 0707/1307] KVM: arm64: Reschedule as needed when destroying the stage-2 page-tables When a large VM, specifically one that holds a significant number of PTEs, gets abruptly destroyed, the following warning is seen during the page-table walk: sched: CPU 0 need_resched set for > 100018840 ns (100 ticks) without schedule CPU: 0 UID: 0 PID: 9617 Comm: kvm_page_table_ Tainted: G O 6.16.0-smp-DEV #3 NONE Tainted: [O]=OOT_MODULE Call trace: show_stack+0x20/0x38 (C) dump_stack_lvl+0x3c/0xb8 dump_stack+0x18/0x30 resched_latency_warn+0x7c/0x88 sched_tick+0x1c4/0x268 update_process_times+0xa8/0xd8 tick_nohz_handler+0xc8/0x168 __hrtimer_run_queues+0x11c/0x338 hrtimer_interrupt+0x104/0x308 arch_timer_handler_phys+0x40/0x58 handle_percpu_devid_irq+0x8c/0x1b0 generic_handle_domain_irq+0x48/0x78 gic_handle_irq+0x1b8/0x408 call_on_irq_stack+0x24/0x30 do_interrupt_handler+0x54/0x78 el1_interrupt+0x44/0x88 el1h_64_irq_handler+0x18/0x28 el1h_64_irq+0x84/0x88 stage2_free_walker+0x30/0xa0 (P) __kvm_pgtable_walk+0x11c/0x258 __kvm_pgtable_walk+0x180/0x258 __kvm_pgtable_walk+0x180/0x258 __kvm_pgtable_walk+0x180/0x258 kvm_pgtable_walk+0xc4/0x140 kvm_pgtable_stage2_destroy+0x5c/0xf0 kvm_free_stage2_pgd+0x6c/0xe8 kvm_uninit_stage2_mmu+0x24/0x48 kvm_arch_flush_shadow_all+0x80/0xa0 kvm_mmu_notifier_release+0x38/0x78 __mmu_notifier_release+0x15c/0x250 exit_mmap+0x68/0x400 __mmput+0x38/0x1c8 mmput+0x30/0x68 exit_mm+0xd4/0x198 do_exit+0x1a4/0xb00 do_group_exit+0x8c/0x120 get_signal+0x6d4/0x778 do_signal+0x90/0x718 do_notify_resume+0x70/0x170 el0_svc+0x74/0xd8 el0t_64_sync_handler+0x60/0xc8 el0t_64_sync+0x1b0/0x1b8 The warning is seen majorly on the host kernels that are configured not to force-preempt, such as CONFIG_PREEMPT_NONE=y. To avoid this, instead of walking the entire page-table in one go, split it into smaller ranges, by checking for cond_resched() between each range. Since the path is executed during VM destruction, after the page-table structure is unlinked from the KVM MMU, relying on cond_resched_rwlock_write() isn't necessary. Signed-off-by: Raghavendra Rao Ananta Suggested-by: Oliver Upton Link: https://lore.kernel.org/r/20250820162242.2624752-3-rananta@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6330a02c8418..86f3d80daf37 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -904,11 +904,35 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } +/* + * Assume that @pgt is valid and unlinked from the KVM MMU to free the + * page-table without taking the kvm_mmu_lock and without performing any + * TLB invalidations. + * + * Also, the range of addresses can be large enough to cause need_resched + * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke + * cond_resched() periodically to prevent hogging the CPU for a long time + * and schedule something else, if required. + */ +static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, + phys_addr_t end) +{ + u64 next; + + do { + next = stage2_range_addr_end(addr, end); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, + next - addr); + if (next != end) + cond_resched(); + } while (addr = next, addr != end); +} + static void kvm_stage2_destroy(struct kvm_pgtable *pgt) { unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); - KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, 0, BIT(ia_bits)); + stage2_destroy_range(pgt, 0, BIT(ia_bits)); KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); } From 8049164653c6e6e7b347da773098d8660a26a6f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:53 +0100 Subject: [PATCH 0708/1307] arm64: Add capability denoting FEAT_RASv1p1 Detecting FEAT_RASv1p1 is rather complicated, as there are two ways for the architecture to advertise the same thing (always a delight...). Add a capability that will advertise this in a synthetic way to the rest of the kernel. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20250817202158.395078-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kernel/cpufeature.c | 24 ++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 25 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4dece9ca68bc..22a94e548362 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2235,6 +2235,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) /* Firmware may have left a deferred SError in this register. */ write_sysreg_s(0, SYS_DISR_EL1); } +static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope) +{ + const struct arm64_cpu_capabilities rasv1p1_caps[] = { + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1) + }, + }; + + return (has_cpuid_feature(&rasv1p1_caps[0], scope) || + (has_cpuid_feature(&rasv1p1_caps[1], scope) && + has_cpuid_feature(&rasv1p1_caps[2], scope))); +} #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -2653,6 +2671,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_clear_disr, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) }, + { + .desc = "RASv1p1 Extension Support", + .capability = ARM64_HAS_RASV1P1_EXTN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_rasv1p1, + }, #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN { diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 115161dd9a24..eb7f1f5622a8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -52,6 +52,7 @@ HAS_S1PIE HAS_S1POE HAS_SCTLR2 HAS_RAS_EXTN +HAS_RASV1P1_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB From d7b3e23f945b36aec3938e5ea954bc125f38562e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:54 +0100 Subject: [PATCH 0709/1307] KVM: arm64: Handle RASv1p1 registers FEAT_RASv1p1 system registeres are not handled at all so far. KVM will give an embarassed warning on the console and inject an UNDEF, despite RASv1p1 being exposed to the guest on suitable HW. Handle these registers similarly to FEAT_RAS, with the added fun that there are *two* way to indicate the presence of FEAT_RASv1p1. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20250817202158.395078-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ad2548477257..1b4114790024 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2695,6 +2695,18 @@ static bool access_ras(struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; switch(reg_to_encoding(r)) { + case SYS_ERXPFGCDN_EL1: + case SYS_ERXPFGCTL_EL1: + case SYS_ERXPFGF_EL1: + case SYS_ERXMISC2_EL1: + case SYS_ERXMISC3_EL1: + if (!(kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) || + (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) && + kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)))) { + kvm_inject_undefined(vcpu); + return false; + } + break; default: if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { kvm_inject_undefined(vcpu); @@ -3058,8 +3070,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGF_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCTL_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCDN_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC2_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC3_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), From 9049fb1227a2d1ab8515788c8553232966380248 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:55 +0100 Subject: [PATCH 0710/1307] KVM: arm64: Ignore HCR_EL2.FIEN set by L1 guest's EL2 An EL2 guest can set HCR_EL2.FIEN, which gives access to the RASv1p1 fault injection mechanism. This would allow an EL1 guest to inject error records into the system, which does sound like a terrible idea. Prevent this situation by added FIEN to the list of bits we silently exclude from being inserted into the host configuration. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20250817202158.395078-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e482181c6632..0998ad4a2552 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -43,8 +43,11 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); * * - API/APK: they are already accounted for by vcpu_load(), and can * only take effect across a load/put cycle (such as ERET) + * + * - FIEN: no way we let a guest have access to the RAS "Common Fault + * Injection" thing, whatever that does */ -#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK) +#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK | HCR_FIEN) static u64 __compute_hcr(struct kvm_vcpu *vcpu) { From 1fab657cb2a07889c343302fbebca035e702683e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:56 +0100 Subject: [PATCH 0711/1307] KVM: arm64: Make ID_AA64PFR0_EL1.RAS writable Make ID_AA64PFR0_EL1.RAS writable so that we can restore a VM from a system without RAS to a RAS-equipped machine (or disable RAS in the guest). Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20250817202158.395078-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1b4114790024..bf160693963c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2939,7 +2939,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SVE | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, From 7a765aa88e345782d3e4ed4c82e49f1ea82fd29c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:57 +0100 Subject: [PATCH 0712/1307] KVM: arm64: Make ID_AA64PFR1_EL1.RAS_frac writable Allow userspace to write to RAS_frac, under the condition that the host supports RASv1p1 with RAS_frac==1. Other configurations will result in RAS_frac being exposed as 0, and therefore implicitly not writable. To avoid the clutter, the ID_AA64PFR1_EL1 sanitisation is moved to its own function. Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20250817202158.395078-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 41 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index bf160693963c..5abe4db6c008 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1584,6 +1584,7 @@ static u8 pmuver_to_perfmon(u8 pmuver) } static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); /* Read a sanitised cpufeature ID register by sys_reg_desc */ @@ -1606,19 +1607,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = sanitise_id_aa64pfr0_el1(vcpu, val); break; case SYS_ID_AA64PFR1_EL1: - if (!kvm_has_mte(vcpu->kvm)) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); - } - - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val = sanitise_id_aa64pfr1_el1(vcpu, val); break; case SYS_ID_AA64PFR2_EL1: /* We only expose FPMR */ @@ -1834,6 +1823,31 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + if (!kvm_has_mte(vcpu->kvm)) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); + } + + if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && + SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RAS_frac); + + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + + return val; +} + static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); @@ -2952,7 +2966,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_SME | ID_AA64PFR1_EL1_RES0 | ID_AA64PFR1_EL1_MPAM_frac | - ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR), ID_UNALLOCATED(4,3), From 0843e0ced338d07c8bcec5675c560a94d05a4d41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 21:21:58 +0100 Subject: [PATCH 0713/1307] KVM: arm64: Get rid of ARM64_FEATURE_MASK() The ARM64_FEATURE_MASK() macro was a hack introduce whilst the automatic generation of sysreg encoding was introduced, and was too unreliable to be entirely trusted. We are in a better place now, and we could really do without this macro. Get rid of it altogether. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250817202158.395078-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/sysreg.h | 3 -- arch/arm64/kvm/arm.c | 8 ++-- arch/arm64/kvm/sys_regs.c | 38 +++++++++---------- tools/arch/arm64/include/asm/sysreg.h | 3 -- .../selftests/kvm/arm64/aarch32_id_regs.c | 2 +- .../selftests/kvm/arm64/debug-exceptions.c | 12 +++--- .../testing/selftests/kvm/arm64/no-vgic-v3.c | 4 +- .../selftests/kvm/arm64/page_fault_test.c | 6 +-- .../testing/selftests/kvm/arm64/set_id_regs.c | 8 ++-- .../selftests/kvm/arm64/vpmu_counter_access.c | 2 +- .../selftests/kvm/lib/arm64/processor.c | 6 +-- 11 files changed, 43 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 948007cd3684..845875991eb8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1146,9 +1146,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7a1a8210ff91..d60d2a644391 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2404,12 +2404,12 @@ static u64 get_hyp_id_aa64pfr0_el1(void) */ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); + val &= ~(ID_AA64PFR0_EL1_CSV2 | + ID_AA64PFR0_EL1_CSV3); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2, arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3, arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); return val; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5abe4db6c008..e387d1dfed1e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1615,18 +1615,18 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); + val &= ~(ID_AA64ISAR1_EL1_APA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_GPI); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); + val &= ~(ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3); if (!cpus_have_final_cap(ARM64_HAS_WFXT) || has_broken_cntvoff()) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; @@ -1642,7 +1642,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: - val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + val &= ~ID_MMFR4_EL1_CCIDX; break; } @@ -1828,22 +1828,22 @@ static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); if (!kvm_has_mte(vcpu->kvm)) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); + val &= ~ID_AA64PFR1_EL1_MTE; + val &= ~ID_AA64PFR1_EL1_MTE_frac; } if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RAS_frac); + val &= ~ID_AA64PFR1_EL1_RAS_frac; - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val &= ~ID_AA64PFR1_EL1_SME; + val &= ~ID_AA64PFR1_EL1_RNDR_trap; + val &= ~ID_AA64PFR1_EL1_NMI; + val &= ~ID_AA64PFR1_EL1_GCS; + val &= ~ID_AA64PFR1_EL1_THE; + val &= ~ID_AA64PFR1_EL1_MTEX; + val &= ~ID_AA64PFR1_EL1_PFAR; + val &= ~ID_AA64PFR1_EL1_MPAM_frac; return val; } diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 690b6ebd118f..65f2759ea27a 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -1080,9 +1080,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index cef8f7323ceb..713005b6f508 100644 --- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); return el0 == ID_AA64PFR0_EL1_EL0_IMP; } diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..521991a89ad9 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt) static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index ebd70430c89d..f222538e6084 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -54,7 +54,7 @@ static void guest_code(void) * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having * hidden the feature at runtime without any other userspace action. */ - __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, read_sysreg(id_aa64pfr0_el1)) == 0, "GICv3 wrongly advertised"); @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, NULL); pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index dc6559dad9d8..4ccbd389d133 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -95,14 +95,14 @@ static bool guest_check_lse(void) uint64_t isar0 = read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -195,7 +195,7 @@ static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index d3bf9204409c..36d40c267b99 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -594,8 +594,8 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); @@ -612,7 +612,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) } val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); else @@ -774,7 +774,7 @@ int main(void) /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f16b3b27e32e..a0c4ab839155 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -441,7 +441,7 @@ static void create_vpmu_vm(void *guest_code) /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 9d69904cb608..eb115123d741 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -573,15 +573,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); From 01860bcc53432d8b9b92a72939b35679ac24059f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 18 Aug 2025 17:41:00 +0100 Subject: [PATCH 0714/1307] KVM: arm64: selftests: Sync ID_AA64MMFR3_EL1 in set_id_regs When we added coverage for ID_AA64MMFR3_EL1 we didn't add it to the list of registers we read in the guest, do so. Fixes: 0b593ef12afc ("KVM: arm64: selftests: Catch up set_id_regs with the kernel") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250818-kvm-arm64-selftests-mmfr3-idreg-v1-1-2f85114d0163@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 36d40c267b99..189321e96925 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -243,6 +243,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); From b08a784a5d1495c42ff9b0c70887d49211cddfe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:54 +0100 Subject: [PATCH 0715/1307] net: Introduce skb_copy_datagram_from_iter_full() In a similar manner to copy_from_iter()/copy_from_iter_full(), introduce skb_copy_datagram_from_iter_full() which reverts the iterator to its initial state when returning an error. A subsequent fix for a vsock regression will make use of this new function. Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Will Deacon Acked-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-2-will@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); diff --git a/net/core/datagram.c b/net/core/datagram.c index 94cc4705e91d..f474b9b120f9 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -618,6 +618,20 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_datagram_from_iter); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len) +{ + struct iov_iter_state state; + int ret; + + iov_iter_save_state(from, &state); + ret = skb_copy_datagram_from_iter(skb, offset, from, len); + if (ret) + iov_iter_restore(from, &state); + return ret; +} +EXPORT_SYMBOL(skb_copy_datagram_from_iter_full); + int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { From 7fb1291257ea1e27dbc3f34c6a37b4d640aafdd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:55 +0100 Subject: [PATCH 0716/1307] vsock/virtio: Fix message iterator handling on transmit path Commit 6693731487a8 ("vsock/virtio: Allocate nonlinear SKBs for handling large transmit buffers") converted the virtio vsock transmit path to utilise nonlinear SKBs when handling large buffers. As part of this change, virtio_transport_fill_skb() was updated to call skb_copy_datagram_from_iter() instead of memcpy_from_msg() as the latter expects a single destination buffer and cannot handle nonlinear SKBs correctly. Unfortunately, during this conversion, I overlooked the error case when the copying function returns -EFAULT due to a fault on the input buffer in userspace. In this case, memcpy_from_msg() reverts the iterator to its initial state thanks to copy_from_iter_full() whereas skb_copy_datagram_from_iter() leaves the iterator partially advanced. This results in a WARN_ONCE() from the vsock code, which expects the iterator to stay in sync with the number of bytes transmitted so that virtio_transport_send_pkt_info() can return -EFAULT when it is called again: ------------[ cut here ]------------ 'send_pkt()' returns 0, but 65536 expected WARNING: CPU: 0 PID: 5503 at net/vmw_vsock/virtio_transport_common.c:428 virtio_transport_send_pkt_info+0xd11/0xf00 net/vmw_vsock/virtio_transport_common.c:426 Modules linked in: CPU: 0 UID: 0 PID: 5503 Comm: syz.0.17 Not tainted 6.16.0-syzkaller-12063-g37816488247d #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call virtio_transport_fill_skb_full() to restore the previous iterator behaviour. Cc: Jason Wang Cc: Stefano Garzarella Fixes: 6693731487a8 ("vsock/virtio: Allocate nonlinear SKBs for handling large transmit buffers") Reported-by: syzbot+b4d960daf7a3c7c2b7b1@syzkaller.appspotmail.com Signed-off-by: Will Deacon Acked-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-3-will@kernel.org Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index fe92e5fa95b4..dcc8a1d5851e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -105,12 +105,14 @@ static int virtio_transport_fill_skb(struct sk_buff *skb, size_t len, bool zcopy) { + struct msghdr *msg = info->msg; + if (zcopy) - return __zerocopy_sg_from_iter(info->msg, NULL, skb, - &info->msg->msg_iter, len, NULL); + return __zerocopy_sg_from_iter(msg, NULL, skb, + &msg->msg_iter, len, NULL); virtio_vsock_skb_put(skb, len); - return skb_copy_datagram_from_iter(skb, 0, &info->msg->msg_iter, len); + return skb_copy_datagram_from_iter_full(skb, 0, &msg->msg_iter, len); } static void virtio_transport_init_hdr(struct sk_buff *skb, From 01b9128c5db1b470575d07b05b67ffa3cb02ebf1 Mon Sep 17 00:00:00 2001 From: luoguangfei <15388634752@163.com> Date: Tue, 19 Aug 2025 07:25:27 +0800 Subject: [PATCH 0717/1307] net: macb: fix unregister_netdev call order in macb_remove() When removing a macb device, the driver calls phy_exit() before unregister_netdev(). This leads to a WARN from kernfs: ------------[ cut here ]------------ kernfs: can not remove 'attached_dev', no directory WARNING: CPU: 1 PID: 27146 at fs/kernfs/dir.c:1683 Call trace: kernfs_remove_by_name_ns+0xd8/0xf0 sysfs_remove_link+0x24/0x58 phy_detach+0x5c/0x168 phy_disconnect+0x4c/0x70 phylink_disconnect_phy+0x6c/0xc0 [phylink] macb_close+0x6c/0x170 [macb] ... macb_remove+0x60/0x168 [macb] platform_remove+0x5c/0x80 ... The warning happens because the PHY is being exited while the netdev is still registered. The correct order is to unregister the netdev before shutting down the PHY and cleaning up the MDIO bus. Fix this by moving unregister_netdev() ahead of phy_exit() in macb_remove(). Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization") Signed-off-by: luoguangfei <15388634752@163.com> Link: https://patch.msgid.link/20250818232527.1316-1-15388634752@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 9693f0289435..b29c3beae0b2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5399,11 +5399,11 @@ static void macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); + unregister_netdev(dev); phy_exit(bp->sgmii_phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); - unregister_netdev(dev); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); From a12946bef0407cf2db0899c83d42c47c00af3fbc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 14 Aug 2025 19:27:21 -0700 Subject: [PATCH 0718/1307] pinctrl: STMFX: add missing HAS_IOMEM dependency When building on ARCH=um (which does not set HAS_IOMEM), kconfig reports an unmet dependency caused by PINCTRL_STMFX. It selects MFD_STMFX, which depends on HAS_IOMEM. To stop this warning, PINCTRL_STMFX should also depend on HAS_IOMEM. kconfig warning: WARNING: unmet direct dependencies detected for MFD_STMFX Depends on [n]: HAS_IOMEM [=n] && I2C [=y] && OF [=y] Selected by [y]: - PINCTRL_STMFX [=y] && PINCTRL [=y] && I2C [=y] && OF_GPIO [=y] Fixes: 1490d9f841b1 ("pinctrl: Add STMFX GPIO expander Pinctrl/GPIO driver") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/20250815022721.1650885-1-rdunlap@infradead.org Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index ddd11668457c..be1ca8e85754 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -539,6 +539,7 @@ config PINCTRL_STMFX tristate "STMicroelectronics STMFX GPIO expander pinctrl driver" depends on I2C depends on OF_GPIO + depends on HAS_IOMEM select GENERIC_PINCONF select GPIOLIB_IRQCHIP select MFD_STMFX From 685ca577b408ffd9c5a4057a2acc0cd3e6978b36 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 31 Jul 2025 20:01:27 -0700 Subject: [PATCH 0719/1307] iommu/arm-smmu-v3: Fix smmu_domain->nr_ats_masters decrement The arm_smmu_attach_commit() updates master->ats_enabled before calling arm_smmu_remove_master_domain() that is supposed to clean up everything in the old domain, including the old domain's nr_ats_masters. So, it is supposed to use the old ats_enabled state of the device, not an updated state. This isn't a problem if switching between two domains where: - old ats_enabled = false; new ats_enabled = false - old ats_enabled = true; new ats_enabled = true but can fail cases where: - old ats_enabled = false; new ats_enabled = true (old domain should keep the counter but incorrectly decreased it) - old ats_enabled = true; new ats_enabled = false (old domain needed to decrease the counter but incorrectly missed it) Update master->ats_enabled after arm_smmu_remove_master_domain() to fix this. Fixes: 7497f4211f4f ("iommu/arm-smmu-v3: Make changing domains be hitless for ATS") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Acked-by: Will Deacon Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250801030127.2006979-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5968043ac802..2a8b46b948f0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2997,9 +2997,9 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) /* ATS is being switched off, invalidate the entire ATC */ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); } - master->ats_enabled = state->ats_enabled; arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); + master->ats_enabled = state->ats_enabled; } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) From 72b6f7cd89cea8251979b65528d302f9c0ed37bf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 14 Aug 2025 17:47:16 +0100 Subject: [PATCH 0720/1307] iommu/virtio: Make instance lookup robust Much like arm-smmu in commit 7d835134d4e1 ("iommu/arm-smmu: Make instance lookup robust"), virtio-iommu appears to have the same issue where iommu_device_register() makes the IOMMU instance visible to other API callers (including itself) straight away, but internally the instance isn't ready to recognise itself for viommu_probe_device() to work correctly until after viommu_probe() has returned. This matters a lot more now that bus_iommu_probe() has the DT/VIOT knowledge to probe client devices the way that was always intended. Tweak the lookup and initialisation in much the same way as for arm-smmu, to ensure that what we register is functional and ready to go. Cc: stable@vger.kernel.org Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Tested-by: Eric Auger Link: https://lore.kernel.org/r/308911aaa1f5be32a3a709996c7bd6cf71d30f33.1755190036.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 532db1de201b..b39d6f134ab2 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -998,8 +998,7 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head) iommu_dma_get_resv_regions(dev, head); } -static const struct iommu_ops viommu_ops; -static struct virtio_driver virtio_iommu_drv; +static const struct bus_type *virtio_bus_type; static int viommu_match_node(struct device *dev, const void *data) { @@ -1008,8 +1007,9 @@ static int viommu_match_node(struct device *dev, const void *data) static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL, - fwnode, viommu_match_node); + struct device *dev = bus_find_device(virtio_bus_type, NULL, fwnode, + viommu_match_node); + put_device(dev); return dev ? dev_to_virtio(dev)->priv : NULL; @@ -1160,6 +1160,9 @@ static int viommu_probe(struct virtio_device *vdev) if (!viommu) return -ENOMEM; + /* Borrow this for easy lookups later */ + virtio_bus_type = dev->bus; + spin_lock_init(&viommu->request_lock); ida_init(&viommu->domain_ids); viommu->dev = dev; @@ -1229,10 +1232,10 @@ static int viommu_probe(struct virtio_device *vdev) if (ret) goto err_free_vqs; - iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev); - vdev->priv = viommu; + iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev); + dev_info(dev, "input address: %u bits\n", order_base_2(viommu->geometry.aperture_end)); dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap); From 99d4d1a070870aa08163af8ce0522992b7f35d8c Mon Sep 17 00:00:00 2001 From: XianLiang Huang Date: Wed, 20 Aug 2025 15:22:48 +0800 Subject: [PATCH 0721/1307] iommu/riscv: prevent NULL deref in iova_to_phys The riscv_iommu_pte_fetch() function returns either NULL for unmapped/never-mapped iova, or a valid leaf pte pointer that requires no further validation. riscv_iommu_iova_to_phys() failed to handle NULL returns. Prevent null pointer dereference in riscv_iommu_iova_to_phys(), and remove the pte validation. Fixes: 488ffbf18171 ("iommu/riscv: Paging domain support") Cc: Tomasz Jeznach Signed-off-by: XianLiang Huang Link: https://lore.kernel.org/r/20250820072248.312-1-huangxianliang@lanxincomputing.com Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 2d0d31ba2886..0eae2f4bdc5e 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -1283,7 +1283,7 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, unsigned long *ptr; ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); - if (_io_pte_none(*ptr) || !_io_pte_present(*ptr)) + if (!ptr) return 0; return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1)); From 5245dc5ff9b1f6c02ef948f623432805ea148fca Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Thu, 31 Jul 2025 11:16:52 +0200 Subject: [PATCH 0722/1307] arm64: dts: imx8mp-tqma8mpql: fix LDO5 power off Fix SD card removal caused by automatic LDO5 power off after boot: LDO5: disabling mmc1: card 59b4 removed EXT4-fs (mmcblk1p2): shut down requested (2) Aborting journal on device mmcblk1p2-8. JBD2: I/O error when updating journal superblock for mmcblk1p2-8. To prevent this, add vqmmc regulator for USDHC, using a GPIO-controlled regulator that is supplied by LDO5. Since this is implemented on SoM but used on baseboards with SD-card interface, implement the functionality on SoM part and optionally enable it on baseboards if needed. Fixes: 418d1d840e42 ("arm64: dts: freescale: add initial device tree for TQMa8MPQL with i.MX8MP") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- .../imx8mp-tqma8mpql-mba8mp-ras314.dts | 13 ++++++----- .../freescale/imx8mp-tqma8mpql-mba8mpxl.dts | 13 ++++++----- .../boot/dts/freescale/imx8mp-tqma8mpql.dtsi | 22 +++++++++++++++++++ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts index d7fd9d36f824..f7346b3d35fe 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts @@ -467,6 +467,10 @@ &pwm4 { status = "okay"; }; +®_usdhc2_vqmmc { + status = "okay"; +}; + &sai5 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai5>; @@ -876,8 +880,7 @@ pinctrl_usdhc2: usdhc2grp { , , , - , - ; + ; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { @@ -886,8 +889,7 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { , , , - , - ; + ; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { @@ -896,8 +898,7 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { , , , - , - ; + ; }; pinctrl_usdhc2_gpio: usdhc2-gpiogrp { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts index 33cd92e63c5d..4eedd00d83b9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts @@ -604,6 +604,10 @@ &pwm3 { status = "okay"; }; +®_usdhc2_vqmmc { + status = "okay"; +}; + &sai3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai3>; @@ -983,8 +987,7 @@ pinctrl_usdhc2: usdhc2grp { , , , - , - ; + ; }; pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { @@ -993,8 +996,7 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { , , , - , - ; + ; }; pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { @@ -1003,8 +1005,7 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { , , , - , - ; + ; }; pinctrl_usdhc2_gpio: usdhc2-gpiogrp { diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi index fd70b686e7ef..b48d5da14727 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi @@ -24,6 +24,20 @@ reg_vcc3v3: regulator-vcc3v3 { regulator-max-microvolt = <3300000>; regulator-always-on; }; + + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_reg_usdhc2_vqmmc>; + regulator-name = "V_SD2"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + states = <1800000 0x1>, + <3300000 0x0>; + vin-supply = <&ldo5_reg>; + status = "disabled"; + }; }; &A53_0 { @@ -184,6 +198,10 @@ m24c64: eeprom@57 { }; }; +&usdhc2 { + vqmmc-supply = <®_usdhc2_vqmmc>; +}; + &usdhc3 { pinctrl-names = "default", "state_100mhz", "state_200mhz"; pinctrl-0 = <&pinctrl_usdhc3>; @@ -233,6 +251,10 @@ pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = ; }; + pinctrl_reg_usdhc2_vqmmc: regusdhc2vqmmcgrp { + fsl,pins = ; + }; + pinctrl_usdhc3: usdhc3grp { fsl,pins = , , From 917baa75e376084240ca1696ab29589006563128 Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Thu, 31 Jul 2025 11:16:53 +0200 Subject: [PATCH 0723/1307] arm64: dts: imx8mp-tqma8mpql: remove virtual 3.3V regulator BUCK4 rail supplies the 3.3V rail. Use the actual regulator instead of a virtual fixed regulator. Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-tqma8mpql.dtsi | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi index b48d5da14727..9716f24f7c6e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi @@ -16,15 +16,6 @@ memory@40000000 { reg = <0x0 0x40000000 0 0x80000000>; }; - /* identical to buck4_reg, but should never change */ - reg_vcc3v3: regulator-vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "VCC3V3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { compatible = "regulator-gpio"; pinctrl-names = "default"; @@ -187,14 +178,14 @@ at24c02: eeprom@53 { read-only; reg = <0x53>; pagesize = <16>; - vcc-supply = <®_vcc3v3>; + vcc-supply = <&buck4_reg>; }; m24c64: eeprom@57 { compatible = "atmel,24c64"; reg = <0x57>; pagesize = <32>; - vcc-supply = <®_vcc3v3>; + vcc-supply = <&buck4_reg>; }; }; @@ -211,7 +202,7 @@ &usdhc3 { non-removable; no-sd; no-sdio; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <&buck4_reg>; vqmmc-supply = <&buck5_reg>; status = "okay"; }; From c53cf8ce3bfe1309cb4fd4d74c5be27c26a86e52 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 10 Aug 2025 18:03:07 +0200 Subject: [PATCH 0724/1307] arm64: dts: imx8mp: Fix missing microSD slot vqmmc on DH electronics i.MX8M Plus DHCOM Add missing microSD slot vqmmc-supply property, otherwise the kernel might shut down LDO5 regulator and that would power off the microSD card slot, possibly while it is in use. Add the property to make sure the kernel is aware of the LDO5 regulator which supplies the microSD slot and keeps the LDO5 enabled. Fixes: 8d6712695bc8 ("arm64: dts: imx8mp: Add support for DH electronics i.MX8M Plus DHCOM and PDK2") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi index 7f754e0a5d69..68c2e0156a5c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi @@ -609,6 +609,7 @@ &usdhc2 { pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>; cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <&ldo5>; bus-width = <4>; status = "okay"; }; From 80733306290f6d2e05f0632e5d3e98cd16105c3c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 10 Aug 2025 18:04:32 +0200 Subject: [PATCH 0725/1307] arm64: dts: imx8mp: Fix missing microSD slot vqmmc on Data Modul i.MX8M Plus eDM SBC Add missing microSD slot vqmmc-supply property, otherwise the kernel might shut down LDO5 regulator and that would power off the microSD card slot, possibly while it is in use. Add the property to make sure the kernel is aware of the LDO5 regulator which supplies the microSD slot and keeps the LDO5 enabled. Fixes: 562d222f23f0 ("arm64: dts: imx8mp: Add support for Data Modul i.MX8M Plus eDM SBC") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts index d0fc5977258f..16078ff60ef0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts @@ -555,6 +555,7 @@ &usdhc2 { pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>; cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <&ldo5>; bus-width = <4>; status = "okay"; }; From e35318d8d2d2dccc50454e3fc0bd9caaf2a797cd Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 19 Aug 2025 10:43:37 +0800 Subject: [PATCH 0726/1307] arm64: dts: imx95-19x19-evk: correct the phy setting for flexcan1/2 1, the phy support up to 8Mbit/s databitrate for CAN FD. refer to product data sheet: https://www.nxp.com/docs/en/data-sheet/TJA1463.pdf 2, the standby pin of the phy is ACTIVE_LOW. 3, the phy of flexcan2 connect the standby/en pin to PCAL6408 on i2c4 bus. Fixes: 02b7adb791e1 ("arm64: dts: imx95-19x19-evk: add adc0 flexcan[1,2] i2c[2,3] uart5 spi3 and tpm3") Signed-off-by: Haibo Chen Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts index 2f949a0d48d2..9d034275c847 100644 --- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts @@ -80,17 +80,17 @@ linux_cma: linux,cma { flexcan1_phy: can-phy0 { compatible = "nxp,tjr1443"; #phy-cells = <0>; - max-bitrate = <1000000>; + max-bitrate = <8000000>; enable-gpios = <&i2c6_pcal6416 6 GPIO_ACTIVE_HIGH>; - standby-gpios = <&i2c6_pcal6416 5 GPIO_ACTIVE_HIGH>; + standby-gpios = <&i2c6_pcal6416 5 GPIO_ACTIVE_LOW>; }; flexcan2_phy: can-phy1 { compatible = "nxp,tjr1443"; #phy-cells = <0>; - max-bitrate = <1000000>; - enable-gpios = <&i2c6_pcal6416 4 GPIO_ACTIVE_HIGH>; - standby-gpios = <&i2c6_pcal6416 3 GPIO_ACTIVE_HIGH>; + max-bitrate = <8000000>; + enable-gpios = <&i2c4_gpio_expander_21 4 GPIO_ACTIVE_HIGH>; + standby-gpios = <&i2c4_gpio_expander_21 3 GPIO_ACTIVE_LOW>; }; reg_vref_1v8: regulator-1p8v { From 37e5caa5571b5a60b0c835a0bc09ab1e53f57bfe Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 20 Aug 2025 18:27:26 +0200 Subject: [PATCH 0727/1307] arm64: dts: imx95: Fix JPEG encoder node assigned clock The assigned clock for JPEG encoder IP has to be IMX95_CLK_VPUBLK_JPEG_ENC and not IMX95_CLK_VPUBLK_JPEG_DEC (_ENC at the end, not _DEC). This is a simple copy-paste error, fix it. Fixes: 153c039a7357 ("arm64: dts: imx95: add jpeg encode and decode nodes") Signed-off-by: Marek Vasut Reviewed-by: Laurent Pinchart Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 4ca6a7ea586e..8296888bce59 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1843,7 +1843,7 @@ jpegenc: jpegenc@4c550000 { ; clocks = <&scmi_clk IMX95_CLK_VPU>, <&vpu_blk_ctrl IMX95_CLK_VPUBLK_JPEG_ENC>; - assigned-clocks = <&vpu_blk_ctrl IMX95_CLK_VPUBLK_JPEG_DEC>; + assigned-clocks = <&vpu_blk_ctrl IMX95_CLK_VPUBLK_JPEG_ENC>; assigned-clock-parents = <&scmi_clk IMX95_CLK_VPUJPEG>; power-domains = <&scmi_devpd IMX95_PD_VPU>; }; From 08fb45446ebf1e2e435f95163c59d692acb0b514 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 21 Aug 2025 08:40:12 +0200 Subject: [PATCH 0728/1307] drm/amdgpu: Pin buffers while vmap'ing exported dma-buf objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current dma-buf vmap semantics require that the mapped buffer remains in place until the corresponding vunmap has completed. For GEM-SHMEM, this used to be guaranteed by a pin operation while creating an S/G table in import. GEM-SHMEN can now import dma-buf objects without creating the S/G table, so the pin is missing. Leads to page-fault errors, such as the one shown below. [ 102.101726] BUG: unable to handle page fault for address: ffffc90127000000 [...] [ 102.157102] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl] [...] [ 102.243250] Call Trace: [ 102.245695] [ 102.2477V95] ? validate_chain+0x24e/0x5e0 [ 102.251805] ? __lock_acquire+0x568/0xae0 [ 102.255807] udl_render_hline+0x165/0x341 [udl] [ 102.260338] ? __pfx_udl_render_hline+0x10/0x10 [udl] [ 102.265379] ? local_clock_noinstr+0xb/0x100 [ 102.269642] ? __lock_release.isra.0+0x16c/0x2e0 [ 102.274246] ? mark_held_locks+0x40/0x70 [ 102.278177] udl_primary_plane_helper_atomic_update+0x43e/0x680 [udl] [ 102.284606] ? __pfx_udl_primary_plane_helper_atomic_update+0x10/0x10 [udl] [ 102.291551] ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170 [ 102.297208] ? lockdep_hardirqs_on+0x88/0x130 [ 102.301554] ? _raw_spin_unlock_irq+0x24/0x50 [ 102.305901] ? wait_for_completion_timeout+0x2bb/0x3a0 [ 102.311028] ? drm_atomic_helper_calc_timestamping_constants+0x141/0x200 [ 102.317714] ? drm_atomic_helper_commit_planes+0x3b6/0x1030 [ 102.323279] drm_atomic_helper_commit_planes+0x3b6/0x1030 [ 102.328664] drm_atomic_helper_commit_tail+0x41/0xb0 [ 102.333622] commit_tail+0x204/0x330 [...] [ 102.529946] ---[ end trace 0000000000000000 ]--- [ 102.651980] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl] In this stack strace, udl (based on GEM-SHMEM) imported and vmap'ed a dma-buf from amdgpu. Amdgpu relocated the buffer, thereby invalidating the mapping. Provide a custom dma-buf vmap method in amdgpu that pins the object before mapping it's buffer's pages into kernel address space. Do the opposite in vunmap. Note that dma-buf vmap differs from GEM vmap in how it handles relocation. While dma-buf vmap keeps the buffer in place, GEM vmap requires the caller to keep the buffer in place. Hence, this fix is in amdgpu's dma-buf code instead of its GEM code. A discussion of various approaches to solving the problem is available at [1]. v3: - try (GTT | VRAM); drop CPU domain (Christian) v2: - only use mapable domains (Christian) - try pinning to domains in preferred order Signed-off-by: Thomas Zimmermann Fixes: 660cd44659a0 ("drm/shmem-helper: Import dmabuf without mapping its sg_table") Reported-by: Thomas Zimmermann Closes: https://lore.kernel.org/dri-devel/ba1bdfb8-dbf7-4372-bdcb-df7e0511c702@suse.de/ Cc: Shixiong Ou Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Simona Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Link: https://lore.kernel.org/dri-devel/9792c6c3-a2b8-4b2b-b5ba-fba19b153e21@suse.de/ # [1] Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250821064031.39090-1-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34 +++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 5743ebb2f1b7..ce27cb5bb05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return ret; } +static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int ret; + + /* + * Pin to keep buffer in place while it's vmap'ed. The actual + * domain is not that important as long as it's mapable. Using + * GTT and VRAM should be compatible with most use cases. + */ + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM); + if (ret) + return ret; + ret = drm_gem_dmabuf_vmap(dma_buf, map); + if (ret) + amdgpu_bo_unpin(bo); + + return ret; +} + +static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + drm_gem_dmabuf_vunmap(dma_buf, map); + amdgpu_bo_unpin(bo); +} + const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, .pin = amdgpu_dma_buf_pin, @@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, + .vmap = amdgpu_dma_buf_vmap, + .vunmap = amdgpu_dma_buf_vunmap, }; /** From 563fcd6475931c5c8c652a4dd548256314cc87ed Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 22 Aug 2025 14:14:18 +0200 Subject: [PATCH 0729/1307] pinctrl: airoha: Fix return value in pinconf callbacks Pinctrl stack requires ENOTSUPP error code if the parameter is not supported by the pinctrl driver. Fix the returned error code in pinconf callbacks if the operation is not supported. Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/20250822-airoha-pinconf-err-val-fix-v1-1-87b4f264ced2@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 5f1ec9e0de21..1b2f132d76f0 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2696,7 +2696,7 @@ static int airoha_pinconf_get(struct pinctrl_dev *pctrl_dev, arg = 1; break; default: - return -EOPNOTSUPP; + return -ENOTSUPP; } *config = pinconf_to_config_packed(param, arg); @@ -2788,7 +2788,7 @@ static int airoha_pinconf_set(struct pinctrl_dev *pctrl_dev, break; } default: - return -EOPNOTSUPP; + return -ENOTSUPP; } } @@ -2805,10 +2805,10 @@ static int airoha_pinconf_group_get(struct pinctrl_dev *pctrl_dev, if (airoha_pinconf_get(pctrl_dev, airoha_pinctrl_groups[group].pins[i], config)) - return -EOPNOTSUPP; + return -ENOTSUPP; if (i && cur_config != *config) - return -EOPNOTSUPP; + return -ENOTSUPP; cur_config = *config; } From 9c6182843b0d02ca04cc1d946954a65a2286c7db Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Fri, 22 Aug 2025 20:58:08 +0800 Subject: [PATCH 0730/1307] ALSA: usb-audio: Add mute TLV for playback volumes on some devices Applying the quirk of that, the lowest Playback mixer volume setting mutes the audio output, on more devices. Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/merge_requests/2514 Cc: Tested-by: Guoli An Signed-off-by: Cryolitia PukNgae Link: https://patch.msgid.link/20250822-mixer-quirk-v1-1-b19252239c1c@uniontech.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 7cc27ae5512f..6b47b3145d2c 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -4609,9 +4609,11 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; /* lowest playback value is muted on some devices */ + case USB_ID(0x0572, 0x1b09): /* Conexant Systems (Rockwell), Inc. */ case USB_ID(0x0d8c, 0x000c): /* C-Media */ case USB_ID(0x0d8c, 0x0014): /* C-Media */ case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */ + case USB_ID(0x2d99, 0x0026): /* HECATE G2 GAMING HEADSET */ if (strstr(kctl->id.name, "Playback")) cval->min_mute = 1; break; From dc88b77113d75a8fd5818355f8e313bec144ea5d Mon Sep 17 00:00:00 2001 From: Brady Norander Date: Wed, 20 Aug 2025 21:47:30 -0400 Subject: [PATCH 0731/1307] ALSA: hda: intel-dsp-config: Select SOF driver on MTL Chromebooks The SOF driver is required for functional audio on MTL Chromebooks Signed-off-by: Brady Norander Link: https://patch.msgid.link/20250821014730.8843-1-bradynorander@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/core/intel-dsp-config.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sound/hda/core/intel-dsp-config.c b/sound/hda/core/intel-dsp-config.c index 3cb1e7fc3b3b..00f184917623 100644 --- a/sound/hda/core/intel-dsp-config.c +++ b/sound/hda/core/intel-dsp-config.c @@ -167,9 +167,9 @@ static const struct config_entry config_table[] = { /* * CoffeeLake, CannonLake, CometLake, IceLake, TigerLake, AlderLake, - * RaptorLake use legacy HDAudio driver except for Google Chromebooks - * and when DMICs are present. Two cases are required since Coreboot - * does not expose NHLT tables. + * RaptorLake, MeteorLake use legacy HDAudio driver except for Google + * Chromebooks and when DMICs are present. Two cases are required since + * Coreboot does not expose NHLT tables. * * When the Chromebook quirk is not present, it's based on information * that no such device exists. When the quirk is present, it could be @@ -516,6 +516,19 @@ static const struct config_entry config_table[] = { /* Meteor Lake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE) /* Meteorlake-P */ + { + .flags = FLAG_SOF, + .device = PCI_DEVICE_ID_INTEL_HDA_MTL, + .dmi_table = (const struct dmi_system_id []) { + { + .ident = "Google Chromebooks", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + } + }, + {} + } + }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = PCI_DEVICE_ID_INTEL_HDA_MTL, From 5d7eba62e5eb68347de59b31b347b24f304cf21c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 22 Aug 2025 13:40:18 -0400 Subject: [PATCH 0732/1307] Bluetooth: hci_conn: Make unacked packet handling more robust This attempts to make unacked packet handling more robust by detecting if there are no connections left then restore all buffers of the respective pool. Fixes: 5638d9ea9c01 ("Bluetooth: hci_conn: Fix not restoring ISO buffer count on disconnect") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 58 ++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a879290dd28..e524bb59bff2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -149,8 +149,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_chan_list_flush(conn); - hci_conn_hash_del(hdev, conn); - if (HCI_CONN_HANDLE_UNSET(conn->handle)) ida_free(&hdev->unset_handle_ida, conn->handle); @@ -1152,28 +1150,54 @@ void hci_conn_del(struct hci_conn *conn) disable_delayed_work_sync(&conn->auto_accept_work); disable_delayed_work_sync(&conn->idle_work); - if (conn->type == ACL_LINK) { - /* Unacked frames */ - hdev->acl_cnt += conn->sent; - } else if (conn->type == LE_LINK) { - cancel_delayed_work(&conn->le_conn_timeout); + /* Remove the connection from the list so unacked logic can detect when + * a certain pool is not being utilized. + */ + hci_conn_hash_del(hdev, conn); - if (hdev->le_pkts) - hdev->le_cnt += conn->sent; + /* Handle unacked frames: + * + * - In case there are no connection, or if restoring the buffers + * considered in transist would overflow, restore all buffers to the + * pool. + * - Otherwise restore just the buffers considered in transit for the + * hci_conn + */ + switch (conn->type) { + case ACL_LINK: + if (!hci_conn_num(hdev, ACL_LINK) || + hdev->acl_cnt + conn->sent > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; else hdev->acl_cnt += conn->sent; - } else { - /* Unacked ISO frames */ - if (conn->type == CIS_LINK || - conn->type == BIS_LINK || - conn->type == PA_LINK) { - if (hdev->iso_pkts) - hdev->iso_cnt += conn->sent; - else if (hdev->le_pkts) + break; + case LE_LINK: + cancel_delayed_work(&conn->le_conn_timeout); + + if (hdev->le_pkts) { + if (!hci_conn_num(hdev, LE_LINK) || + hdev->le_cnt + conn->sent > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + else hdev->le_cnt += conn->sent; + } else { + if ((!hci_conn_num(hdev, LE_LINK) && + !hci_conn_num(hdev, ACL_LINK)) || + hdev->acl_cnt + conn->sent > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; else hdev->acl_cnt += conn->sent; } + break; + case CIS_LINK: + case BIS_LINK: + case PA_LINK: + if (!hci_iso_count(hdev) || + hdev->iso_cnt + conn->sent > hdev->iso_pkts) + hdev->iso_cnt = hdev->iso_pkts; + else + hdev->iso_cnt += conn->sent; + break; } skb_queue_purge(&conn->data_q); From 2f050a5392b7a0928bf836d9891df4851463512c Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Tue, 12 Aug 2025 17:55:26 +0200 Subject: [PATCH 0733/1307] Bluetooth: hci_event: Treat UNKNOWN_CONN_ID on disconnect as success When the host sends an HCI_OP_DISCONNECT command, the controller may respond with the status HCI_ERROR_UNKNOWN_CONN_ID (0x02). E.g. this can happen on resume from suspend, if the link was terminated by the remote device before the event mask was correctly set. This is a btmon snippet that shows the issue: ``` > ACL Data RX: Handle 3 flags 0x02 dlen 12 L2CAP: Disconnection Request (0x06) ident 5 len 4 Destination CID: 65 Source CID: 72 < ACL Data TX: Handle 3 flags 0x00 dlen 12 L2CAP: Disconnection Response (0x07) ident 5 len 4 Destination CID: 65 Source CID: 72 > ACL Data RX: Handle 3 flags 0x02 dlen 12 L2CAP: Disconnection Request (0x06) ident 6 len 4 Destination CID: 64 Source CID: 71 < ACL Data TX: Handle 3 flags 0x00 dlen 12 L2CAP: Disconnection Response (0x07) ident 6 len 4 Destination CID: 64 Source CID: 71 < HCI Command: Set Event Mask (0x03|0x0001) plen 8 Mask: 0x3dbff807fffbffff Inquiry Complete Inquiry Result Connection Complete Connection Request Disconnection Complete Authentication Complete [...] < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 3 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 1 Status: Unknown Connection Identifier (0x02) ``` Currently, the hci_cs_disconnect function treats any non-zero status as a command failure. This can be misleading because the connection is indeed being terminated and the controller is confirming that is has no knowledge of that connection handle. Meaning that the initial request of disconnecting a device should be treated as done. With this change we allow the function to proceed, following the success path, which correctly calls `mgmt_device_disconnected` and ensures a consistent state. Link: https://github.com/bluez/bluez/issues/1226 Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Ludovico de Nittis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fe7cdd67ad2a..6c67dfa139e2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2703,7 +2703,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) if (!conn) goto unlock; - if (status) { + if (status && status != HCI_ERROR_UNKNOWN_CONN_ID) { mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); From b7fafbc499b5ee164018eb0eefe9027f5a6aaad2 Mon Sep 17 00:00:00 2001 From: Ludovico de Nittis Date: Tue, 12 Aug 2025 17:55:27 +0200 Subject: [PATCH 0734/1307] Bluetooth: hci_event: Mark connection as closed during suspend disconnect When suspending, the disconnect command for an active Bluetooth connection could be issued, but the corresponding `HCI_EV_DISCONN_COMPLETE` event might not be received before the system completes the suspend process. This can lead to an inconsistent state. On resume, the controller may auto-accept reconnections from the same device (due to suspend event filters), but these new connections are rejected by the kernel which still has connection objects from before suspend. Resulting in errors like: ``` kernel: Bluetooth: hci0: ACL packet for unknown connection handle 1 kernel: Bluetooth: hci0: Ignoring HCI_Connection_Complete for existing connection ``` This is a btmon snippet that shows the issue: ``` < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 1 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) Reason: Remote User Terminated Connection (0x13) > HCI Event: Command Status (0x0f) plen 4 Disconnect (0x01|0x0006) ncmd 2 Status: Success (0x00) [...] // Host suspends with the event filter set for the device // On resume, the device tries to reconnect with a new handle > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 2 Address: 78:20:A5:4A:DF:28 (Nintendo Co.,Ltd) // Kernel ignores this event because there is an existing connection with // handle 1 ``` By explicitly setting the connection state to BT_CLOSED we can ensure a consistent state, even if we don't receive the disconnect complete event in time. Link: https://github.com/bluez/bluez/issues/1226 Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") Signed-off-by: Ludovico de Nittis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c67dfa139e2..ce0ff06f2f73 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2718,6 +2718,12 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) goto done; } + /* During suspend, mark connection as closed immediately + * since we might not receive HCI_EV_DISCONN_COMPLETE + */ + if (hdev->suspended) + conn->state = BT_CLOSED; + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags); if (conn->type == ACL_LINK) { From 15bf2c6391bafb14a3020d06ec0761bce0803463 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 20 Aug 2025 17:04:00 -0400 Subject: [PATCH 0735/1307] Bluetooth: hci_event: Detect if HCI_EV_NUM_COMP_PKTS is unbalanced This attempts to detect if HCI_EV_NUM_COMP_PKTS contain an unbalanced (more than currently considered outstanding) number of packets otherwise it could cause the hcon->sent to underflow and loop around breaking the tracking of the outstanding packets pending acknowledgment. Fixes: f42809185896 ("Bluetooth: Simplify num_comp_pkts_evt function") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ce0ff06f2f73..904bcff4f4ca 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4404,7 +4404,17 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, if (!conn) continue; - conn->sent -= count; + /* Check if there is really enough packets outstanding before + * attempting to decrease the sent counter otherwise it could + * underflow.. + */ + if (conn->sent >= count) { + conn->sent -= count; + } else { + bt_dev_warn(hdev, "hcon %p sent %u < count %u", + conn, conn->sent, count); + conn->sent = 0; + } for (i = 0; i < count; ++i) hci_conn_tx_dequeue(conn); From 55b9551fcdf6a2fe7f3422918d5697b56794da72 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 20 Aug 2025 10:16:17 +0800 Subject: [PATCH 0736/1307] Bluetooth: hci_event: Disconnect device when BIG sync is lost When a BIG sync is lost, the device should be set to "disconnected". This ensures symmetry with the ISO path setup, where the device is marked as "connected" once the path is established. Without this change, the device state remains inconsistent and may lead to a memory leak. Fixes: b2a5f2e1c127 ("Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event") Signed-off-by: Yang Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 5 +++++ net/bluetooth/mgmt.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 904bcff4f4ca..7a2174851857 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7024,6 +7024,7 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, { struct hci_evt_le_big_sync_lost *ev = data; struct hci_conn *bis, *conn; + bool mgmt_conn; bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); @@ -7042,6 +7043,10 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, BT_CONNECTED, HCI_ROLE_SLAVE))) { + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); + mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, + ev->reason, mgmt_conn); + clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); hci_disconn_cfm(bis, ev->reason); hci_conn_del(bis); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3166f5fb876b..90e37ff2c85d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9705,7 +9705,9 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (!mgmt_connected) return; - if (link_type != ACL_LINK && link_type != LE_LINK) + if (link_type != ACL_LINK && + link_type != LE_LINK && + link_type != BIS_LINK) return; bacpy(&ev.addr.bdaddr, bdaddr); From 6bbd0d3f0c23fc53c17409dd7476f38ae0ff0cd9 Mon Sep 17 00:00:00 2001 From: Pavel Shpakovskiy Date: Fri, 22 Aug 2025 12:20:55 +0300 Subject: [PATCH 0737/1307] Bluetooth: hci_sync: fix set_local_name race condition Function set_name_sync() uses hdev->dev_name field to send HCI_OP_WRITE_LOCAL_NAME command, but copying from data to hdev->dev_name is called after mgmt cmd was queued, so it is possible that function set_name_sync() will read old name value. This change adds name as a parameter for function hci_update_name_sync() to avoid race condition. Fixes: 6f6ff38a1e14 ("Bluetooth: hci_sync: Convert MGMT_OP_SET_LOCAL_NAME") Signed-off-by: Pavel Shpakovskiy Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_sync.c | 6 +++--- net/bluetooth/mgmt.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 5224f57f6af2..e352a4e0ef8d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -93,7 +93,7 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); -int hci_update_name_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev, const u8 *name); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 31d72b9683ef..b6f888d8354e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3481,13 +3481,13 @@ int hci_update_scan_sync(struct hci_dev *hdev) return hci_write_scan_enable_sync(hdev, scan); } -int hci_update_name_sync(struct hci_dev *hdev) +int hci_update_name_sync(struct hci_dev *hdev, const u8 *name) { struct hci_cp_write_local_name cp; memset(&cp, 0, sizeof(cp)); - memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + memcpy(cp.name, name, sizeof(cp.name)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp, @@ -3540,7 +3540,7 @@ int hci_powered_update_sync(struct hci_dev *hdev) hci_write_fast_connectable_sync(hdev, false); hci_update_scan_sync(hdev); hci_update_class_sync(hdev); - hci_update_name_sync(hdev); + hci_update_name_sync(hdev, hdev->dev_name); hci_update_eir_sync(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 90e37ff2c85d..50634ef5c8b7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3892,8 +3892,11 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err) static int set_name_sync(struct hci_dev *hdev, void *data) { + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_set_local_name *cp = cmd->param; + if (lmp_bredr_capable(hdev)) { - hci_update_name_sync(hdev); + hci_update_name_sync(hdev, cp->name); hci_update_eir_sync(hdev); } From 7b28232921782aa38048249132899c337405eaa8 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 17 Aug 2025 14:49:06 +0200 Subject: [PATCH 0738/1307] mips: dts: lantiq: danube: add missing burst length property The upstream dts lacks the lantiq,{rx/tx}-burst-length property. Other issues were also fixed: arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'interrupt-names' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'lantiq,tx-burst-length' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): 'lantiq,rx-burst-length' is a required property from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# Fixes: 14d4e308e0aa ("net: lantiq: configure the burst length in ethernet drivers") Signed-off-by: Aleksander Jan Bajkowski Acked-by: Jakub Kicinski --- arch/mips/boot/dts/lantiq/danube_easy50712.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index 1ce20b7d05cb..d8b3cd69eda3 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -87,8 +87,11 @@ etop@e180000 { reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; interrupts = <73 78>; + interrupt-names = "tx", "rx"; phy-mode = "rmii"; mac-address = [ 00 11 22 33 44 55 ]; + lantiq,rx-burst-length = <4>; + lantiq,tx-burst-length = <4>; }; stp0: stp@e100bb0 { From 8c431ea8f3f795c4b9cfa57a85bc4166b9cce0ac Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 17 Aug 2025 14:49:07 +0200 Subject: [PATCH 0739/1307] mips: lantiq: xway: sysctrl: rename the etop node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bindig requires a node name matching ‘^ethernet@[0-9a-f]+$’. This patch changes the clock name from “etop” to “ethernet”. This fixes the following warning: arch/mips/boot/dts/lantiq/danube_easy50712.dtb: etop@e180000 (lantiq,etop-xway): $nodename:0: 'etop@e180000' does not match '^ethernet@[0-9a-f]+$' from schema $id: http://devicetree.org/schemas/net/lantiq,etop-xway.yaml# Fixes: dac0bad93741 ("dt-bindings: net: lantiq,etop-xway: Document Lantiq Xway ETOP bindings") Signed-off-by: Aleksander Jan Bajkowski Acked-by: Jakub Kicinski --- arch/mips/boot/dts/lantiq/danube_easy50712.dts | 2 +- arch/mips/lantiq/xway/sysctrl.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/boot/dts/lantiq/danube_easy50712.dts b/arch/mips/boot/dts/lantiq/danube_easy50712.dts index d8b3cd69eda3..c4d7aa5753b0 100644 --- a/arch/mips/boot/dts/lantiq/danube_easy50712.dts +++ b/arch/mips/boot/dts/lantiq/danube_easy50712.dts @@ -82,7 +82,7 @@ conf_out { }; }; - etop@e180000 { + ethernet@e180000 { compatible = "lantiq,etop-xway"; reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 5a75283d17f1..6031a0272d87 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -497,7 +497,7 @@ void __init ltq_soc_init(void) ifccr = CGU_IFCCR_VR9; pcicr = CGU_PCICR_VR9; } else { - clkdev_add_pmu("1e180000.etop", NULL, 1, 0, PMU_PPE); + clkdev_add_pmu("1e180000.ethernet", NULL, 1, 0, PMU_PPE); } if (!of_machine_is_compatible("lantiq,ase")) @@ -531,9 +531,9 @@ void __init ltq_soc_init(void) CLOCK_133M, CLOCK_133M); clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0); clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P); - clkdev_add_pmu("1e180000.etop", "ppe", 1, 0, PMU_PPE); - clkdev_add_cgu("1e180000.etop", "ephycgu", CGU_EPHY); - clkdev_add_pmu("1e180000.etop", "ephy", 1, 0, PMU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ppe", 1, 0, PMU_PPE); + clkdev_add_cgu("1e180000.ethernet", "ephycgu", CGU_EPHY); + clkdev_add_pmu("1e180000.ethernet", "ephy", 1, 0, PMU_EPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_ASE_SDIO); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); } else if (of_machine_is_compatible("lantiq,grx390")) { @@ -592,7 +592,7 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0 | PMU_AHBM); clkdev_add_pmu("1f203034.usb2-phy", "phy", 1, 0, PMU_USB1_P); clkdev_add_pmu("1e106000.usb", "otg", 1, 0, PMU_USB1 | PMU_AHBM); - clkdev_add_pmu("1e180000.etop", "switch", 1, 0, PMU_SWITCH); + clkdev_add_pmu("1e180000.ethernet", "switch", 1, 0, PMU_SWITCH); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); From 80af3745ca465c6c47e833c1902004a7fa944f37 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Aug 2025 11:08:46 +0300 Subject: [PATCH 0740/1307] of: dynamic: Fix use after free in of_changeset_add_prop_helper() If the of_changeset_add_property() function call fails, then this code frees "new_pp" and then dereference it on the next line. Return the error code directly instead. Fixes: c81f6ce16785 ("of: dynamic: Fix memleak when of_pci_add_properties() failed") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aKgljjhnpa4lVpdx@stanley.mountain Signed-off-by: Rob Herring (Arm) --- drivers/of/dynamic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index dd30b7d8b5e4..2eaaddcb0ec4 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -935,13 +935,15 @@ static int of_changeset_add_prop_helper(struct of_changeset *ocs, return -ENOMEM; ret = of_changeset_add_property(ocs, np, new_pp); - if (ret) + if (ret) { __of_prop_free(new_pp); + return ret; + } new_pp->next = np->deadprops; np->deadprops = new_pp; - return ret; + return 0; } /** From e3d01979e4bff5c87eb4054a22e7568bb679b1fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Aug 2025 19:55:22 -0400 Subject: [PATCH 0741/1307] fgraph: Copy args in intermediate storage with entry The output of the function graph tracer has two ways to display its entries. One way for leaf functions with no events recorded within them, and the other is for functions with events recorded inside it. As function graph has an entry and exit event, to simplify the output of leaf functions it combines the two, where as non leaf functions are separate: 2) | invoke_rcu_core() { 2) | raise_softirq() { 2) 0.391 us | __raise_softirq_irqoff(); 2) 1.191 us | } 2) 2.086 us | } The __raise_softirq_irqoff() function above is really two events that were merged into one. Otherwise it would have looked like: 2) | invoke_rcu_core() { 2) | raise_softirq() { 2) | __raise_softirq_irqoff() { 2) 0.391 us | } 2) 1.191 us | } 2) 2.086 us | } In order to do this merge, the reading of the trace output file needs to look at the next event before printing. But since the pointer to the event is on the ring buffer, it needs to save the entry event before it looks at the next event as the next event goes out of focus as soon as a new event is read from the ring buffer. After it reads the next event, it will print the entry event with either the '{' (non leaf) or ';' and timestamps (leaf). The iterator used to read the trace file has storage for this event. The problem happens when the function graph tracer has arguments attached to the entry event as the entry now has a variable length "args" field. This field only gets set when funcargs option is used. But the args are not recorded in this temp data and garbage could be printed. The entry field is copied via: data->ent = *curr; Where "curr" is the entry field. But this method only saves the non variable length fields from the structure. Add a helper structure to the iterator data that adds the max args size to the data storage in the iterator. Then simply copy the entire entry into this storage (with size protection). Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Link: https://lore.kernel.org/20250820195522.51d4a268@gandalf.local.home Reported-by: Sasha Levin Tested-by: Sasha Levin Closes: https://lore.kernel.org/all/aJaxRVKverIjF4a6@lappy/ Fixes: ff5c9c576e75 ("ftrace: Add support for function argument to graph tracer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 66e1a527cf1a..a7f4b9a47a71 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -27,14 +27,21 @@ struct fgraph_cpu_data { unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; }; +struct fgraph_ent_args { + struct ftrace_graph_ent_entry ent; + /* Force the sizeof of args[] to have FTRACE_REGS_MAX_ARGS entries */ + unsigned long args[FTRACE_REGS_MAX_ARGS]; +}; + struct fgraph_data { struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ union { - struct ftrace_graph_ent_entry ent; + struct fgraph_ent_args ent; + /* TODO allow retaddr to have args */ struct fgraph_retaddr_ent_entry rent; - } ent; + }; struct ftrace_graph_ret_entry ret; int failed; int cpu; @@ -627,10 +634,13 @@ get_return_for_leaf(struct trace_iterator *iter, * Save current and next entries for later reference * if the output fails. */ - if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) - data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; - else - data->ent.ent = *curr; + if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) { + data->rent = *(struct fgraph_retaddr_ent_entry *)curr; + } else { + int size = min((int)sizeof(data->ent), (int)iter->ent_size); + + memcpy(&data->ent, curr, size); + } /* * If the next event is not a return type, then * we only care about what type it is. Otherwise we can From 4013aef2ced9b756a410f50d12df9ebe6a883e4a Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Fri, 22 Aug 2025 03:33:43 +0000 Subject: [PATCH 0742/1307] ftrace: Fix potential warning in trace_printk_seq during ftrace_dump When calling ftrace_dump_one() concurrently with reading trace_pipe, a WARN_ON_ONCE() in trace_printk_seq() can be triggered due to a race condition. The issue occurs because: CPU0 (ftrace_dump) CPU1 (reader) echo z > /proc/sysrq-trigger !trace_empty(&iter) trace_iterator_reset(&iter) <- len = size = 0 cat /sys/kernel/tracing/trace_pipe trace_find_next_entry_inc(&iter) __find_next_entry ring_buffer_empty_cpu <- all empty return NULL trace_printk_seq(&iter.seq) WARN_ON_ONCE(s->seq.len >= s->seq.size) In the context between trace_empty() and trace_find_next_entry_inc() during ftrace_dump, the ring buffer data was consumed by other readers. This caused trace_find_next_entry_inc to return NULL, failing to populate `iter.seq`. At this point, due to the prior trace_iterator_reset, both `iter.seq.len` and `iter.seq.size` were set to 0. Since they are equal, the WARN_ON_ONCE condition is triggered. Move the trace_printk_seq() into the if block that checks to make sure the return value of trace_find_next_entry_inc() is non-NULL in ftrace_dump_one(), ensuring the 'iter.seq' is properly populated before subsequent operations. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Ingo Molnar Link: https://lore.kernel.org/20250822033343.3000289-1-wutengda@huaweicloud.com Fixes: d769041f8653 ("ring_buffer: implement new locking") Signed-off-by: Tengda Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d8935ed416d..1b7db732c0b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10638,10 +10638,10 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m ret = print_trace_line(&iter); if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(&iter); + + trace_printk_seq(&iter.seq); } touch_nmi_watchdog(); - - trace_printk_seq(&iter.seq); } if (!cnt) From abadf0ff63be488dc502ecfc9f622929a21b7117 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 21 Aug 2025 03:03:46 +0000 Subject: [PATCH 0743/1307] page_pool: fix incorrect mp_ops error handling Minor fix to the memory provider error handling, we should be jumping to free_ptr_ring in this error case rather than returning directly. Found by code-inspection. Cc: skhawaja@google.com Fixes: b400f4b87430 ("page_pool: Set `dma_sync` to false for devmem memory provider") Signed-off-by: Mina Almasry Reviewed-by: Samiullah Khawaja Link: https://patch.msgid.link/20250821030349.705244-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 343a6cac21e3..ba70569bd4b0 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -287,8 +287,10 @@ static int page_pool_init(struct page_pool *pool, } if (pool->mp_ops) { - if (!pool->dma_map || !pool->dma_sync) - return -EOPNOTSUPP; + if (!pool->dma_map || !pool->dma_sync) { + err = -EOPNOTSUPP; + goto free_ptr_ring; + } if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { err = -EFAULT; From a64494aafc56939564e3e9e57f99df5c27204e04 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Thu, 21 Aug 2025 11:55:28 +0530 Subject: [PATCH 0744/1307] Octeontx2-vf: Fix max packet length errors Once driver submits the packets to the hardware, each packet traverse through multiple transmit levels in the following order: SMQ -> TL4 -> TL3 -> TL2 -> TL1 The SMQ supports configurable minimum and maximum packet sizes. It enters to a hang state, if driver submits packets with out of bound lengths. To avoid the same, implement packet length validation before submitting packets to the hardware. Increment tx_dropped counter on failure. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Fixes: 22f858796758 ("octeontx2-pf: Add basic net_device_ops") Fixes: 3ca6c4c882a7 ("octeontx2-pf: Add packet transmission support") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250821062528.1697992-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 4 +++- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +++ .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 10 ++++++++++ drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 13 ++++++++++++- drivers/net/ethernet/marvell/octeontx2/nic/rep.h | 1 + 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index f674729124e6..aff17c37ddde 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -124,7 +124,9 @@ void otx2_get_dev_stats(struct otx2_nic *pfvf) dev_stats->rx_ucast_frames; dev_stats->tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); - dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP); + dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP) + + (unsigned long)atomic_long_read(&dev_stats->tx_discards); + dev_stats->tx_bcast_frames = OTX2_GET_TX_STATS(TX_BCAST); dev_stats->tx_mcast_frames = OTX2_GET_TX_STATS(TX_MCAST); dev_stats->tx_ucast_frames = OTX2_GET_TX_STATS(TX_UCAST); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e3765b73c434..1c8a3c078a64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -153,6 +153,7 @@ struct otx2_dev_stats { u64 tx_bcast_frames; u64 tx_mcast_frames; u64 tx_drops; + atomic_long_t tx_discards; }; /* Driver counted stats */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b23585c5e5c2..5027fae0aa77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2220,6 +2220,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; int sq_idx; @@ -2232,6 +2233,8 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + dev_stats = &pf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); dev_kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 5589fccd370b..7ebb6e656884 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -417,9 +417,19 @@ static netdev_tx_t otx2vf_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *vf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > vf->tx_max_pktlen)) { + dev_stats = &vf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + sq = &vf->qset.sq[qidx]; txq = netdev_get_tx_queue(netdev, qidx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 25af98034e2e..b476733a0234 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -371,7 +371,8 @@ static void rvu_rep_get_stats(struct work_struct *work) stats->rx_mcast_frames = rsp->rx.mcast; stats->tx_bytes = rsp->tx.octs; stats->tx_frames = rsp->tx.ucast + rsp->tx.bcast + rsp->tx.mcast; - stats->tx_drops = rsp->tx.drop; + stats->tx_drops = rsp->tx.drop + + (unsigned long)atomic_long_read(&stats->tx_discards); exit: mutex_unlock(&priv->mbox.lock); } @@ -418,6 +419,16 @@ static netdev_tx_t rvu_rep_xmit(struct sk_buff *skb, struct net_device *dev) struct otx2_nic *pf = rep->mdev; struct otx2_snd_queue *sq; struct netdev_queue *txq; + struct rep_stats *stats; + + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + stats = &rep->stats; + atomic_long_inc(&stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } sq = &pf->qset.sq[rep->rep_id]; txq = netdev_get_tx_queue(dev, 0); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h index 38446b3e4f13..5bc9e2c7d800 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h @@ -27,6 +27,7 @@ struct rep_stats { u64 tx_bytes; u64 tx_frames; u64 tx_drops; + atomic_long_t tx_discards; }; struct rep_dev { From e2fe0c54fb7401e6ecd3c10348519ab9e23bd639 Mon Sep 17 00:00:00 2001 From: James Jones Date: Mon, 11 Aug 2025 15:00:16 -0700 Subject: [PATCH 0745/1307] drm/nouveau/disp: Always accept linear modifier On some chipsets, which block-linear modifiers are supported is format-specific. However, linear modifiers are always be supported. The prior modifier filtering logic was not accounting for the linear case. Cc: stable@vger.kernel.org Fixes: c586f30bf74c ("drm/nouveau/kms: Add format mod prop to base/ovly/nvdisp") Signed-off-by: James Jones Link: https://lore.kernel.org/r/20250811220017.1337-3-jajones@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 11d5b923d6e7..e2c55f4b9c5a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -795,6 +795,10 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, struct nouveau_drm *drm = nouveau_drm(plane->dev); uint8_t i; + /* All chipsets can display all formats in linear layout */ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + if (drm->client.device.info.chipset < 0xc0) { const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; From bfb336cf97df7b37b2b2edec0f69773e06d11955 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 22 Aug 2025 18:36:06 -0400 Subject: [PATCH 0746/1307] ftrace: Also allocate and copy hash for reading of filter files Currently the reader of set_ftrace_filter and set_ftrace_notrace just adds the pointer to the global tracer hash to its iterator. Unlike the writer that allocates a copy of the hash, the reader keeps the pointer to the filter hashes. This is problematic because this pointer is static across function calls that release the locks that can update the global tracer hashes. This can cause UAF and similar bugs. Allocate and copy the hash for reading the filter files like it is done for the writers. This not only fixes UAF bugs, but also makes the code a bit simpler as it doesn't have to differentiate when to free the iterator's hash between writers and readers. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Nathan Chancellor Cc: Linus Torvalds Link: https://lore.kernel.org/20250822183606.12962cc3@batman.local.home Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read") Closes: https://lore.kernel.org/all/20250813023044.2121943-1-wutengda@huaweicloud.com/ Closes: https://lore.kernel.org/all/20250822192437.GA458494@ax162/ Reported-by: Tengda Wu Tested-by: Tengda Wu Tested-by: Nathan Chancellor Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 00b76d450a89..a69067367c29 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4661,13 +4661,17 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } else { iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); } + } else { + if (hash) + iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash); + else + iter->hash = EMPTY_HASH; + } - if (!iter->hash) { - trace_parser_put(&iter->parser); - goto out_unlock; - } - } else - iter->hash = hash; + if (!iter->hash) { + trace_parser_put(&iter->parser); + goto out_unlock; + } ret = 0; @@ -6543,9 +6547,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) ftrace_hash_move_and_update_ops(iter->ops, orig_hash, iter->hash, filter_hash); mutex_unlock(&ftrace_lock); - } else { - /* For read only, the hash is the ops hash */ - iter->hash = NULL; } mutex_unlock(&iter->ops->func_hash->regex_lock); From ec79003c5f9d2c7f9576fc69b8dbda80305cbe3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 Aug 2025 02:18:24 +0000 Subject: [PATCH 0747/1307] atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control(). syzbot reported the splat below. [0] When atmtcp_v_open() or atmtcp_v_close() is called via connect() or close(), atmtcp_send_control() is called to send an in-kernel special message. The message has ATMTCP_HDR_MAGIC in atmtcp_control.hdr.length. Also, a pointer of struct atm_vcc is set to atmtcp_control.vcc. The notable thing is struct atmtcp_control is uAPI but has a space for an in-kernel pointer. struct atmtcp_control { struct atmtcp_hdr hdr; /* must be first */ ... atm_kptr_t vcc; /* both directions */ ... } __ATM_API_ALIGN; typedef struct { unsigned char _[8]; } __ATM_API_ALIGN atm_kptr_t; The special message is processed in atmtcp_recv_control() called from atmtcp_c_send(). atmtcp_c_send() is vcc->dev->ops->send() and called from 2 paths: 1. .ndo_start_xmit() (vcc->send() == atm_send_aal0()) 2. vcc_sendmsg() The problem is sendmsg() does not validate the message length and userspace can abuse atmtcp_recv_control() to overwrite any kptr by atmtcp_control. Let's add a new ->pre_send() hook to validate messages from sendmsg(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00200000ab: 0000 [#1] SMP KASAN PTI KASAN: probably user-memory-access in range [0x0000000100000558-0x000000010000055f] CPU: 0 UID: 0 PID: 5865 Comm: syz-executor331 Not tainted 6.17.0-rc1-syzkaller-00215-gbab3ce404553 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:atmtcp_recv_control drivers/atm/atmtcp.c:93 [inline] RIP: 0010:atmtcp_c_send+0x1da/0x950 drivers/atm/atmtcp.c:297 Code: 4d 8d 75 1a 4c 89 f0 48 c1 e8 03 42 0f b6 04 20 84 c0 0f 85 15 06 00 00 41 0f b7 1e 4d 8d b7 60 05 00 00 4c 89 f0 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 13 06 00 00 66 41 89 1e 4d 8d 75 1c 4c RSP: 0018:ffffc90003f5f810 EFLAGS: 00010203 RAX: 00000000200000ab RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802a510000 RSI: 00000000ffffffff RDI: ffff888030a6068c RBP: ffff88802699fb40 R08: ffff888030a606eb R09: 1ffff1100614c0dd R10: dffffc0000000000 R11: ffffffff8718fc40 R12: dffffc0000000000 R13: ffff888030a60680 R14: 000000010000055f R15: 00000000ffffffff FS: 00007f8d7e9236c0(0000) GS:ffff888125c1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000075bde000 CR4: 00000000003526f0 Call Trace: vcc_sendmsg+0xa10/0xc60 net/atm/common.c:645 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:729 ____sys_sendmsg+0x505/0x830 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8d7e96a4a9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f8d7e923198 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f8d7e9f4308 RCX: 00007f8d7e96a4a9 RDX: 0000000000000000 RSI: 0000200000000240 RDI: 0000000000000005 RBP: 00007f8d7e9f4300 R08: 65732f636f72702f R09: 65732f636f72702f R10: 65732f636f72702f R11: 0000000000000246 R12: 00007f8d7e9c10ac R13: 00007f8d7e9231a0 R14: 0000200000000200 R15: 0000200000000250 Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68a6767c.050a0220.3d78fd.0011.GAE@google.com/ Tested-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250821021901.2814721-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/atm/atmtcp.c | 17 ++++++++++++++--- include/linux/atmdev.h | 1 + net/atm/common.c | 15 ++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index eeae160c898d..fa3c76a2b49d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -279,6 +279,19 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) return NULL; } +static int atmtcp_c_pre_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + struct atmtcp_hdr *hdr; + + if (skb->len < sizeof(struct atmtcp_hdr)) + return -EINVAL; + + hdr = (struct atmtcp_hdr *)skb->data; + if (hdr->length == ATMTCP_HDR_MAGIC) + return -EINVAL; + + return 0; +} static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) { @@ -288,9 +301,6 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (skb->len < sizeof(struct atmtcp_hdr)) - goto done; - dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { @@ -347,6 +357,7 @@ static const struct atmdev_ops atmtcp_v_dev_ops = { static const struct atmdev_ops atmtcp_c_dev_ops = { .close = atmtcp_c_close, + .pre_send = atmtcp_c_pre_send, .send = atmtcp_c_send }; diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); diff --git a/net/atm/common.c b/net/atm/common.c index d7f7976ea13a..881c7f259dbd 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,18 +635,27 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { - atm_return_tx(vcc, skb); - kfree_skb(skb); error = -EFAULT; - goto out; + goto free_skb; } if (eff != size) memset(skb->data + size, 0, eff-size); + + if (vcc->dev->ops->pre_send) { + error = vcc->dev->ops->pre_send(vcc, skb); + if (error) + goto free_skb; + } + error = vcc->dev->ops->send(vcc, skb); error = error ? error : size; out: release_sock(sk); return error; +free_skb: + atm_return_tx(vcc, skb); + kfree_skb(skb); + goto out; } __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) From c2bac68067bba5edda09112c09f2f670792dcdc8 Mon Sep 17 00:00:00 2001 From: Edgar Bonet Date: Thu, 14 Aug 2025 14:59:42 +0200 Subject: [PATCH 0748/1307] irqchip/atmel-aic[5]: Fix incorrect lock guard conversion Commit b00bee8afaca ("irqchip: Convert generic irqchip locking to guards") replaced calls to irq_gc_lock_irq{save,restore}() with guard(raw_spinlock_irq). However, in irq-atmel-aic5.c and irq-atmel-aic.c, the xlate callback is used in the early boot process, before interrupts are initially enabled. As its destructor enables interrupts, this triggers the warning in start_kernel(): WARNING: CPU: 0 PID: 0 at init/main.c:1024 start_kernel+0x4d0/0x5dc Interrupts were enabled early Fix this by using guard(raw_spinlock_irqsave) instead. [ tglx: Folded the equivivalent fix for atmel-aic ] Fixes: b00bee8afaca ("irqchip: Convert generic irqchip locking to guards") Signed-off-by: Edgar Bonet Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/280dd506-e1fc-4d2e-bdc4-98dd9dca6138@grenoble.cnrs.fr --- drivers/irqchip/irq-atmel-aic.c | 2 +- drivers/irqchip/irq-atmel-aic5.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 03aeed39a4d2..1dcc52760eca 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -188,7 +188,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d, gc = dgc->gc[idx]; - guard(raw_spinlock_irq)(&gc->lock); + guard(raw_spinlock_irqsave)(&gc->lock); smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 60b00d2c3d7a..1f14b401f71d 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -279,7 +279,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, if (ret) return ret; - guard(raw_spinlock_irq)(&bgc->lock); + guard(raw_spinlock_irqsave)(&bgc->lock); irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); smr = irq_reg_readl(bgc, AT91_AIC5_SMR); aic_common_set_priority(intspec[2], &smr); From 8976583832579fe7e450034d6143d74d9f8c8608 Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Sat, 23 Aug 2025 14:43:50 +0200 Subject: [PATCH 0749/1307] arm64: dts: rockchip: Fix the headphone detection on the orangepi 5 plus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic of the headphone detect pin seems to be inverted, with this change headphones actually output sound when plugged in. Verified by checking /sys/kernel/debug/gpio and by listening. Fixes: 236d225e1ee7 ("arm64: dts: rockchip: Add board device tree for rk3588-orangepi-5-plus") Signed-off-by: Maud Spierings Reviewed-by: Ondřej Jirman Link: https://lore.kernel.org/r/20250823-orangepi5-v1-1-ae77dd0e06d7@hotmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 121e4d1c3fa5..8222f1fae8fa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -77,7 +77,7 @@ &analog_sound { pinctrl-names = "default"; pinctrl-0 = <&hp_detect>; simple-audio-card,aux-devs = <&speaker_amp>, <&headphone_amp>; - simple-audio-card,hp-det-gpios = <&gpio1 RK_PD3 GPIO_ACTIVE_LOW>; + simple-audio-card,hp-det-gpios = <&gpio1 RK_PD3 GPIO_ACTIVE_HIGH>; simple-audio-card,widgets = "Microphone", "Onboard Microphone", "Microphone", "Microphone Jack", From 2dea24df234940b27d378f786933dc10f33de6b8 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 21 Aug 2025 13:29:39 +0800 Subject: [PATCH 0750/1307] arm64: dts: rockchip: Add supplies for eMMC on rk3588-orangepi-5 The eMMC description is missing both vmmc and vqmmc supplies. Add them to complete the description. Fixes: 236d225e1ee7 ("arm64: dts: rockchip: Add board device tree for rk3588-orangepi-5-plus") Fixes: ea63f4666e48 ("arm64: dts: rockchip: refactor common rk3588-orangepi-5.dtsi") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20250821052939.1869171-1-wens@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-orangepi-5.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5.dtsi index 91d56c34a1e4..8a8f3b26754d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5.dtsi @@ -365,6 +365,8 @@ &sdhci { max-frequency = <200000000>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; + vmmc-supply = <&vcc_3v3_s3>; + vqmmc-supply = <&vcc_1v8_s3>; status = "okay"; }; From 106bdca970c1f66e2d4ee53675df1575b01c65ce Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 Aug 2025 15:16:08 +0200 Subject: [PATCH 0751/1307] ARM: dts: rockchip: Minor whitespace cleanup The DTS code coding style expects exactly one space around '=' character. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250819131607.86338-3-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts | 2 +- arch/arm/boot/dts/rockchip/rv1109-relfor-saib.dts | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts b/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts index 21f824b09191..decbf2726ec4 100644 --- a/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts +++ b/arch/arm/boot/dts/rockchip/rk3128-xpi-3128.dts @@ -272,7 +272,7 @@ &gmac { phy-mode = "rmii"; phy-handle = <&phy0>; assigned-clocks = <&cru SCLK_MAC_SRC>; - assigned-clock-rates= <50000000>; + assigned-clock-rates = <50000000>; pinctrl-names = "default"; pinctrl-0 = <&rmii_pins>; status = "okay"; diff --git a/arch/arm/boot/dts/rockchip/rv1109-relfor-saib.dts b/arch/arm/boot/dts/rockchip/rv1109-relfor-saib.dts index c13829d32c32..8a92700349b4 100644 --- a/arch/arm/boot/dts/rockchip/rv1109-relfor-saib.dts +++ b/arch/arm/boot/dts/rockchip/rv1109-relfor-saib.dts @@ -250,9 +250,9 @@ rtc0: rtc@52 { &i2s0 { /delete-property/ pinctrl-0; rockchip,trcm-sync-rx-only; - pinctrl-0 = <&i2s0m0_sclk_rx>, - <&i2s0m0_lrck_rx>, - <&i2s0m0_sdi0>; + pinctrl-0 = <&i2s0m0_sclk_rx>, + <&i2s0m0_lrck_rx>, + <&i2s0m0_sdi0>; pinctrl-names = "default"; status = "okay"; }; From 521b36e5a243d94e843c0f03285fc49ee88c37b6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 Aug 2025 15:16:09 +0200 Subject: [PATCH 0752/1307] arm64: dts: rockchip: Minor whitespace cleanup The DTS code coding style expects exactly one space around '=' or '{' characters. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250819131607.86338-4-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-pp1516.dtsi | 8 ++++---- .../dts/rockchip/px30-ringneck-haikou-video-demo.dtso | 6 +++--- arch/arm64/boot/dts/rockchip/rk3308-sakurapi-rk3308b.dts | 2 +- .../boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso | 6 +++--- arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi | 4 ++-- arch/arm64/boot/dts/rockchip/rk3588j.dtsi | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-pp1516.dtsi b/arch/arm64/boot/dts/rockchip/px30-pp1516.dtsi index b4bd4e34747c..192791993f05 100644 --- a/arch/arm64/boot/dts/rockchip/px30-pp1516.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-pp1516.dtsi @@ -72,7 +72,7 @@ vcc5v0_sys: regulator-vccsys { }; vcc_cam_avdd: regulator-vcc-cam-avdd { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; regulator-name = "vcc_cam_avdd"; gpio = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; @@ -83,7 +83,7 @@ vcc_cam_avdd: regulator-vcc-cam-avdd { }; vcc_cam_dovdd: regulator-vcc-cam-dovdd { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; regulator-name = "vcc_cam_dovdd"; gpio = <&gpio3 RK_PC1 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; @@ -94,7 +94,7 @@ vcc_cam_dovdd: regulator-vcc-cam-dovdd { }; vcc_cam_dvdd: regulator-vcc-cam-dvdd { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; regulator-name = "vcc_cam_dvdd"; gpio = <&gpio3 RK_PC5 GPIO_ACTIVE_HIGH>; enable-active-high; @@ -106,7 +106,7 @@ vcc_cam_dvdd: regulator-vcc-cam-dvdd { }; vcc_lens_afvdd: regulator-vcc-lens-afvdd { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; regulator-name = "vcc_lens_afvdd"; gpio = <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou-video-demo.dtso b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou-video-demo.dtso index ea5ce919984f..760d5139f95d 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou-video-demo.dtso +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou-video-demo.dtso @@ -26,7 +26,7 @@ backlight: backlight { }; cam_afvdd_2v8: regulator-cam-afvdd-2v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 2 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <2800000>; regulator-min-microvolt = <2800000>; @@ -35,7 +35,7 @@ cam_afvdd_2v8: regulator-cam-afvdd-2v8 { }; cam_avdd_2v8: regulator-cam-avdd-2v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 4 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <2800000>; regulator-min-microvolt = <2800000>; @@ -44,7 +44,7 @@ cam_avdd_2v8: regulator-cam-avdd-2v8 { }; cam_dovdd_1v8: regulator-cam-dovdd-1v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <1800000>; regulator-min-microvolt = <1800000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-sakurapi-rk3308b.dts b/arch/arm64/boot/dts/rockchip/rk3308-sakurapi-rk3308b.dts index f9f633aebb64..e5e6b800c2d1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-sakurapi-rk3308b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-sakurapi-rk3308b.dts @@ -260,6 +260,6 @@ &usb_host_ehci { status = "okay"; }; -&usb_host_ohci{ +&usb_host_ohci { status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso index d28880b8dd44..5e8f729c2cf2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou-video-demo.dtso @@ -26,7 +26,7 @@ backlight: backlight { }; cam_afvdd_2v8: regulator-cam-afvdd-2v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 2 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <2800000>; regulator-min-microvolt = <2800000>; @@ -35,7 +35,7 @@ cam_afvdd_2v8: regulator-cam-afvdd-2v8 { }; cam_avdd_2v8: regulator-cam-avdd-2v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 4 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <2800000>; regulator-min-microvolt = <2800000>; @@ -44,7 +44,7 @@ cam_avdd_2v8: regulator-cam-avdd-2v8 { }; cam_dovdd_1v8: regulator-cam-dovdd-1v8 { - compatible = "regulator-fixed"; + compatible = "regulator-fixed"; gpio = <&pca9670 3 GPIO_ACTIVE_LOW>; regulator-max-microvolt = <1800000>; regulator-min-microvolt = <1800000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index e7ba477e75f9..7f578c50b4ad 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -53,7 +53,7 @@ led-0 { gpios = <&gpio4 RK_PA1 GPIO_ACTIVE_LOW>; linux,default-trigger = "default-on"; pinctrl-names = "default"; - pinctrl-0 =<&blue_led>; + pinctrl-0 = <&blue_led>; }; led-1 { @@ -62,7 +62,7 @@ led-1 { gpios = <&gpio0 RK_PB7 GPIO_ACTIVE_LOW>; linux,default-trigger = "heartbeat"; pinctrl-names = "default"; - pinctrl-0 =<&heartbeat_led>; + pinctrl-0 = <&heartbeat_led>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index 3045cb3bd68c..9884a5df47df 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -28,7 +28,7 @@ cluster1_opp_table: opp-table-cluster1 { compatible = "operating-points-v2"; opp-shared; - opp-1200000000{ + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <750000 750000 950000>; clock-latency-ns = <40000>; @@ -49,7 +49,7 @@ cluster2_opp_table: opp-table-cluster2 { compatible = "operating-points-v2"; opp-shared; - opp-1200000000{ + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <750000 750000 950000>; clock-latency-ns = <40000>; From c8bb0f00a4886b24d933ffaabcdc09bf9a370dca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Aug 2025 12:40:02 +0300 Subject: [PATCH 0753/1307] irqchip/mvebu-gicp: Fix an IS_ERR() vs NULL check in probe() ioremap() never returns error pointers, it returns NULL on error. Fix the check to match. Fixes: 3c3d7dbab2c7 ("irqchip/mvebu-gicp: Clear pending interrupts on init") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/aKRGcgMeaXm2TMIC@stanley.mountain --- drivers/irqchip/irq-mvebu-gicp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index 54833717f8a7..667bde3c651f 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -238,7 +238,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) } base = ioremap(gicp->res->start, resource_size(gicp->res)); - if (IS_ERR(base)) { + if (!base) { dev_err(&pdev->dev, "ioremap() failed. Unable to clear pending interrupts.\n"); } else { for (i = 0; i < 64; i++) From 1a2cce5b91eeeac24104cbccd8cd3a4dfbdbaa7a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 11 Aug 2025 15:50:01 +0200 Subject: [PATCH 0754/1307] irqchip/gic-v5: Fix kmemleak L2 IST table entries false positives L2 IST table entries are allocated with the kmalloc interface and their physical addresses are programmed in the GIC (either IST base address register or L1 IST table entries) but their virtual addresses are not stored in any kernel data structure because they are not needed at runtime - the L2 IST table entries are managed through system instructions but never dereferenced directly by the driver. This triggers kmemleak false positive reports: unreferenced object 0xffff00080039a000 (size 4096): comm "swapper/0", pid 0, jiffies 4294892296 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_alloc+0x34/0x40 __kmalloc_noprof+0x320/0x464 gicv5_irs_iste_alloc+0x1a4/0x484 gicv5_irq_lpi_domain_alloc+0xe4/0x194 irq_domain_alloc_irqs_parent+0x78/0xd8 gicv5_irq_ipi_domain_alloc+0x180/0x238 irq_domain_alloc_irqs_locked+0x238/0x7d4 __irq_domain_alloc_irqs+0x88/0x114 gicv5_of_init+0x284/0x37c of_irq_init+0x3b8/0xb18 irqchip_init+0x18/0x40 init_IRQ+0x104/0x164 start_kernel+0x1a4/0x3d4 __primary_switched+0x8c/0x94 Instruct kmemleak to ignore L2 IST table memory allocation virtual addresses to prevent these false positive reports. Reported-by: Jinjie Ruan Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Reviewed-by: Zenghui Yu Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20250811135001.1333684-1-lpieralisi@kernel.org Closes: https://lore.kernel.org/lkml/cc611dda-d1e4-4793-9bb2-0eaa47277584@huawei.com/ --- drivers/irqchip/irq-gic-v5-irs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index f845415f9143..ffc97737a757 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -5,6 +5,7 @@ #define pr_fmt(fmt) "GICv5 IRS: " fmt +#include #include #include #include @@ -117,6 +118,7 @@ static int __init gicv5_irs_init_ist_linear(struct gicv5_irs_chip_data *irs_data kfree(ist); return ret; } + kmemleak_ignore(ist); return 0; } @@ -232,6 +234,7 @@ int gicv5_irs_iste_alloc(const u32 lpi) kfree(l2ist); return ret; } + kmemleak_ignore(l2ist); /* * Make sure we invalidate the cache line pulled before the IRS From 09cce878427962a5c2a3a37d6cc52485a0134ac1 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 18 Aug 2025 19:12:23 +0200 Subject: [PATCH 0755/1307] arm64: dts: rockchip: correct network description on Sige5 Both network PHYs have dedicated crystals for the 25 MHz clock and do not source it from the RK3576. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250818-sige5-network-phy-clock-v1-1-87a9122d41c2@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index 101e2ee9766d..3386084f6318 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -302,8 +302,7 @@ &gmac1 { ð1m0_tx_bus2 ð1m0_rx_bus2 ð1m0_rgmii_clk - ð1m0_rgmii_bus - ðm0_clk1_25m_out>; + ð1m0_rgmii_bus>; status = "okay"; }; @@ -784,7 +783,6 @@ &mdio0 { rgmii_phy0: phy@1 { compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x1>; - clocks = <&cru REFCLKO25M_GMAC0_OUT>; pinctrl-names = "default"; pinctrl-0 = <&gmac0_rst>; reset-assert-us = <20000>; @@ -797,7 +795,6 @@ &mdio1 { rgmii_phy1: phy@1 { compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x1>; - clocks = <&cru REFCLKO25M_GMAC1_OUT>; pinctrl-names = "default"; pinctrl-0 = <&gmac1_rst>; reset-assert-us = <20000>; From c9f986a54d4031a9b9dff1eb616b0796aa28c730 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 18 Aug 2025 11:04:47 +0200 Subject: [PATCH 0756/1307] arm64: dts: rockchip: Fix Bluetooth interrupts flag on Neardi LBA3368 GPIO_ACTIVE_x flags are not correct in the context of interrupt flags. These are simple defines so they could be used in DTS but they will not have the same meaning: GPIO_ACTIVE_HIGH = 0 = IRQ_TYPE_NONE. Correct the interrupt flags, assuming the author of the code wanted same logical behavior behind the name "ACTIVE_xxx", this is: ACTIVE_HIGH => IRQ_TYPE_LEVEL_HIGH Fixes: 7b4a8097e58b ("arm64: dts: rockchip: Add Neardi LBA3368 board") Cc: stable+noautosel@kernel.org # Needs testing, because actual level is just a guess Signed-off-by: Krzysztof Kozlowski Tested-By: Alex Bee Link: https://lore.kernel.org/r/20250818090445.28112-4-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-lba3368.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lba3368.dts b/arch/arm64/boot/dts/rockchip/rk3368-lba3368.dts index b99bb0a5f900..b9801a691b48 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lba3368.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-lba3368.dts @@ -609,7 +609,7 @@ &uart0 { bluetooth { compatible = "brcm,bcm4345c5"; - interrupts-extended = <&gpio3 RK_PA7 GPIO_ACTIVE_HIGH>; + interrupts-extended = <&gpio3 RK_PA7 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "host-wakeup"; clocks = <&rk808 RK808_CLKOUT1>; clock-names = "lpo"; From 4138adfd3594ebe957c6cb640372c93e354aa171 Mon Sep 17 00:00:00 2001 From: Kaison Deng Date: Thu, 14 Aug 2025 12:32:30 +0800 Subject: [PATCH 0757/1307] arm64: dts: rockchip: fix es8388 address on rk3588s-roc-pc Use the correct es8388 address for rk3588s-roc-ps Signed-off-by: Kaison Deng Link: https://lore.kernel.org/r/20250814043230.2774813-1-dkx@t-chip.com.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588s-roc-pc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3588s-roc-pc.dts index 7434ac39246f..7e179862da6e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-roc-pc.dts @@ -320,9 +320,9 @@ hym8563: rtc@51 { &i2c3 { status = "okay"; - es8388: audio-codec@10 { + es8388: audio-codec@11 { compatible = "everest,es8388", "everest,es8328"; - reg = <0x10>; + reg = <0x11>; clocks = <&cru I2S1_8CH_MCLKOUT>; AVDD-supply = <&vcc_3v3_s0>; DVDD-supply = <&vcc_1v8_s0>; From 35c23871be0072738ccc7ca00354c791711e5640 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 14 Aug 2025 11:41:38 +0200 Subject: [PATCH 0758/1307] irqchip/gic-v5: Remove undue WARN_ON()s in the IRS affinity parsing In gicv5_irs_of_init_affinity() a WARN_ON() is triggered if: 1) a phandle in the "cpus" property does not correspond to a valid OF node 2 a CPU logical id does not exist for a given OF cpu_node #1 is a firmware bug and should be reported as such but does not warrant a WARN_ON() backtrace. #2 is not necessarily an error condition (eg a kernel can be booted with nr_cpus=X limiting the number of cores artificially) and therefore there is no reason to clutter the kernel log with WARN_ON() output when the condition is hit. Rework the IRS affinity parsing code to remove undue WARN_ON()s thus making it less noisy. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250814094138.1611017-1-lpieralisi@kernel.org --- drivers/irqchip/irq-gic-v5-irs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index ffc97737a757..13c035727e32 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -626,12 +626,14 @@ static int __init gicv5_irs_of_init_affinity(struct device_node *node, int cpu; cpu_node = of_parse_phandle(node, "cpus", i); - if (WARN_ON(!cpu_node)) + if (!cpu_node) { + pr_warn(FW_BUG "Erroneous CPU node phandle\n"); continue; + } cpu = of_cpu_node_to_id(cpu_node); of_node_put(cpu_node); - if (WARN_ON(cpu < 0)) + if (cpu < 0) continue; if (iaffids[i] & ~iaffid_mask) { From 1b237f190eb3d36f52dffe07a40b5eb210280e00 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2025 12:04:12 -0400 Subject: [PATCH 0759/1307] Linux 6.17-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1adb78c3596..06c28b1d7e67 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From 168873ca1799d3f23442b9e79eae55f907b9b126 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 19 Aug 2025 01:58:51 +0000 Subject: [PATCH 0760/1307] ASoC: soc-core: care NULL dirver name on snd_soc_lookup_component_nolocked() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit soc-generic-dmaengine-pcm.c uses same dev for both CPU and Platform. In such case, CPU component driver might not have driver->name, then snd_soc_lookup_component_nolocked() will be NULL pointer access error. Care NULL driver name. Call trace: strcmp from snd_soc_lookup_component_nolocked+0x64/0xa4 snd_soc_lookup_component_nolocked from snd_soc_unregister_component_by_driver+0x2c/0x44 snd_soc_unregister_component_by_driver from snd_dmaengine_pcm_unregister+0x28/0x64 snd_dmaengine_pcm_unregister from devres_release_all+0x98/0xfc devres_release_all from device_unbind_cleanup+0xc/0x60 device_unbind_cleanup from really_probe+0x220/0x2c8 really_probe from __driver_probe_device+0x88/0x1a0 __driver_probe_device from driver_probe_device+0x30/0x110 driver_probe_device from __driver_attach+0x90/0x178 __driver_attach from bus_for_each_dev+0x7c/0xcc bus_for_each_dev from bus_add_driver+0xcc/0x1ec bus_add_driver from driver_register+0x80/0x11c driver_register from do_one_initcall+0x58/0x23c do_one_initcall from kernel_init_freeable+0x198/0x1f4 kernel_init_freeable from kernel_init+0x1c/0x12c kernel_init from ret_from_fork+0x14/0x28 Fixes: 144d6dfc7482 ("ASoC: soc-core: merge snd_soc_unregister_component() and snd_soc_unregister_component_by_driver()") Reported-by: J. Neuschäfer Closes: https://lore.kernel.org/r/aJb311bMDc9x-dpW@probook Signed-off-by: Kuninori Morimoto Reported-by: Ondřej Jirman Closes: https://lore.kernel.org/r/arxpwzu6nzgjxvsndct65ww2wz4aezb5gjdzlgr24gfx7xvyih@natjg6dg2pj6 Tested-by: J. Neuschäfer Message-ID: <87ect8ysv8.wl-kuninori.morimoto.gx@renesas.com> Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 37bc5867f81d..c1cf205e0556 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -375,8 +375,9 @@ struct snd_soc_component for_each_component(component) { if ((dev == component->dev) && (!driver_name || - (driver_name == component->driver->name) || - (strcmp(component->driver->name, driver_name) == 0))) { + (component->driver->name && + ((component->driver->name == driver_name) || + (strcmp(component->driver->name, driver_name) == 0))))) { found_component = component; break; } From b833b412a522e58b790abe79b6ec46e8ba1f312a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 19 Aug 2025 01:59:01 +0000 Subject: [PATCH 0761/1307] ASoC: soc-core: tidyup snd_soc_lookup_component_nolocked() snd_soc_lookup_component_nolocked() is very complex today. Let's tidyup the code. Signed-off-by: Kuninori Morimoto Message-ID: <87cy8sysuy.wl-kuninori.morimoto.gx@renesas.com> Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c1cf205e0556..cc9125ffe92a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -369,21 +369,25 @@ struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, const char *driver_name) { struct snd_soc_component *component; - struct snd_soc_component *found_component; - found_component = NULL; for_each_component(component) { - if ((dev == component->dev) && - (!driver_name || - (component->driver->name && - ((component->driver->name == driver_name) || - (strcmp(component->driver->name, driver_name) == 0))))) { - found_component = component; - break; - } + if (dev != component->dev) + continue; + + if (!driver_name) + return component; + + if (!component->driver->name) + continue; + + if (component->driver->name == driver_name) + return component; + + if (strcmp(component->driver->name, driver_name) == 0) + return component; } - return found_component; + return NULL; } EXPORT_SYMBOL_GPL(snd_soc_lookup_component_nolocked); From b1c99d5bd24ce0a1193d8476d83bf8c8bc633266 Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Fri, 22 Aug 2025 18:28:33 +0800 Subject: [PATCH 0762/1307] ASoC: codecs: idt821034: fix wrong log in idt821034_chip_direction_output() Change `dir in` to `dir out` Suggested-by: Jun Zhan Signed-off-by: Cryolitia PukNgae Acked-by: Herve Codina Message-ID: <20250822-idt821034-v1-1-e2bfffbde56f@uniontech.com> Signed-off-by: Mark Brown --- sound/soc/codecs/idt821034.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c index 6738cf21983b..a03d4e5e7d14 100644 --- a/sound/soc/codecs/idt821034.c +++ b/sound/soc/codecs/idt821034.c @@ -1067,7 +1067,7 @@ static int idt821034_chip_direction_output(struct gpio_chip *c, unsigned int off ret = idt821034_set_slic_conf(idt821034, ch, slic_conf); if (ret) { - dev_err(&idt821034->spi->dev, "dir in gpio %d (%u, 0x%x) failed (%d)\n", + dev_err(&idt821034->spi->dev, "dir out gpio %d (%u, 0x%x) failed (%d)\n", offset, ch, mask, ret); } From ef3e9c91ed87f13dba877a20569f4a0accf0612c Mon Sep 17 00:00:00 2001 From: Kamal Wadhwa Date: Sat, 23 Aug 2025 01:56:39 +0530 Subject: [PATCH 0763/1307] regulator: pm8008: fix probe failure due to negative voltage selector In the current design, the `pm8008_regulator_get_voltage_sel()` callback can return a negative value if the raw voltage value is read as 0 uV from the PMIC HW register. This can cause the probe to fail when the `machine_constraints_voltage()` check is called during the regulator registration flow. Fix this by using the helper `regulator_map_voltage_linear_range()` to convert the raw value to a voltage selector inside the mentioned get voltage selector function. This ensures that the value returned is always within the defined range. Signed-off-by: Kamal Wadhwa Message-ID: <20250823-pm8008-negitive-selector-v1-1-52b026a4b5e8@quicinc.com> Signed-off-by: Mark Brown --- drivers/regulator/qcom-pm8008-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-pm8008-regulator.c b/drivers/regulator/qcom-pm8008-regulator.c index da017c1969d0..90c78ee1c37b 100644 --- a/drivers/regulator/qcom-pm8008-regulator.c +++ b/drivers/regulator/qcom-pm8008-regulator.c @@ -96,7 +96,7 @@ static int pm8008_regulator_get_voltage_sel(struct regulator_dev *rdev) uV = le16_to_cpu(val) * 1000; - return (uV - preg->desc.min_uV) / preg->desc.uV_step; + return regulator_map_voltage_linear_range(rdev, uV, INT_MAX); } static const struct regulator_ops pm8008_regulator_ops = { From 9bca8be646e043d1fc6cd426fef05558c02de3df Mon Sep 17 00:00:00 2001 From: Fangyu Yu Date: Thu, 7 Aug 2025 15:07:29 +0800 Subject: [PATCH 0764/1307] RISC-V: KVM: Fix pte settings within kvm_riscv_gstage_ioremap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, kvm_riscv_gstage_ioremap() is used to map IMSIC gpa to the spa of IMSIC guest interrupt file. The PAGE_KERNEL_IO property includes global setting whereas it does not include user mode settings, so when accessing the IMSIC address in the virtual machine, a guest page fault will occur, this is not expected. According to the RISC-V Privileged Architecture Spec, for G-stage address translation, all memory accesses are considered to be user-level accesses as though executed in U-mode. Fixes: 659ad6d82c31 ("RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap()") Signed-off-by: Fangyu Yu Reviewed-by: Radim Krčmář Reviewed-by: Nutty Liu Link: https://lore.kernel.org/r/20250807070729.89701-1-fangyu.yu@linux.alibaba.com Signed-off-by: Anup Patel --- arch/riscv/kvm/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a1c3b2ec1dde..525fb5a330c0 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -39,6 +39,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, unsigned long size, bool writable, bool in_atomic) { int ret = 0; + pgprot_t prot; unsigned long pfn; phys_addr_t addr, end; struct kvm_mmu_memory_cache pcache = { @@ -55,10 +56,12 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); + prot = pgprot_noncached(PAGE_WRITE); for (addr = gpa; addr < end; addr += PAGE_SIZE) { map.addr = addr; - map.pte = pfn_pte(pfn, PAGE_KERNEL_IO); + map.pte = pfn_pte(pfn, prot); + map.pte = pte_mkdirty(map.pte); map.level = 0; if (!writable) From e61a12a4baf06a4c71e15f522bb5c4345c2ba198 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Mon, 11 Aug 2025 10:18:29 +0800 Subject: [PATCH 0765/1307] RISC-V: KVM: Correct kvm_riscv_check_vcpu_requests() comment Correct `check_vcpu_requests` to `kvm_riscv_check_vcpu_requests` in comments. Fixes: f55ffaf89636 ("RISC-V: KVM: Enable ring-based dirty memory tracking") Signed-off-by: Quan Zhou Reviewed-by: Nutty Liu Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/49680363098c45516ec4b305283d662d26fa9386.1754326285.git.zhouquan@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index f001e56403f9..3ebcfffaa978 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -683,7 +683,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } /** - * check_vcpu_requests - check and handle pending vCPU requests + * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests * @vcpu: the VCPU pointer * * Return: 1 if we should enter the guest From 799766208f09f95677a9ab111b93872d414fbad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 5 Aug 2025 12:44:21 +0200 Subject: [PATCH 0766/1307] RISC-V: KVM: fix stack overrun when loading vlenb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userspace load can put up to 2048 bits into an xlen bit stack buffer. We want only xlen bits, so check the size beforehand. Fixes: 2fa290372dfe ("RISC-V: KVM: add 'vlenb' Vector CSR") Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Reviewed-by: Nutty Liu Reviewed-by: Daniel Henrique Barboza Link: https://lore.kernel.org/r/20250805104418.196023-4-rkrcmar@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_vector.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index a5f88cb717f3..05f3cc2d8e31 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -182,6 +182,8 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; unsigned long reg_val; + if (reg_size != sizeof(reg_val)) + return -EINVAL; if (copy_from_user(®_val, uaddr, reg_size)) return -EFAULT; if (reg_val != cntx->vector.vlenb) From d3a8ca2ebe6e3f2b1fb0e8e74f909d109a1d77c7 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Fri, 22 Aug 2025 16:33:29 +0800 Subject: [PATCH 0767/1307] platform/x86/amd: hfi: Fix pcct_tbl leak in amd_hfi_metadata_parser() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a permanent ACPI table memory leak when amd_hfi_metadata_parser() fails due to invalid PCCT table length or memory allocation errors. Fixes: d4e95ea7a78e ("platform/x86: hfi: Parse CPU core ranking data from shared memory") Cc: stable@vger.kernel.org Signed-off-by: Zhen Ni Reviewed-by: Mario Limonciello (AMD) Link: https://lore.kernel.org/r/20250822083329.710857-1-zhen.ni@easystack.cn Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hfi/hfi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd/hfi/hfi.c b/drivers/platform/x86/amd/hfi/hfi.c index 4f56149b3774..a465ac6f607e 100644 --- a/drivers/platform/x86/amd/hfi/hfi.c +++ b/drivers/platform/x86/amd/hfi/hfi.c @@ -385,12 +385,16 @@ static int amd_hfi_metadata_parser(struct platform_device *pdev, amd_hfi_data->pcct_entry = pcct_entry; pcct_ext = (struct acpi_pcct_ext_pcc_slave *)pcct_entry; - if (pcct_ext->length <= 0) - return -EINVAL; + if (pcct_ext->length <= 0) { + ret = -EINVAL; + goto out; + } amd_hfi_data->shmem = devm_kzalloc(amd_hfi_data->dev, pcct_ext->length, GFP_KERNEL); - if (!amd_hfi_data->shmem) - return -ENOMEM; + if (!amd_hfi_data->shmem) { + ret = -ENOMEM; + goto out; + } pcc_chan->shmem_base_addr = pcct_ext->base_address; pcc_chan->shmem_size = pcct_ext->length; @@ -398,6 +402,8 @@ static int amd_hfi_metadata_parser(struct platform_device *pdev, /* parse the shared memory info from the PCCT table */ ret = amd_hfi_fill_metadata(amd_hfi_data); +out: + /* Don't leak any ACPI memory */ acpi_put_table(pcct_tbl); return ret; From cf3940ac737d05c85395f343fe33a3cfcadb47db Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Fri, 8 Aug 2025 17:47:09 +0200 Subject: [PATCH 0768/1307] platform/x86: asus-wmi: Remove extra keys from ignore_key_wlan quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the ignore_key_wlan quirk applies to keycodes 0x5D, 0x5E, and 0x5F. However, the relevant code for the Asus Zenbook Duo is only 0x5F. Since this code is emitted by other Asus devices, such as from the Z13 for its ROG button, remove the extra codes before expanding the quirk. For the Duo devices, which are the only ones that use this quirk, there should be no effect. Fixes: 9286dfd5735b ("platform/x86: asus-wmi: Fix spurious rfkill on UX8406MA") Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250808154710.8981-1-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index f84c3d03c1de..e6726be5890e 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -655,8 +655,6 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, if (atkbd_reports_vol_keys) *code = ASUS_WMI_KEY_IGNORE; break; - case 0x5D: /* Wireless console Toggle */ - case 0x5E: /* Wireless console Enable */ case 0x5F: /* Wireless console Disable */ if (quirks->ignore_key_wlan) *code = ASUS_WMI_KEY_IGNORE; From 132bfcd24925d4d4531a19b87acb8474be82a017 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Fri, 8 Aug 2025 17:47:10 +0200 Subject: [PATCH 0769/1307] platform/x86: asus-wmi: Fix ROG button mapping, tablet mode on ASUS ROG Z13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On commit 9286dfd5735b ("platform/x86: asus-wmi: Fix spurious rfkill on UX8406MA"), Mathieu adds a quirk for the Zenbook Duo to ignore the code 0x5f (WLAN button disable). On that laptop, this code is triggered when the device keyboard is attached. On the ASUS ROG Z13 2025, this code is triggered when pressing the side button of the device, which is used to open Armoury Crate in Windows. As this is becoming a pattern, where newer Asus laptops use this keycode for emitting events, let's convert the wlan ignore quirk to instead allow emitting codes, so that userspace programs can listen to it and so that it does not interfere with the rfkill state. With this patch, the Z13 wil emit KEY_PROG3 and the Duo will remain unchanged and emit no event. While at it, add a quirk for the Z13 to switch into tablet mode when removing the keyboard. Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250808154710.8981-2-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 23 +++++++++++++++++++---- drivers/platform/x86/asus-wmi.h | 3 ++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index e6726be5890e..6928bb6ae0f3 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -147,7 +147,12 @@ static struct quirk_entry quirk_asus_ignore_fan = { }; static struct quirk_entry quirk_asus_zenbook_duo_kbd = { - .ignore_key_wlan = true, + .key_wlan_event = ASUS_WMI_KEY_IGNORE, +}; + +static struct quirk_entry quirk_asus_z13 = { + .key_wlan_event = ASUS_WMI_KEY_ARMOURY, + .tablet_switch_mode = asus_wmi_kbd_dock_devid, }; static int dmi_matched(const struct dmi_system_id *dmi) @@ -539,6 +544,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_zenbook_duo_kbd, }, + { + .callback = dmi_matched, + .ident = "ASUS ROG Z13", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow Z13"), + }, + .driver_data = &quirk_asus_z13, + }, {}, }; @@ -636,6 +650,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_IGNORE, 0xCF, }, /* AC mode */ { KE_KEY, 0xFA, { KEY_PROG2 } }, /* Lid flip action */ { KE_KEY, 0xBD, { KEY_PROG2 } }, /* Lid flip action on ROG xflow laptops */ + { KE_KEY, ASUS_WMI_KEY_ARMOURY, { KEY_PROG3 } }, { KE_END, 0}, }; @@ -655,9 +670,9 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, if (atkbd_reports_vol_keys) *code = ASUS_WMI_KEY_IGNORE; break; - case 0x5F: /* Wireless console Disable */ - if (quirks->ignore_key_wlan) - *code = ASUS_WMI_KEY_IGNORE; + case 0x5F: /* Wireless console Disable / Special Key */ + if (quirks->key_wlan_event) + *code = quirks->key_wlan_event; break; } } diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 018dfde4025e..5cd4392b964e 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -18,6 +18,7 @@ #include #define ASUS_WMI_KEY_IGNORE (-1) +#define ASUS_WMI_KEY_ARMOURY 0xffff01 #define ASUS_WMI_BRN_DOWN 0x2e #define ASUS_WMI_BRN_UP 0x2f @@ -40,7 +41,7 @@ struct quirk_entry { bool wmi_force_als_set; bool wmi_ignore_fan; bool filter_i8042_e1_extended_codes; - bool ignore_key_wlan; + int key_wlan_event; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* From ff2a66d21fd2364ed9396d151115eec59612b200 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Thu, 31 Jul 2025 04:15:27 +0100 Subject: [PATCH 0770/1307] EDAC/altera: Delete an inappropriate dma_free_coherent() call dma_free_coherent() must only be called if the corresponding dma_alloc_coherent() call has succeeded. Calling it when the allocation fails leads to undefined behavior. Delete the wrong call. [ bp: Massage commit message. ] Fixes: 71bcada88b0f3 ("edac: altera: Add Altera SDRAM EDAC support") Signed-off-by: Salah Triki Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/aIrfzzqh4IzYtDVC@pc --- drivers/edac/altera_edac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index cae52c654a15..7685a8550d4b 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -128,7 +128,6 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file, ptemp = dma_alloc_coherent(mci->pdev, 16, &dma_handle, GFP_KERNEL); if (!ptemp) { - dma_free_coherent(mci->pdev, 16, ptemp, dma_handle); edac_printk(KERN_ERR, EDAC_MC, "Inject: Buffer Allocation error\n"); return -ENOMEM; From ae91aea2d2265c88dbed65a07bbaf3c133fe970c Mon Sep 17 00:00:00 2001 From: Junhui Pei Date: Mon, 2 Jun 2025 23:38:41 +0800 Subject: [PATCH 0771/1307] ubsan: Fix incorrect hand-side used in handle __ubsan_handle_divrem_overflow() incorrectly uses the RHS to report. It always reports the same log: division of -1 by -1. But it should report division of LHS by -1. Signed-off-by: Junhui Pei Fixes: c6d308534aef ("UBSAN: run-time undefined behavior sanity checker") Link: https://lore.kernel.org/r/20250602153841.62935-1-paradoxskin233@gmail.com Signed-off-by: Kees Cook --- lib/ubsan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index a6ca235dd714..456e3dd8f4ea 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -333,18 +333,18 @@ EXPORT_SYMBOL(__ubsan_handle_implicit_conversion); void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) { struct overflow_data *data = _data; - char rhs_val_str[VALUE_LENGTH]; + char lhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; ubsan_prologue(&data->location, "division-overflow"); - val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs); + val_to_string(lhs_val_str, sizeof(lhs_val_str), data->type, lhs); if (type_is_signed(data->type) && get_signed_val(data->type, rhs) == -1) pr_err("division of %s by -1 cannot be represented in type %s\n", - rhs_val_str, data->type->type_name); + lhs_val_str, data->type->type_name); else pr_err("division by zero\n"); From f05995cc6d3bb742a10996036043ade8d1c65ab7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Aug 2025 09:18:58 +0200 Subject: [PATCH 0772/1307] ARM: stacktrace: include asm/sections.h in asm/stacktrace.h The recent kstack erase changes appear to have uncovered an existing issue with a missing header inclusion: In file included from drivers/misc/lkdtm/kstack_erase.c:12: In file included from include/linux/kstack_erase.h:16: arch/arm/include/asm/stacktrace.h:48:21: error: call to undeclared function 'in_entry_text'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 48 | frame->ex_frame = in_entry_text(frame->pc); | ^ Include asm/sections.h here so the compiler can see the in_entry_text() declaration. Fixes: 752ec621ef5c ("ARM: 9234/1: stacktrace: Avoid duplicate saving of exception PC value") Cc: Kees Cook Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250807071902.4077714-1-arnd@kernel.org Signed-off-by: Kees Cook --- arch/arm/include/asm/stacktrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index f80a85b091d6..ba2f771cca23 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -2,8 +2,9 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H -#include #include +#include +#include struct stackframe { /* From d14469ed7c00314fe8957b2841bda329e4eaf4ab Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 25 Aug 2025 17:32:05 +0800 Subject: [PATCH 0773/1307] loop: fix zero sized loop for block special file By default, /dev/sda is block special file from devtmpfs, getattr will return file size as zero, causing loop failed for raw block device. We can add bdev_statx() to return device size, however this may introduce changes that are not acknowledged by user. Fix this problem by reverting changes for block special file, file mapping host is set to bdev inode while opening, and use i_size_read() directly to get device size. Fixes: 47b71abd5846 ("loop: use vfs_getattr_nosec for accurate file size") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508200409.b2459c02-lkp@intel.com Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250825093205.3684121-1-yukuai1@huaweicloud.com [axboe: fix spelling error] Signed-off-by: Jens Axboe --- drivers/block/loop.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 57263c273f0f..053a086d547e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -139,20 +139,26 @@ static int part_shift; static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { - struct kstat stat; loff_t loopsize; int ret; - /* - * Get the accurate file size. This provides better results than - * cached inode data, particularly for network filesystems where - * metadata may be stale. - */ - ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); - if (ret) - return 0; + if (S_ISBLK(file_inode(file)->i_mode)) { + loopsize = i_size_read(file->f_mapping->host); + } else { + struct kstat stat; + + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; + } - loopsize = stat.size; if (lo->lo_offset > 0) loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ From 0e08fa789d39aa01923e3ba144bd808291895c3c Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 23 Aug 2025 21:15:59 -0500 Subject: [PATCH 0774/1307] smb3 client: fix return code mapping of remap_file_range We were returning -EOPNOTSUPP for various remap_file_range cases but for some of these the copy_file_range_syscall() requires -EINVAL to be returned (e.g. where source and target file ranges overlap when source and target are the same file). This fixes xfstest generic/157 which was expecting EINVAL for that (and also e.g. for when the src offset is beyond end of file). Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 3bd85ab2deb1..e1848276bab4 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1358,6 +1358,20 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, truncate_setsize(target_inode, new_size); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); + } else if (rc == -EOPNOTSUPP) { + /* + * copy_file_range syscall man page indicates EINVAL + * is returned e.g when "fd_in and fd_out refer to the + * same file and the source and target ranges overlap." + * Test generic/157 was what showed these cases where + * we need to remap EOPNOTSUPP to EINVAL + */ + if (off >= src_inode->i_size) { + rc = -EINVAL; + } else if (src_inode == target_inode) { + if (off + len > destoff) + rc = -EINVAL; + } } if (rc == 0 && new_size > target_cifsi->netfs.zero_point) target_cifsi->netfs.zero_point = new_size; From b4efccec8d06ceb10a7d34d7b1c449c569d53770 Mon Sep 17 00:00:00 2001 From: Li Qiong Date: Mon, 4 Aug 2025 10:57:59 +0800 Subject: [PATCH 0775/1307] mm/slub: avoid accessing metadata when pointer is invalid in object_err() object_err() reports details of an object for further debugging, such as the freelist pointer, redzone, etc. However, if the pointer is invalid, attempting to access object metadata can lead to a crash since it does not point to a valid object. One known path to the crash is when alloc_consistency_checks() determines the pointer to the allocated object is invalid because of a freelist corruption, and calls object_err() to report it. The debug code should report and handle the corruption gracefully and not crash in the process. In case the pointer is NULL or check_valid_pointer() returns false for the pointer, only print the pointer value and skip accessing metadata. Fixes: 81819f0fc828 ("SLUB core") Cc: Signed-off-by: Li Qiong Reviewed-by: Harry Yoo Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- mm/slub.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 30003763d224..1787e4d51e48 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1140,7 +1140,12 @@ static void object_err(struct kmem_cache *s, struct slab *slab, return; slab_bug(s, reason); - print_trailer(s, slab, object); + if (!object || !check_valid_pointer(s, slab, object)) { + print_slab_info(slab); + pr_err("Invalid pointer 0x%p\n", object); + } else { + print_trailer(s, slab, object); + } add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); WARN_ON(1); From 24963ae1b0b6596dc36e352c18593800056251d8 Mon Sep 17 00:00:00 2001 From: Suchit Karunakaran Date: Sat, 16 Aug 2025 12:21:26 +0530 Subject: [PATCH 0776/1307] x86/cpu/intel: Fix the constant_tsc model check for Pentium 4 Pentium 4's which are INTEL_P4_PRESCOTT (model 0x03) and later have a constant TSC. This was correctly captured until commit fadb6f569b10 ("x86/cpu/intel: Limit the non-architectural constant_tsc model checks"). In that commit, an error was introduced while selecting the last P4 model (0x06) as the upper bound. Model 0x06 was transposed to INTEL_P4_WILLAMETTE, which is just plain wrong. That was presumably a simple typo, probably just copying and pasting the wrong P4 model. Fix the constant TSC logic to cover all later P4 models. End at INTEL_P4_CEDARMILL which accurately corresponds to the last P4 model. Fixes: fadb6f569b10 ("x86/cpu/intel: Limit the non-architectural constant_tsc model checks") Signed-off-by: Suchit Karunakaran Signed-off-by: Dave Hansen Reviewed-by: Sohil Mehta Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250816065126.5000-1-suchitkarunakaran%40gmail.com --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 076eaa41b8c8..98ae4c37c93e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -262,7 +262,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_WILLAMETTE) || + } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_CEDARMILL) || (c->x86_vfm >= INTEL_CORE_YONAH && c->x86_vfm <= INTEL_IVYBRIDGE)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } From 66e82b6e0a28d4970383e1ee5d60f431001128cd Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:02 -0500 Subject: [PATCH 0777/1307] drm/nouveau: fix error path in nvkm_gsp_fwsec_v2 Function nvkm_gsp_fwsec_v2() sets 'ret' if the kmemdup() call fails, but it never uses or returns 'ret' after that point. We always need to release the firmware regardless, so do that and then check for error. Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-1-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 52412965fac1..5b721bd9d799 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -209,11 +209,12 @@ nvkm_gsp_fwsec_v2(struct nvkm_gsp *gsp, const char *name, fw->boot_addr = bld->start_tag << 8; fw->boot_size = bld->code_size; fw->boot = kmemdup(bl->data + hdr->data_offset + bld->code_off, fw->boot_size, GFP_KERNEL); - if (!fw->boot) - ret = -ENOMEM; nvkm_firmware_put(bl); + if (!fw->boot) + return -ENOMEM; + /* Patch in interface data. */ return nvkm_gsp_fwsec_patch(gsp, fw, desc->InterfaceOffset, init_cmd); } From f529b8915543fb9ceb732cec5571f7fe12bc9530 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:03 -0500 Subject: [PATCH 0778/1307] drm/nouveau: remove unused increment in gm200_flcn_pio_imem_wr The 'tag' parameter is passed by value and is not actually used after being incremented, so remove the increment. It's the function that calls gm200_flcn_pio_imem_wr that is supposed to (and does) increment 'tag'. Fixes: 0e44c2170876 ("drm/nouveau/flcn: new code to load+boot simple HS FWs (VPR scrubber)") Reviewed-by: Philipp Stanner Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-2-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index b7da3ab44c27..6a004c6e6742 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -103,7 +103,7 @@ gm200_flcn_pio_imem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 i static void gm200_flcn_pio_imem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag) { - nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag++); + nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag); while (len >= 4) { nvkm_falcon_wr32(falcon, 0x184 + (port * 0x10), *(u32 *)img); img += 4; From 64c722b5e7f6b909b0e448e580f64628a0d76208 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 12 Aug 2025 19:10:04 -0500 Subject: [PATCH 0779/1307] drm/nouveau: remove unused memory target test The memory target check is a hold-over from a refactor. It's harmless but distracting, so just remove it. Fixes: 2541626cfb79 ("drm/nouveau/acr: use common falcon HS FW code for ACR FWs") Signed-off-by: Timur Tabi Link: https://lore.kernel.org/r/20250813001004.2986092-3-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index 6a004c6e6742..7c43397c19e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -249,9 +249,11 @@ int gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) { struct nvkm_falcon *falcon = fw->falcon; - int target, ret; + int ret; if (fw->inst) { + int target; + nvkm_falcon_mask(falcon, 0x048, 0x00000001, 0x00000001); switch (nvkm_memory_target(fw->inst)) { @@ -285,15 +287,6 @@ gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) } if (fw->boot) { - switch (nvkm_memory_target(&fw->fw.mem.memory)) { - case NVKM_MEM_TARGET_VRAM: target = 4; break; - case NVKM_MEM_TARGET_HOST: target = 5; break; - case NVKM_MEM_TARGET_NCOH: target = 6; break; - default: - WARN_ON(1); - return -EINVAL; - } - ret = nvkm_falcon_pio_wr(falcon, fw->boot, 0, 0, IMEM, falcon->code.limit - fw->boot_size, fw->boot_size, fw->boot_addr >> 8, false); From 60dfe2434eed13082f26eb7409665dfafb38fa51 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 24 Jun 2025 07:26:40 -0700 Subject: [PATCH 0780/1307] ice: fix NULL pointer dereference in ice_unplug_aux_dev() on reset Issuing a reset when the driver is loaded without RDMA support, will results in a crash as it attempts to remove RDMA's non-existent auxbus device: echo 1 > /sys/class/net//device/reset BUG: kernel NULL pointer dereference, address: 0000000000000008 ... RIP: 0010:ice_unplug_aux_dev+0x29/0x70 [ice] ... Call Trace: ice_prepare_for_reset+0x77/0x260 [ice] pci_dev_save_and_disable+0x2c/0x70 pci_reset_function+0x88/0x130 reset_store+0x5a/0xa0 kernfs_fop_write_iter+0x15e/0x210 vfs_write+0x273/0x520 ksys_write+0x6b/0xe0 do_syscall_64+0x79/0x3b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e ice_unplug_aux_dev() checks pf->cdev_info->adev for NULL pointer, but pf->cdev_info will also be NULL, leading to the deref in the trace above. Introduce a flag to be set when the creation of the auxbus device is successful, to avoid multiple NULL pointer checks in ice_unplug_aux_dev(). Fixes: c24a65b6a27c7 ("iidc/ice/irdma: Update IDC to support multiple consumers") Signed-off-by: Emil Tantilov Reviewed-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_idc.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2098f00b3cd3..8a8a01a4bb40 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -510,6 +510,7 @@ enum ice_pf_flags { ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, ICE_FLAG_UNPLUG_AUX_DEV, + ICE_FLAG_AUX_DEV_CREATED, ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */ diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 6ab53e430f91..420d45c2558b 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -336,6 +336,7 @@ int ice_plug_aux_dev(struct ice_pf *pf) mutex_lock(&pf->adev_mutex); cdev->adev = adev; mutex_unlock(&pf->adev_mutex); + set_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags); return 0; } @@ -347,15 +348,16 @@ void ice_unplug_aux_dev(struct ice_pf *pf) { struct auxiliary_device *adev; + if (!test_and_clear_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags)) + return; + mutex_lock(&pf->adev_mutex); adev = pf->cdev_info->adev; pf->cdev_info->adev = NULL; mutex_unlock(&pf->adev_mutex); - if (adev) { - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); - } + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); } /** From 86aae43f21cf784c1d7f6a9af93e5116b0f232ab Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 17 Jul 2025 09:57:09 -0700 Subject: [PATCH 0781/1307] ice: don't leave device non-functional if Tx scheduler config fails The ice_cfg_tx_topo function attempts to apply Tx scheduler topology configuration based on NVM parameters, selecting either a 5 or 9 layer topology. As part of this flow, the driver acquires the "Global Configuration Lock", which is a hardware resource associated with programming the DDP package to the device. This "lock" is implemented by firmware as a way to guarantee that only one PF can program the DDP for a device. Unlike a traditional lock, once a PF has acquired this lock, no other PF will be able to acquire it again (including that PF) until a CORER of the device. Future requests to acquire the lock report that global configuration has already completed. The following flow is used to program the Tx topology: * Read the DDP package for scheduler configuration data * Acquire the global configuration lock * Program Tx scheduler topology according to DDP package data * Trigger a CORER which clears the global configuration lock This is followed by the flow for programming the DDP package: * Acquire the global configuration lock (again) * Download the DDP package to the device * Release the global configuration lock. However, if configuration of the Tx topology fails, (i.e. ice_get_set_tx_topo returns an error code), the driver exits ice_cfg_tx_topo() immediately, and fails to trigger CORER. While the global configuration lock is held, the firmware rejects most AdminQ commands, as it is waiting for the DDP package download (or Tx scheduler topology programming) to occur. The current driver flows assume that the global configuration lock has been reset by CORER after programming the Tx topology. Thus, the same PF attempts to acquire the global lock again, and fails. This results in the driver reporting "an unknown error occurred when loading the DDP package". It then attempts to enter safe mode, but ultimately fails to finish ice_probe() since nearly all AdminQ command report error codes, and the driver stops loading the device at some point during its initialization. The only currently known way that ice_get_set_tx_topo() can fail is with certain older DDP packages which contain invalid topology configuration, on firmware versions which strictly validate this data. The most recent releases of the DDP have resolved the invalid data. However, it is still poor practice to essentially brick the device, and prevent access to the device even through safe mode or recovery mode. It is also plausible that this command could fail for some other reason in the future. We cannot simply release the global lock after a failed call to ice_get_set_tx_topo(). Releasing the lock indicates to firmware that global configuration (downloading of the DDP) has completed. Future attempts by this or other PFs to load the DDP will fail with a report that the DDP package has already been downloaded. Then, PFs will enter safe mode as they realize that the package on the device does not meet the minimum version requirement to load. The reported error messages are confusing, as they indicate the version of the default "safe mode" package in the NVM, rather than the version of the file loaded from /lib/firmware. Instead, we need to trigger CORER to clear global configuration. This is the lowest level of hardware reset which clears the global configuration lock and related state. It also clears any already downloaded DDP. Crucially, it does *not* clear the Tx scheduler topology configuration. Refactor ice_cfg_tx_topo() to always trigger a CORER after acquiring the global lock, regardless of success or failure of the topology configuration. We need to re-initialize the HW structure when we trigger the CORER. Thus, it makes sense for this to be the responsibility of ice_cfg_tx_topo() rather than its caller, ice_init_tx_topology(). This avoids needless re-initialization in cases where we don't attempt to update the Tx scheduler topology, such as if it has already been programmed. There is one catch: failure to re-initialize the HW struct should stop ice_probe(). If this function fails, we won't have a valid HW structure and cannot ensure the device is functioning properly. To handle this, ensure ice_cfg_tx_topo() returns a limited set of error codes. Set aside one specifically, -ENODEV, to indicate that the ice_init_tx_topology() should fail and stop probe. Other error codes indicate failure to apply the Tx scheduler topology. This is treated as a non-fatal error, with an informational message informing the system administrator that the updated Tx topology did not apply. This allows the device to load and function with the default Tx scheduler topology, rather than failing to load entirely. Note that this use of CORER will not result in loops with future PFs attempting to also load the invalid Tx topology configuration. The first PF will acquire the global configuration lock as part of programming the DDP. Each PF after this will attempt to acquire the global lock as part of programming the Tx topology, and will fail with the indication from firmware that global configuration is already complete. Tx scheduler topology configuration is only performed during driver init (probe or devlink reload) and not during cleanup for a CORER that happens after probe completes. Fixes: 91427e6d9030 ("ice: Support 5 layer topology") Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ddp.c | 44 ++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++--- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index e2a036ce76ca..3b2d9c436979 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2377,7 +2377,13 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, * The function will apply the new Tx topology from the package buffer * if available. * - * Return: zero when update was successful, negative values otherwise. + * Return: + * * 0 - Successfully applied topology configuration. + * * -EBUSY - Failed to acquire global configuration lock. + * * -EEXIST - Topology configuration has already been applied. + * * -EIO - Unable to apply topology configuration. + * * -ENODEV - Failed to re-initialize device after applying configuration. + * * Other negative error codes indicate unexpected failures. */ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) { @@ -2410,7 +2416,7 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) if (status) { ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n"); - return status; + return -EIO; } /* Is default topology already applied ? */ @@ -2497,31 +2503,45 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) ICE_GLOBAL_CFG_LOCK_TIMEOUT); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n"); - return status; + return -EBUSY; } /* Check if reset was triggered already. */ reg = rd32(hw, GLGEN_RSTAT); if (reg & GLGEN_RSTAT_DEVSTATE_M) { - /* Reset is in progress, re-init the HW again */ ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n"); ice_check_reset(hw); - return 0; + /* Reset is in progress, re-init the HW again */ + goto reinit_hw; } /* Set new topology */ status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true); if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n"); - return status; + ice_debug(hw, ICE_DBG_INIT, "Failed to set Tx topology, status %pe\n", + ERR_PTR(status)); + /* only report -EIO here as the caller checks the error value + * and reports an informational error message informing that + * the driver failed to program Tx topology. + */ + status = -EIO; } - /* New topology is updated, delay 1 second before issuing the CORER */ + /* Even if Tx topology config failed, we need to CORE reset here to + * clear the global configuration lock. Delay 1 second to allow + * hardware to settle then issue a CORER + */ msleep(1000); ice_reset(hw, ICE_RESET_CORER); - /* CORER will clear the global lock, so no explicit call - * required for release. - */ + ice_check_reset(hw); - return 0; +reinit_hw: + /* Since we triggered a CORER, re-initialize hardware */ + ice_deinit_hw(hw); + if (ice_init_hw(hw)) { + ice_debug(hw, ICE_DBG_INIT, "Failed to re-init hardware after setting Tx topology\n"); + return -ENODEV; + } + + return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8e0b06c1e02b..cae992d8f03c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4536,17 +4536,23 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) dev_info(dev, "Tx scheduling layers switching feature disabled\n"); else dev_info(dev, "Tx scheduling layers switching feature enabled\n"); - /* if there was a change in topology ice_cfg_tx_topo triggered - * a CORER and we need to re-init hw + return 0; + } else if (err == -ENODEV) { + /* If we failed to re-initialize the device, we can no longer + * continue loading. */ - ice_deinit_hw(hw); - err = ice_init_hw(hw); - + dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n"); return err; } else if (err == -EIO) { dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + return 0; + } else if (err == -EEXIST) { + return 0; } + /* Do not treat this as a fatal error. */ + dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n", + ERR_PTR(err)); return 0; } From 5c5e5b52bf05c7fe88768318c041052c5fac36b8 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 1 Aug 2025 15:27:12 -0700 Subject: [PATCH 0782/1307] ice: use fixed adapter index for E825C embedded devices The ice_adapter structure is used by the ice driver to connect multiple physical functions of a device in software. It was introduced by commit 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") and is primarily used for PTP support, as well as for handling certain cross-PF synchronization. The original design of ice_adapter used PCI address information to determine which devices should be connected. This was extended to support E825C devices by commit fdb7f54700b1 ("ice: Initial support for E825C hardware in ice_adapter"), which used the device ID for E825C devices instead of the PCI address. Later, commit 0093cb194a75 ("ice: use DSN instead of PCI BDF for ice_adapter index") replaced the use of Bus/Device/Function addressing with use of the device serial number. E825C devices may appear in "Dual NAC" configuration which has multiple physical devices tied to the same clock source and which need to use the same ice_adapter. Unfortunately, each "NAC" has its own NVM which has its own unique Device Serial Number. Thus, use of the DSN for connecting ice_adapter does not work properly. It "worked" in the pre-production systems because the DSN was not initialized on the test NVMs and all the NACs had the same zero'd serial number. Since we cannot rely on the DSN, lets fall back to the logic in the original E825C support which used the device ID. This is safe for E825C only because of the embedded nature of the device. It isn't a discreet adapter that can be plugged into an arbitrary system. All E825C devices on a given system are connected to the same clock source and need to be configured through the same PTP clock. To make this separation clear, reserve bit 63 of the 64-bit index values as a "fixed index" indicator. Always clear this bit when using the device serial number as an index. For E825C, use a fixed value defined as the 0x579C E825C backplane device ID bitwise ORed with the fixed index indicator. This is slightly different than the original logic of just using the device ID directly. Doing so prevents a potential issue with systems where only one of the NACs is connected with an external PHY over SGMII. In that case, one NAC would have the E825C_SGMII device ID, but the other would not. Separate the determination of the full 64-bit index from the 32-bit reduction logic. Provide both ice_adapter_index() and a wrapping ice_adapter_xa_index() which handles reducing the index to a long on 32-bit systems. As before, cache the full index value in the adapter structure to warn about collisions. This fixes issues with E825C not initializing PTP on both NACs, due to failure to connect the appropriate devices to the same ice_adapter. Fixes: 0093cb194a75 ("ice: use DSN instead of PCI BDF for ice_adapter index") Signed-off-by: Jacob Keller Reviewed-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adapter.c | 49 +++++++++++++++----- drivers/net/ethernet/intel/ice/ice_adapter.h | 4 +- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 9e4adc43e474..b53561c34708 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -13,16 +13,45 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -static unsigned long ice_adapter_index(u64 dsn) +#define ICE_ADAPTER_FIXED_INDEX BIT_ULL(63) + +#define ICE_ADAPTER_INDEX_E825C \ + (ICE_DEV_ID_E825C_BACKPLANE | ICE_ADAPTER_FIXED_INDEX) + +static u64 ice_adapter_index(struct pci_dev *pdev) { + switch (pdev->device) { + case ICE_DEV_ID_E825C_BACKPLANE: + case ICE_DEV_ID_E825C_QSFP: + case ICE_DEV_ID_E825C_SFP: + case ICE_DEV_ID_E825C_SGMII: + /* E825C devices have multiple NACs which are connected to the + * same clock source, and which must share the same + * ice_adapter structure. We can't use the serial number since + * each NAC has its own NVM generated with its own unique + * Device Serial Number. Instead, rely on the embedded nature + * of the E825C devices, and use a fixed index. This relies on + * the fact that all E825C physical functions in a given + * system are part of the same overall device. + */ + return ICE_ADAPTER_INDEX_E825C; + default: + return pci_get_dsn(pdev) & ~ICE_ADAPTER_FIXED_INDEX; + } +} + +static unsigned long ice_adapter_xa_index(struct pci_dev *pdev) +{ + u64 index = ice_adapter_index(pdev); + #if BITS_PER_LONG == 64 - return dsn; + return index; #else - return (u32)dsn ^ (u32)(dsn >> 32); + return (u32)index ^ (u32)(index >> 32); #endif } -static struct ice_adapter *ice_adapter_new(u64 dsn) +static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev) { struct ice_adapter *adapter; @@ -30,7 +59,7 @@ static struct ice_adapter *ice_adapter_new(u64 dsn) if (!adapter) return NULL; - adapter->device_serial_number = dsn; + adapter->index = ice_adapter_index(pdev); spin_lock_init(&adapter->ptp_gltsyn_time_lock); spin_lock_init(&adapter->txq_ctx_lock); refcount_set(&adapter->refcount, 1); @@ -64,24 +93,23 @@ static void ice_adapter_free(struct ice_adapter *adapter) */ struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; int err; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); - WARN_ON_ONCE(adapter->device_serial_number != dsn); + WARN_ON_ONCE(adapter->index != ice_adapter_index(pdev)); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(dsn); + adapter = ice_adapter_new(pdev); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -100,11 +128,10 @@ struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) */ void ice_adapter_put(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index db66d03c9f96..e95266c7f20b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -33,7 +33,7 @@ struct ice_port_list { * @txq_ctx_lock: Spinlock protecting access to the GLCOMM_QTX_CNTX_CTL register * @ctrl_pf: Control PF of the adapter * @ports: Ports list - * @device_serial_number: DSN cached for collision detection on 32bit systems + * @index: 64-bit index cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -44,7 +44,7 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; - u64 device_serial_number; + u64 index; }; struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); From b1a0c977c6f1130f7dd125ee3db8c2435d7e3d41 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Fri, 8 Aug 2025 17:53:10 +0200 Subject: [PATCH 0783/1307] ice: fix incorrect counter for buffer allocation failures Currently, the driver increments `alloc_page_failed` when buffer allocation fails in `ice_clean_rx_irq()`. However, this counter is intended for page allocation failures, not buffer allocation issues. This patch corrects the counter by incrementing `alloc_buf_failed` instead, ensuring accurate statistics reporting for buffer allocation failures. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reported-by: Jacob Keller Suggested-by: Paul Menzel Signed-off-by: Michal Kubiak Reviewed-by: Paul Menzel Reviewed-by: Jason Xing Reviewed-by: Aleksandr Loktionov Tested-by: Priya Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 29e0088ab6b2..d2871757ec94 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1352,7 +1352,7 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) skb = ice_construct_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; xdp_verdict = ICE_XDP_CONSUMED; } ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); From ed913b343dcf9f623e7436fa1a153c89b22d109b Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 31 Jul 2025 14:45:33 +0200 Subject: [PATCH 0784/1307] ixgbe: fix ixgbe_orom_civd_info struct layout The current layout of struct ixgbe_orom_civd_info causes incorrect data storage due to compiler-inserted padding. This results in issues when writing OROM data into the structure. Add the __packed attribute to ensure the structure layout matches the expected binary format without padding. Fixes: 70db0788a262 ("ixgbe: read the OROM version information") Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index d74116441d1c..bfeef5b0b99d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -3125,7 +3125,7 @@ static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw, if (err) return err; - combo_ver = le32_to_cpu(civd.combo_ver); + combo_ver = get_unaligned_le32(&civd.combo_ver); orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver); orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index d2f22d8558f8..ff8d640a50b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -932,7 +932,7 @@ struct ixgbe_orom_civd_info { __le32 combo_ver; /* Combo Image Version number */ u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ -}; +} __packed; /* Function specific capabilities */ struct ixgbe_hw_func_caps { From e228e7d382fa85005ee2ebf303e1bf194aca49a8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 25 Aug 2025 09:22:09 +0000 Subject: [PATCH 0785/1307] drm/gpuvm: fix various typos in .c and .h gpuvm file After working with this code for a while, I came across several typos. This patch fixes them. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250825-gpuvm-typo-fix-v1-1-14e9e78e28e6@google.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_gpuvm.c | 78 ++++++++++++++++++------------------- include/drm/drm_gpuvm.h | 10 ++--- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 74d949995a72..60b672d3fd83 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -40,7 +40,7 @@ * mapping's backing &drm_gem_object buffers. * * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing - * all existent GPU VA mappings using this &drm_gem_object as backing buffer. + * all existing GPU VA mappings using this &drm_gem_object as backing buffer. * * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. @@ -72,7 +72,7 @@ * but it can also be a 'dummy' object, which can be allocated with * drm_gpuvm_resv_object_alloc(). * - * In order to connect a struct drm_gpuva its backing &drm_gem_object each + * In order to connect a struct drm_gpuva to its backing &drm_gem_object each * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each * &drm_gpuvm_bo contains a list of &drm_gpuva structures. * @@ -81,7 +81,7 @@ * This is ensured by the API through drm_gpuvm_bo_obtain() and * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this - * particular combination. If not existent a new instance is created and linked + * particular combination. If not present, a new instance is created and linked * to the &drm_gem_object. * * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used @@ -108,7 +108,7 @@ * sequence of operations to satisfy a given map or unmap request. * * Therefore the DRM GPU VA manager provides an algorithm implementing splitting - * and merging of existent GPU VA mappings with the ones that are requested to + * and merging of existing GPU VA mappings with the ones that are requested to * be mapped or unmapped. This feature is required by the Vulkan API to * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this * as VM BIND. @@ -119,7 +119,7 @@ * execute in order to integrate the new mapping cleanly into the current state * of the GPU VA space. * - * Depending on how the new GPU VA mapping intersects with the existent mappings + * Depending on how the new GPU VA mapping intersects with the existing mappings * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount * of unmap operations, a maximum of two remap operations and a single map * operation. The caller might receive no callback at all if no operation is @@ -139,16 +139,16 @@ * one unmap operation and one or two map operations, such that drivers can * derive the page table update delta accordingly. * - * Note that there can't be more than two existent mappings to split up, one at + * Note that there can't be more than two existing mappings to split up, one at * the beginning and one at the end of the new mapping, hence there is a * maximum of two remap operations. * * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to * call back into the driver in order to unmap a range of GPU VA space. The - * logic behind this function is way simpler though: For all existent mappings + * logic behind this function is way simpler though: For all existing mappings * enclosed by the given range unmap operations are created. For mappings which - * are only partically located within the given range, remap operations are - * created such that those mappings are split up and re-mapped partically. + * are only partially located within the given range, remap operations are + * created such that those mappings are split up and re-mapped partially. * * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used @@ -168,7 +168,7 @@ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and * drm_gpuva_unmap() instead. * - * The following diagram depicts the basic relationships of existent GPU VA + * The following diagram depicts the basic relationships of existing GPU VA * mappings, a newly requested mapping and the resulting mappings as implemented * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. * @@ -218,7 +218,7 @@ * * * 4) Existent mapping is a left aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -236,9 +236,9 @@ * and/or non-contiguous BO offset. * * - * 5) Requested mapping's range is a left aligned subset of the existent one, + * 5) Requested mapping's range is a left aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one adjusting its BO offset. + * the existing one adjusting its BO offset. * * :: * @@ -271,9 +271,9 @@ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) * * - * 7) Requested mapping's range is a right aligned subset of the existent one, + * 7) Requested mapping's range is a right aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one, without adjusting the BO offset. + * the existing one, without adjusting the BO offset. * * :: * @@ -304,7 +304,7 @@ * * 9) Existent mapping is overlapped at the end by the requested mapping backed * by a different BO. Hence, map the requested mapping and split up the - * existent one, without adjusting the BO offset. + * existing one, without adjusting the BO offset. * * :: * @@ -334,9 +334,9 @@ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) * * - * 11) Requested mapping's range is a centered subset of the existent one + * 11) Requested mapping's range is a centered subset of the existing one * having a different backing BO. Hence, map the requested mapping and split - * up the existent one in two mappings, adjusting the BO offset of the right + * up the existing one in two mappings, adjusting the BO offset of the right * one accordingly. * * :: @@ -351,7 +351,7 @@ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) * * - * 12) Requested mapping is a contiguous subset of the existent one. Split it + * 12) Requested mapping is a contiguous subset of the existing one. Split it * up, but indicate that the backing PTEs could be kept. * * :: @@ -367,7 +367,7 @@ * * * 13) Existent mapping is a right aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -386,7 +386,7 @@ * * * 14) Existent mapping is a centered subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -406,7 +406,7 @@ * * 15) Existent mappings is overlapped at the beginning by the requested mapping * backed by a different BO. Hence, map the requested mapping and split up - * the existent one, adjusting its BO offset accordingly. + * the existing one, adjusting its BO offset accordingly. * * :: * @@ -469,8 +469,8 @@ * make use of them. * * The below code is strictly limited to illustrate the generic usage pattern. - * To maintain simplicitly, it doesn't make use of any abstractions for common - * code, different (asyncronous) stages with fence signalling critical paths, + * To maintain simplicity, it doesn't make use of any abstractions for common + * code, different (asynchronous) stages with fence signalling critical paths, * any other helpers or error handling in terms of freeing memory and dropping * previously taken locks. * @@ -479,7 +479,7 @@ * // Allocates a new &drm_gpuva. * struct drm_gpuva * driver_gpuva_alloc(void); * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -582,7 +582,7 @@ * .sm_step_unmap = driver_gpuva_unmap, * }; * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -680,7 +680,7 @@ * * This helper is here to provide lockless list iteration. Lockless as in, the * iterator releases the lock immediately after picking the first element from - * the list, so list insertion deletion can happen concurrently. + * the list, so list insertion and deletion can happen concurrently. * * Elements popped from the original list are kept in a local list, so removal * and is_empty checks can still happen while we're iterating the list. @@ -1160,7 +1160,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, } /** - * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs + * drm_gpuvm_prepare_objects() - prepare all associated BOs * @gpuvm: the &drm_gpuvm * @exec: the &drm_exec locking context * @num_fences: the amount of &dma_fences to reserve @@ -1230,13 +1230,13 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); /** - * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * * Acquires all dma-resv locks of all &drm_gem_objects the given * &drm_gpuvm contains mappings of. * - * Addionally, when calling this function with struct drm_gpuvm_exec::extra + * Additionally, when calling this function with struct drm_gpuvm_exec::extra * being set the driver receives the given @fn callback to lock additional * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers * would call drm_exec_prepare_obj() from within this callback. @@ -1293,7 +1293,7 @@ fn_lock_array(struct drm_gpuvm_exec *vm_exec) } /** - * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * @objs: additional &drm_gem_objects to lock * @num_objs: the number of additional &drm_gem_objects to lock @@ -1588,7 +1588,7 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); /** - * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the + * drm_gpuvm_bo_obtain() - obtains an instance of the &drm_gpuvm_bo for the * given &drm_gpuvm and &drm_gem_object * @gpuvm: The &drm_gpuvm the @obj is mapped in. * @obj: The &drm_gem_object being mapped in the @gpuvm. @@ -1624,7 +1624,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); /** - * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo + * drm_gpuvm_bo_obtain_prealloc() - obtains an instance of the &drm_gpuvm_bo * for the given &drm_gpuvm and &drm_gem_object * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. * @@ -1688,7 +1688,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); * @vm_bo: the &drm_gpuvm_bo to add or remove * @evict: indicates whether the object is evicted * - * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvm's evicted list. */ void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) @@ -1790,7 +1790,7 @@ __drm_gpuva_remove(struct drm_gpuva *va) * drm_gpuva_remove() - remove a &drm_gpuva * @va: the &drm_gpuva to remove * - * This removes the given &va from the underlaying tree. + * This removes the given &va from the underlying tree. * * It is safe to use this function using the safe versions of iterating the GPU * VA space, such as drm_gpuvm_for_each_va_safe() and @@ -2358,7 +2358,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); * * This function iterates the given range of the GPU VA space. It utilizes the * &drm_gpuvm_ops to call back into the driver providing the operations to - * unmap and, if required, split existent mappings. + * unmap and, if required, split existing mappings. * * Drivers may use these callbacks to update the GPU VA space right away within * the callback. In case the driver decides to copy and store the operations for @@ -2475,7 +2475,7 @@ static const struct drm_gpuvm_ops lock_ops = { * required without the earlier DRIVER_OP_MAP. This is safe because we've * already locked the GEM object in the earlier DRIVER_OP_MAP step. * - * Returns: 0 on success or a negative error codec + * Returns: 0 on success or a negative error code */ int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, @@ -2619,12 +2619,12 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = { * @req_offset: the offset within the &drm_gem_object * * This function creates a list of operations to perform splitting and merging - * of existent mapping(s) with the newly requested one. + * of existing mapping(s) with the newly requested one. * * The list can be iterated with &drm_gpuva_for_each_op and must be processed * in the given order. It can contain map, unmap and remap operations, but it * also can be empty if no operation is required, e.g. if the requested mapping - * already exists is the exact same way. + * already exists in the exact same way. * * There can be an arbitrary amount of unmap operations, a maximum of two remap * operations and a single map operation. The latter one represents the original diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 274532facfd6..2e7088264355 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -103,7 +103,7 @@ struct drm_gpuva { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -843,7 +843,7 @@ struct drm_gpuva_op_map { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -1189,11 +1189,11 @@ struct drm_gpuvm_ops { /** * @sm_step_unmap: called from &drm_gpuvm_sm_map and - * &drm_gpuvm_sm_unmap to unmap an existent mapping + * &drm_gpuvm_sm_unmap to unmap an existing mapping * - * This callback is called when existent mapping needs to be unmapped. + * This callback is called when existing mapping needs to be unmapped. * This is the case when either a newly requested mapping encloses an - * existent mapping or an unmap of an existent mapping is requested. + * existing mapping or an unmap of an existing mapping is requested. * * The &priv pointer matches the one the driver passed to * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. From 7ab3b7579a6d2660a3425b9ea93b9a140b07f49c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 9 Aug 2025 11:36:54 +0300 Subject: [PATCH 0786/1307] dt-bindings: display/msm: qcom,mdp5: drop lut clock None of MDP5 platforms have a LUT clock on the display-controller, it was added by the mistake. Drop it, fixing DT warnings on MSM8976 / MSM8956 platforms. Technically it's an ABI break, but no other platforms are affected. Fixes: 385c8ac763b3 ("dt-bindings: display/msm: convert MDP5 schema to YAML format") Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Patchwork: https://patchwork.freedesktop.org/patch/667822/ Signed-off-by: Rob Clark --- Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml b/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml index e153f8d26e7a..2735c78b0b67 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml @@ -60,7 +60,6 @@ properties: - const: bus - const: core - const: vsync - - const: lut - const: tbu - const: tbu_rt # MSM8996 has additional iommu clock From abebfed208515726760d79cf4f9f1a76b9a10a84 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Tue, 22 Jul 2025 16:17:40 -0500 Subject: [PATCH 0787/1307] drm/msm/dpu: Add a null ptr check for dpu_encoder_needs_modeset The drm_atomic_get_new_connector_state() can return NULL if the connector is not part of the atomic state. Add a check to prevent a NULL pointer dereference. This follows the same pattern used in dpu_encoder_update_topology() within the same file, which checks for NULL before using conn_state. Signed-off-by: Chenyuan Yang Fixes: 1ce69c265a53 ("drm/msm/dpu: move resource allocation to CRTC") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/665188/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 05e5f3463e30..258edaa18fc0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -730,6 +730,8 @@ bool dpu_encoder_needs_modeset(struct drm_encoder *drm_enc, struct drm_atomic_st return false; conn_state = drm_atomic_get_new_connector_state(state, connector); + if (!conn_state) + return false; /** * These checks are duplicated from dpu_encoder_update_topology() since From 5cfd298cc0359697f26b2b6e25385c665e431a7e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:20 +0300 Subject: [PATCH 0788/1307] soc: qcom: ubwc: use no-uwbc config for MSM8917 MSM8917 has MDSS 1.15 and Adreno 308, neither of which support UBWC. Change UBWC configuration to point out that UBWC is not supported on this platform. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668500/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 1490a7f63767..5113c2902bf2 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -225,7 +225,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,apq8096", .data = &msm8998_data }, { .compatible = "qcom,msm8226", .data = &no_ubwc_data }, { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, - { .compatible = "qcom,msm8917", .data = &msm8937_data }, + { .compatible = "qcom,msm8917", .data = &no_ubwc_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, From 61f3c19af5ce6606a8f50ba9a0661881925d28c2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:21 +0300 Subject: [PATCH 0789/1307] soc: qcom: ubwc: add more missing platforms Add UBWC configuration for SDA660 (modem-less variant of SDM660), SDM450 (similar to MSM8953), SDM632 (similar to MSM8953) and SM7325 (similar to SC7280). Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668501/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 5113c2902bf2..8b23b4d4e398 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -244,7 +244,10 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sc7280", .data = &sc7280_data, }, { .compatible = "qcom,sc8180x", .data = &sc8180x_data, }, { .compatible = "qcom,sc8280xp", .data = &sc8280xp_data, }, + { .compatible = "qcom,sda660", .data = &msm8937_data }, + { .compatible = "qcom,sdm450", .data = &msm8937_data }, { .compatible = "qcom,sdm630", .data = &msm8937_data }, + { .compatible = "qcom,sdm632", .data = &msm8937_data }, { .compatible = "qcom,sdm636", .data = &msm8937_data }, { .compatible = "qcom,sdm660", .data = &msm8937_data }, { .compatible = "qcom,sdm670", .data = &sdm670_data, }, @@ -258,6 +261,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,sm7125", .data = &sc7180_data }, { .compatible = "qcom,sm7150", .data = &sm7150_data, }, { .compatible = "qcom,sm7225", .data = &sm6350_data, }, + { .compatible = "qcom,sm7325", .data = &sc7280_data, }, { .compatible = "qcom,sm8150", .data = &sm8150_data, }, { .compatible = "qcom,sm8250", .data = &sm8250_data, }, { .compatible = "qcom,sm8350", .data = &sm8350_data, }, From ec770bb2e19196b28868698a81321d3a3c74da9d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:22 +0300 Subject: [PATCH 0790/1307] soc: qcom: add configuration for MSM8929 MSM8929 is similar to MSM8939, it doesn't support UBWC. Provide no-UBWC config for the platform. Fixes: 197713d0cf01 ("soc: qcom: ubwc: provide no-UBWC configuration") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668502/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 8b23b4d4e398..689e333ae443 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -227,6 +227,7 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,msm8916", .data = &no_ubwc_data }, { .compatible = "qcom,msm8917", .data = &no_ubwc_data }, { .compatible = "qcom,msm8937", .data = &msm8937_data }, + { .compatible = "qcom,msm8929", .data = &no_ubwc_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, { .compatible = "qcom,msm8956", .data = &msm8937_data }, From 3cf6147f2b51a569761e1ef010efbd891e3a3a15 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 14 Aug 2025 10:22:23 +0300 Subject: [PATCH 0791/1307] soc: qcom: use no-UBWC config for MSM8956/76 Both MSM8956 and MSM8976 have MDSS 1.11 which doesn't support UBWC (although they also have Adreno 510, which might support UBWC). Disable UBWC support for those platforms. Fixes: 1924272b9ce1 ("soc: qcom: Add UBWC config provider") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/668503/ Signed-off-by: Rob Clark --- drivers/soc/qcom/ubwc_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/ubwc_config.c b/drivers/soc/qcom/ubwc_config.c index 689e333ae443..15d373bff231 100644 --- a/drivers/soc/qcom/ubwc_config.c +++ b/drivers/soc/qcom/ubwc_config.c @@ -230,9 +230,9 @@ static const struct of_device_id qcom_ubwc_configs[] __maybe_unused = { { .compatible = "qcom,msm8929", .data = &no_ubwc_data }, { .compatible = "qcom,msm8939", .data = &no_ubwc_data }, { .compatible = "qcom,msm8953", .data = &msm8937_data }, - { .compatible = "qcom,msm8956", .data = &msm8937_data }, + { .compatible = "qcom,msm8956", .data = &no_ubwc_data }, { .compatible = "qcom,msm8974", .data = &no_ubwc_data }, - { .compatible = "qcom,msm8976", .data = &msm8937_data }, + { .compatible = "qcom,msm8976", .data = &no_ubwc_data }, { .compatible = "qcom,msm8996", .data = &msm8998_data }, { .compatible = "qcom,msm8998", .data = &msm8998_data }, { .compatible = "qcom,qcm2290", .data = &qcm2290_data, }, From ba0b7081f7a521d7c28b527a4f18666a148471e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 22 Aug 2025 17:00:23 -0700 Subject: [PATCH 0792/1307] perf symbol-minimal: Fix ehdr reading in filename__read_build_id The e_ident is part of the ehdr and so reading it a second time would mean the read ehdr was displaced by 16-bytes. Switch from stdio to open/read/lseek syscalls for similarity with the symbol-elf version of the function and so that later changes can alter then open flags. Fixes: fef8f648bb47 ("perf symbol: Fix use-after-free in filename__read_build_id") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250823000024.724394-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/symbol-minimal.c | 55 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7201494c5c20..8d41bd7842df 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -88,11 +87,8 @@ int filename__read_debuglink(const char *filename __maybe_unused, */ int filename__read_build_id(const char *filename, struct build_id *bid) { - FILE *fp; - int ret = -1; + int fd, ret = -1; bool need_swap = false, elf32; - u8 e_ident[EI_NIDENT]; - int i; union { struct { Elf32_Ehdr ehdr32; @@ -103,28 +99,27 @@ int filename__read_build_id(const char *filename, struct build_id *bid) Elf64_Phdr *phdr64; }; } hdrs; - void *phdr; - size_t phdr_size; - void *buf = NULL; - size_t buf_size = 0; + void *phdr, *buf = NULL; + ssize_t phdr_size, ehdr_size, buf_size = 0; - fp = fopen(filename, "r"); - if (fp == NULL) + fd = open(filename, O_RDONLY); + if (fd < 0) return -1; - if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT) goto out; - if (memcmp(e_ident, ELFMAG, SELFMAG) || - e_ident[EI_VERSION] != EV_CURRENT) + if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) || + hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT) goto out; - need_swap = check_need_swap(e_ident[EI_DATA]); - elf32 = e_ident[EI_CLASS] == ELFCLASS32; + need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]); + elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32; + ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT; - if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, - elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), - 1, fp) != 1) + if (read(fd, + (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT, + ehdr_size) != ehdr_size) goto out; if (need_swap) { @@ -138,14 +133,18 @@ int filename__read_build_id(const char *filename, struct build_id *bid) hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } } - phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum - : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) || + (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr))) + goto out; + + phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum + : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum; phdr = malloc(phdr_size); if (phdr == NULL) goto out; - fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); - if (fread(phdr, phdr_size, 1, fp) != 1) + lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (read(fd, phdr, phdr_size) != phdr_size) goto out_free; if (elf32) @@ -153,8 +152,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) else hdrs.phdr64 = phdr; - for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { - size_t p_filesz; + for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + ssize_t p_filesz; if (need_swap) { if (elf32) { @@ -180,8 +179,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; buf = tmp; } - fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); - if (fread(buf, p_filesz, 1, fp) != 1) + lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (read(fd, buf, p_filesz) != p_filesz) goto out_free; ret = read_build_id(buf, p_filesz, bid, need_swap); @@ -194,7 +193,7 @@ int filename__read_build_id(const char *filename, struct build_id *bid) free(buf); free(phdr); out: - fclose(fp); + close(fd); return ret; } From 2c369d91d0933aaff96b6b807b22363e6a38a625 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 22 Aug 2025 17:00:24 -0700 Subject: [PATCH 0793/1307] perf symbol: Add blocking argument to filename__read_build_id When synthesizing build-ids, for build ID mmap2 events, they will be added for data mmaps if -d/--data is specified. The files opened for their build IDs may block on the open causing perf to hang during synthesis. There is some robustness in existing calls to filename__read_build_id by checking the file path is to a regular file, which unfortunately fails for symlinks. Rather than adding more is_regular_file calls, switch filename__read_build_id to take a "block" argument and specify O_NONBLOCK when this is false. The existing is_regular_file checking callers and the event synthesis callers are made to pass false and thereby avoiding the hang. Fixes: 53b00ff358dc ("perf record: Make --buildid-mmap the default") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250823000024.724394-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/builtin-buildid-cache.c | 8 ++++---- tools/perf/builtin-inject.c | 4 ++-- tools/perf/tests/sdt.c | 2 +- tools/perf/util/build-id.c | 4 ++-- tools/perf/util/debuginfo.c | 8 ++++++-- tools/perf/util/dsos.c | 4 ++-- tools/perf/util/symbol-elf.c | 9 +++++---- tools/perf/util/symbol-minimal.c | 6 +++--- tools/perf/util/symbol.c | 8 ++++---- tools/perf/util/symbol.h | 2 +- tools/perf/util/synthetic-events.c | 2 +- 12 files changed, 32 insertions(+), 27 deletions(-) diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index aad572a78d7f..12387ea88b9a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -85,7 +85,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, &bid) < 0) + if (filename__read_build_id(fpath, &bid, /*block=*/true) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c98104481c8a..2e0f2004696a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -180,7 +180,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -204,7 +204,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -280,7 +280,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (!dso__build_id_filename(dso, filename, sizeof(filename), false)) return true; - if (filename__read_build_id(filename, &bid) == -1) { + if (filename__read_build_id(filename, &bid, /*block=*/true) == -1) { if (errno == ENOENT) return false; @@ -309,7 +309,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 40ba6a94f719..a114b3fa1bea 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -680,12 +680,12 @@ static int dso__read_build_id(struct dso *dso) mutex_lock(dso__lock(dso)); nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); else if (dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); free(new_name); } diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 93baee2eae42..6132f1af3e22 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -31,7 +31,7 @@ static int build_id_cache__add_file(const char *filename) struct build_id bid = { .size = 0, }; int err; - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a7018a3b0437..bf7f3268b9a2 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -115,7 +115,7 @@ int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sb struct build_id bid = { .size = 0, }; int ret; - ret = filename__read_build_id(pathname, &bid); + ret = filename__read_build_id(pathname, &bid, /*block=*/true); if (ret < 0) return ret; @@ -841,7 +841,7 @@ static int filename__read_build_id_ns(const char *filename, int ret; nsinfo__mountns_enter(nsi, &nsc); - ret = filename__read_build_id(filename, bid); + ret = filename__read_build_id(filename, bid, /*block=*/true); nsinfo__mountns_exit(&nsc); return ret; diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index a44c70f93156..bb9ebd84ec2d 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -110,8 +110,12 @@ struct debuginfo *debuginfo__new(const char *path) if (!dso) goto out; - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + /* + * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block + * incase the path isn't for a regular file. + */ + assert(!dso__has_build_id(dso)); + if (filename__read_build_id(path, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); for (type = distro_dwarf_types; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 0a7645c7fae7..64c1d65b0149 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -81,13 +81,13 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) { + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6d2c280a1730..033c79231a54 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -902,7 +902,7 @@ static int read_build_id(const char *filename, struct build_id *bid) #else // HAVE_LIBBFD_BUILDID_SUPPORT -static int read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid, bool block) { size_t size = sizeof(bid->data); int fd, err = -1; @@ -911,7 +911,7 @@ static int read_build_id(const char *filename, struct build_id *bid) if (size < BUILD_ID_SIZE) goto out; - fd = open(filename, O_RDONLY); + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); if (fd < 0) goto out; @@ -934,7 +934,7 @@ static int read_build_id(const char *filename, struct build_id *bid) #endif // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { struct kmod_path m = { .name = NULL, }; char path[PATH_MAX]; @@ -958,9 +958,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) } close(fd); filename = path; + block = true; } - err = read_build_id(filename, bid); + err = read_build_id(filename, bid, block); if (m.comp) unlink(filename); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 8d41bd7842df..41e4ebe5eac5 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -85,7 +85,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { int fd, ret = -1; bool need_swap = false, elf32; @@ -102,7 +102,7 @@ int filename__read_build_id(const char *filename, struct build_id *bid) void *phdr, *buf = NULL; ssize_t phdr_size, ehdr_size, buf_size = 0; - fd = open(filename, O_RDONLY); + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); if (fd < 0) return -1; @@ -323,7 +323,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (ret >= 0) RC_CHK_ACCESS(dso)->is_64_bit = ret; - if (filename__read_build_id(ss->name, &bid) > 0) + if (filename__read_build_id(ss->name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e816e4220d33..3fed54de5401 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1869,14 +1869,14 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. Don't block in case path + * isn't for a regular file. */ - if (!dso__has_build_id(dso) && - is_regular_file(dso__long_name(dso))) { + if (!dso__has_build_id(dso)) { struct build_id bid = { .size = 0, }; __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); - if (filename__read_build_id(name, &bid) > 0) + if (filename__read_build_id(name, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3fb5d146d9b1..347106218799 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -140,7 +140,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, struct build_id *id); +int filename__read_build_id(const char *filename, struct build_id *id, bool block); int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cb2c1ace304a..fcd1fd13c30e 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -401,7 +401,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, nsi = nsinfo__new(event->pid); nsinfo__mountns_enter(nsi, &nc); - rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; + rc = filename__read_build_id(event->filename, &bid, /*block=*/false) > 0 ? 0 : -1; nsinfo__mountns_exit(&nc); nsinfo__put(nsi); From 70c1595c181c48a022756116a6c46d5e8bad2c6f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Aug 2025 16:23:14 +0900 Subject: [PATCH 0794/1307] ata: ahci: Allow ignoring the external/hotplug capability of ports Commit 4edf1505b76d ("ata: ahci: Disallow LPM policy control for external ports") introduced disabling link power management (LPM) for ports that are advertized as external/hotplug capable. This is necessary to force the maximum power policy (ATA_LPM_MAX_POWER) onto the port link to ensure that the hotplug capability of the port is functional. However, doing so blindly for all ports can prevent systems from going into a low power state, even if the external/hotplug ports on the system are unused. E.g., a laptop may see the internal SATA slot of a docking station as an external hotplug capable port, and in such case, the user may prefer to not use the port and to favor instead enabling LPM to allow the laptop to transition to low power states. Since there is no easy method to automatically detect such choice, introduce the new mask_port_ext module parameter to allow a user to ignore the external/hotplug capability of a port. The format for this parameter value is identical to the format used for the mask_port_map parameter: a mask can be defined for all AHCI adapters of a system or for a particular adapters identified with their PCI IDs (bus:dev.func format). The function ahci_get_port_map_mask() is renamed to ahci_get_port_mask() and modified to return a mask, either for the port map mask of an adapter (to ignore ports) or for the external/hotplug capability of an adapter. Differentiation between map_port_mask and map_port_ext_mask is done by passing the parameter string to ahci_get_port_mask() as a second argument. To be consistent with this change, the function ahci_apply_port_map_mask() is renamed ahci_port_mask() and changed to return a mask value. The mask for the external/hotplug capability for an adapter, if defined by the map_port_ext_mask parameter, is stored in the new field mask_port_ext of struct ahci_host_priv. ahci_mark_external_port() is modified to not set the ATA_PFLAG_EXTERNAL flag for a port if hpriv->mask_port_ext includes the number of the port. In such case, an information message is printed to notify that the external/hotplug capability is being ignored. Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220465 Fixes: 4edf1505b76d ("ata: ahci: Disallow LPM policy control for external ports") Signed-off-by: Damien Le Moal Tested-by: Dieter Mummenschanz --- drivers/ata/ahci.c | 57 ++++++++++++++++++++++++++++++++-------------- drivers/ata/ahci.h | 1 + 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e1c24bbacf64..7a7f88b3fa2b 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -689,40 +689,50 @@ MODULE_PARM_DESC(mask_port_map, "where is the PCI ID of an AHCI controller in the " "form \"domain:bus:dev.func\""); -static void ahci_apply_port_map_mask(struct device *dev, - struct ahci_host_priv *hpriv, char *mask_s) +static char *ahci_mask_port_ext; +module_param_named(mask_port_ext, ahci_mask_port_ext, charp, 0444); +MODULE_PARM_DESC(mask_port_ext, + "32-bits mask to ignore the external/hotplug capability of ports. " + "Valid values are: " + "\"\" to apply the same mask to all AHCI controller " + "devices, and \"=,=,...\" to " + "specify different masks for the controllers specified, " + "where is the PCI ID of an AHCI controller in the " + "form \"domain:bus:dev.func\""); + +static u32 ahci_port_mask(struct device *dev, char *mask_s) { unsigned int mask; if (kstrtouint(mask_s, 0, &mask)) { dev_err(dev, "Invalid port map mask\n"); - return; + return 0; } - hpriv->mask_port_map = mask; + return mask; } -static void ahci_get_port_map_mask(struct device *dev, - struct ahci_host_priv *hpriv) +static u32 ahci_get_port_mask(struct device *dev, char *mask_p) { char *param, *end, *str, *mask_s; char *name; + u32 mask = 0; - if (!strlen(ahci_mask_port_map)) - return; + if (!mask_p || !strlen(mask_p)) + return 0; - str = kstrdup(ahci_mask_port_map, GFP_KERNEL); + str = kstrdup(mask_p, GFP_KERNEL); if (!str) - return; + return 0; /* Handle single mask case */ if (!strchr(str, '=')) { - ahci_apply_port_map_mask(dev, hpriv, str); + mask = ahci_port_mask(dev, str); goto free; } /* - * Mask list case: parse the parameter to apply the mask only if + * Mask list case: parse the parameter to get the mask only if * the device name matches. */ param = str; @@ -752,11 +762,13 @@ static void ahci_get_port_map_mask(struct device *dev, param++; } - ahci_apply_port_map_mask(dev, hpriv, mask_s); + mask = ahci_port_mask(dev, mask_s); } free: kfree(str); + + return mask; } static void ahci_pci_save_initial_config(struct pci_dev *pdev, @@ -782,8 +794,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, } /* Handle port map masks passed as module parameter. */ - if (ahci_mask_port_map) - ahci_get_port_map_mask(&pdev->dev, hpriv); + hpriv->mask_port_map = + ahci_get_port_mask(&pdev->dev, ahci_mask_port_map); + hpriv->mask_port_ext = + ahci_get_port_mask(&pdev->dev, ahci_mask_port_ext); ahci_save_initial_config(&pdev->dev, hpriv); } @@ -1757,11 +1771,20 @@ static void ahci_mark_external_port(struct ata_port *ap) void __iomem *port_mmio = ahci_port_base(ap); u32 tmp; - /* mark external ports (hotplug-capable, eSATA) */ + /* + * Mark external ports (hotplug-capable, eSATA), unless we were asked to + * ignore this feature. + */ tmp = readl(port_mmio + PORT_CMD); if (((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) || - (tmp & PORT_CMD_HPCP)) + (tmp & PORT_CMD_HPCP)) { + if (hpriv->mask_port_ext & (1U << ap->port_no)) { + ata_port_info(ap, + "Ignoring external/hotplug capability\n"); + return; + } ap->pflags |= ATA_PFLAG_EXTERNAL; + } } static void ahci_update_initial_lpm_policy(struct ata_port *ap) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 2c10c8f440d1..293b7fb216b5 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -330,6 +330,7 @@ struct ahci_host_priv { /* Input fields */ unsigned int flags; /* AHCI_HFLAG_* */ u32 mask_port_map; /* Mask of valid ports */ + u32 mask_port_ext; /* Mask of ports ext capability */ void __iomem * mmio; /* bus-independent mem map */ u32 cap; /* cap to use */ From d280233fc86692f495d5e08092e5422bc2f583a8 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Aug 2025 16:28:05 +0530 Subject: [PATCH 0795/1307] Octeontx2-af: Fix NIX X2P calibration failures Before configuring the NIX block, the AF driver initiates the "NIX block X2P bus calibration" and verifies that NIX interfaces such as CGX and LBK are active and functioning correctly. On few silicon variants(CNF10KA and CNF10KB), X2P calibration failures have been observed on some CGX blocks that are not mapped to the NIX block. Since both NIX-mapped and non-NIX-mapped CGX blocks share the same VENDOR,DEVICE,SUBSYS_DEVID, it's not possible to skip probe based on these parameters. This patch introuduces "is_cgx_mapped_to_nix" API to detect and skip probe of non NIX mapped CGX blocks. Fixes: aba53d5dbcea ("octeontx2-af: NIX block admin queue init") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250822105805.2236528-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 +++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 4ff19a04b23e..0c46ba8a5adc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1978,6 +1978,13 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_release_regions; } + if (!is_cn20k(pdev) && + !is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) { + dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n", + cgx->cgx_id); + goto err_release_regions; + } + cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx); if (!cgx->lmac_count) { dev_notice(dev, "CGX %d LMAC count is zero, skipping probe\n", cgx->cgx_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 7ee1fdeb5295..18c7bb39dbc7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -783,6 +783,20 @@ static inline bool is_cn10kb(struct rvu *rvu) return false; } +static inline bool is_cgx_mapped_to_nix(unsigned short id, u8 cgx_id) +{ + /* On CNF10KA and CNF10KB silicons only two CGX blocks are connected + * to NIX. + */ + if (id == PCI_SUBSYS_DEVID_CNF10K_A || id == PCI_SUBSYS_DEVID_CNF10K_B) + return cgx_id <= 1; + + return !(cgx_id && !(id == PCI_SUBSYS_DEVID_96XX || + id == PCI_SUBSYS_DEVID_98XX || + id == PCI_SUBSYS_DEVID_CN10K_A || + id == PCI_SUBSYS_DEVID_CN10K_B)); +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; From 97766512a9951b9fd6fc97f1b93211642bb0b220 Mon Sep 17 00:00:00 2001 From: Vladimir Riabchun Date: Fri, 22 Aug 2025 20:11:36 +0200 Subject: [PATCH 0796/1307] mISDN: hfcpci: Fix warning when deleting uninitialized timer With CONFIG_DEBUG_OBJECTS_TIMERS unloading hfcpci module leads to the following splat: [ 250.215892] ODEBUG: assert_init not available (active state 0) object: ffffffffc01a3dc0 object type: timer_list hint: 0x0 [ 250.217520] WARNING: CPU: 0 PID: 233 at lib/debugobjects.c:612 debug_print_object+0x1b6/0x2c0 [ 250.218775] Modules linked in: hfcpci(-) mISDN_core [ 250.219537] CPU: 0 UID: 0 PID: 233 Comm: rmmod Not tainted 6.17.0-rc2-g6f713187ac98 #2 PREEMPT(voluntary) [ 250.220940] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 250.222377] RIP: 0010:debug_print_object+0x1b6/0x2c0 [ 250.223131] Code: fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 4f 41 56 48 8b 14 dd a0 4e 01 9f 48 89 ee 48 c7 c7 20 46 01 9f e8 cb 84d [ 250.225805] RSP: 0018:ffff888015ea7c08 EFLAGS: 00010286 [ 250.226608] RAX: 0000000000000000 RBX: 0000000000000005 RCX: ffffffff9be93a95 [ 250.227708] RDX: 1ffff1100d945138 RSI: 0000000000000008 RDI: ffff88806ca289c0 [ 250.228993] RBP: ffffffff9f014a00 R08: 0000000000000001 R09: ffffed1002bd4f39 [ 250.230043] R10: ffff888015ea79cf R11: 0000000000000001 R12: 0000000000000001 [ 250.231185] R13: ffffffff9eea0520 R14: 0000000000000000 R15: ffff888015ea7cc8 [ 250.232454] FS: 00007f3208f01540(0000) GS:ffff8880caf5a000(0000) knlGS:0000000000000000 [ 250.233851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 250.234856] CR2: 00007f32090a7421 CR3: 0000000004d63000 CR4: 00000000000006f0 [ 250.236117] Call Trace: [ 250.236599] [ 250.236967] ? trace_irq_enable.constprop.0+0xd4/0x130 [ 250.237920] debug_object_assert_init+0x1f6/0x310 [ 250.238762] ? __pfx_debug_object_assert_init+0x10/0x10 [ 250.239658] ? __lock_acquire+0xdea/0x1c70 [ 250.240369] __try_to_del_timer_sync+0x69/0x140 [ 250.241172] ? __pfx___try_to_del_timer_sync+0x10/0x10 [ 250.242058] ? __timer_delete_sync+0xc6/0x120 [ 250.242842] ? lock_acquire+0x30/0x80 [ 250.243474] ? __timer_delete_sync+0xc6/0x120 [ 250.244262] __timer_delete_sync+0x98/0x120 [ 250.245015] HFC_cleanup+0x10/0x20 [hfcpci] [ 250.245704] __do_sys_delete_module+0x348/0x510 [ 250.246461] ? __pfx___do_sys_delete_module+0x10/0x10 [ 250.247338] do_syscall_64+0xc1/0x360 [ 250.247924] entry_SYSCALL_64_after_hwframe+0x77/0x7f Fix this by initializing hfc_tl timer with DEFINE_TIMER macro. Also, use mod_timer instead of manual timeout update. Fixes: 87c5fa1bb426 ("mISDN: Add different different timer settings for hfc-pci") Fixes: 175302f6b79e ("mISDN: hfcpci: Fix use-after-free bug in hfcpci_softirq") Signed-off-by: Vladimir Riabchun Link: https://patch.msgid.link/aKiy2D_LiWpQ5kXq@vova-pc Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcpci.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 2b05722d4dbe..ea8a0ab47afd 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -39,12 +39,13 @@ #include "hfc_pci.h" +static void hfcpci_softirq(struct timer_list *unused); static const char *hfcpci_revision = "2.0"; static int HFC_cnt; static uint debug; static uint poll, tics; -static struct timer_list hfc_tl; +static DEFINE_TIMER(hfc_tl, hfcpci_softirq); static unsigned long hfc_jiffies; MODULE_AUTHOR("Karsten Keil"); @@ -2305,8 +2306,7 @@ hfcpci_softirq(struct timer_list *unused) hfc_jiffies = jiffies + 1; else hfc_jiffies += tics; - hfc_tl.expires = hfc_jiffies; - add_timer(&hfc_tl); + mod_timer(&hfc_tl, hfc_jiffies); } static int __init @@ -2332,10 +2332,8 @@ HFC_init(void) if (poll != HFCPCI_BTRANS_THRESHOLD) { printk(KERN_INFO "%s: Using alternative poll value of %d\n", __func__, poll); - timer_setup(&hfc_tl, hfcpci_softirq, 0); - hfc_tl.expires = jiffies + tics; - hfc_jiffies = hfc_tl.expires; - add_timer(&hfc_tl); + hfc_jiffies = jiffies + tics; + mod_timer(&hfc_tl, hfc_jiffies); } else tics = 0; /* indicate the use of controller's timer */ From 007a5ffadc4fd51739527f1503b7cf048f31c413 Mon Sep 17 00:00:00 2001 From: Yeounsu Moon Date: Sun, 24 Aug 2025 03:29:24 +0900 Subject: [PATCH 0797/1307] net: dlink: fix multicast stats being counted incorrectly `McstFramesRcvdOk` counts the number of received multicast packets, and it reports the value correctly. However, reading `McstFramesRcvdOk` clears the register to zero. As a result, the driver was reporting only the packets since the last read, instead of the accumulated total. Fix this by updating the multicast statistics accumulatively instaed of instantaneously. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-on: D-Link DGE-550T Rev-A3 Signed-off-by: Yeounsu Moon Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250823182927.6063-3-yyyynoom@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index cc60ee454bf9..6bbf6e5584e5 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -1099,7 +1099,7 @@ get_stats (struct net_device *dev) dev->stats.rx_bytes += dr32(OctetRcvOk); dev->stats.tx_bytes += dr32(OctetXmtOk); - dev->stats.multicast = dr32(McstFramesRcvdOk); + dev->stats.multicast += dr32(McstFramesRcvdOk); dev->stats.collisions += dr32(SingleColFrames) + dr32(MultiColFrames); From a39d13e291c2681e475d9fd41655764dab09be7b Mon Sep 17 00:00:00 2001 From: Liming Wu Date: Thu, 31 Jul 2025 17:27:57 +0800 Subject: [PATCH 0798/1307] virtio_pci: Fix misleading comment for queue vector This patch fixes misleading comments in both legacy and modern virtio-pci device implementations. The comments previously referred to the "config vector" for parameters and return values of the `vp_legacy_queue_vector()` and `vp_modern_queue_vector()` functions, which is incorrect. Signed-off-by: Liming Wu Message-Id: <20250731092757.1000-1-liming.wu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_legacy_dev.c | 4 ++-- drivers/virtio/virtio_pci_modern_dev.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c index 677d1f68bc9b..bbbf89c22880 100644 --- a/drivers/virtio/virtio_pci_legacy_dev.c +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -140,9 +140,9 @@ EXPORT_SYMBOL_GPL(vp_legacy_set_status); * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue * @ldev: the legacy virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, u16 index, u16 vector) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index d665f8f73ea8..9e503b7a58d8 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -546,9 +546,9 @@ EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset); * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * @mdev: the modern virtio-pci device * @index: queue index - * @vector: the config vector + * @vector: the queue vector * - * Returns the config vector read from the device + * Returns the queue vector read from the device */ u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev, u16 index, u16 vector) From dd54bcf86c91a4455b1f95cbc8e9ac91205f3193 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Tue, 5 Aug 2025 16:09:17 +0300 Subject: [PATCH 0799/1307] vhost/net: Protect ubufs with rcu read lock in vhost_net_ubuf_put() When operating on struct vhost_net_ubuf_ref, the following execution sequence is theoretically possible: CPU0 is finalizing DMA operation CPU1 is doing VHOST_NET_SET_BACKEND // ubufs->refcount == 2 vhost_net_ubuf_put() vhost_net_ubuf_put_wait_and_free(oldubufs) vhost_net_ubuf_put_and_wait() vhost_net_ubuf_put() int r = atomic_sub_return(1, &ubufs->refcount); // r = 1 int r = atomic_sub_return(1, &ubufs->refcount); // r = 0 wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); // no wait occurs here because condition is already true kfree(ubufs); if (unlikely(!r)) wake_up(&ubufs->wait); // use-after-free This leads to use-after-free on ubufs access. This happens because CPU1 skips waiting for wake_up() when refcount is already zero. To prevent that use a read-side RCU critical section in vhost_net_ubuf_put(), as suggested by Hillf Danton. For this lock to take effect, free ubufs with kfree_rcu(). Cc: stable@vger.kernel.org Fixes: 0ad8b480d6ee9 ("vhost: fix ref cnt checking deadlock") Reported-by: Andrey Ryabinin Suggested-by: Hillf Danton Signed-off-by: Nikolay Kuratov Message-Id: <20250805130917.727332-1-kniv@yandex-team.ru> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6edac0c1ba9b..c6508fe0d5c8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -99,6 +99,7 @@ struct vhost_net_ubuf_ref { atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; + struct rcu_head rcu; }; #define VHOST_NET_BATCH 64 @@ -250,9 +251,13 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { - int r = atomic_sub_return(1, &ubufs->refcount); + int r; + + rcu_read_lock(); + r = atomic_sub_return(1, &ubufs->refcount); if (unlikely(!r)) wake_up(&ubufs->wait); + rcu_read_unlock(); return r; } @@ -265,7 +270,7 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) { vhost_net_ubuf_put_and_wait(ubufs); - kfree(ubufs); + kfree_rcu(ubufs, rcu); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) From ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: [PATCH 0800/1307] Revert "virtio: reject shm region if length is zero" The commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It's using the `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); From 24fc631539cc78225f5c61f99c7666fcff48024d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 23:39:57 -0700 Subject: [PATCH 0801/1307] vhost: Fix ioctl # for VHOST_[GS]ET_FORK_FROM_OWNER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VHOST_[GS]ET_FEATURES_ARRAY ioctl already took 0x83 and it would result in a build error when the vhost uapi header is used for perf tool build like below. In file included from trace/beauty/ioctl.c:93: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c: In function ‘ioctl__scnprintf_vhost_virtio_cmd’: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: error: initialized field overwritten [-Werror=override-init] 36 | [0x83] = "SET_FORK_FROM_OWNER", | ^~~~~~~~~~~~~~~~~~~~~ tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: note: (near initialization for ‘vhost_virtio_ioctl_cmds[131]’) Fixes: 7d9896e9f6d02d8a ("vhost: Reintroduce kthread API and add mode selection") Signed-off-by: Namhyung Kim Message-Id: <20250819063958.833770-1-namhyung@kernel.org> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- include/uapi/linux/vhost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 283348b64af9..c57674a6aa0d 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -260,7 +260,7 @@ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: * - Vhost will create vhost workers as kernel threads. */ -#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) /** * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. @@ -268,6 +268,6 @@ * * @return: An 8-bit value indicating the current thread mode. */ -#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) #endif From 528d92bfc0937a6a1ec837dbbcb3612a8545cd37 Mon Sep 17 00:00:00 2001 From: Ying Gao Date: Tue, 12 Aug 2025 17:51:18 +0800 Subject: [PATCH 0802/1307] virtio_input: Improve freeze handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When executing suspend to ram, if lacking the operations to reset device and free unused buffers before deleting a vq, resource leaks and inconsistent device status will appear. According to chapter "3.3.1 Driver Requirements: Device Cleanup:" of virtio-specification: Driver MUST ensure a virtqueue isn’t live (by device reset) before removing exposed buffers. Therefore, modify the virtinput_freeze function to reset the device and delete the unused buffers before deleting the virtqueue, just like virtinput_remove does. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Co-developed-by: Junnan Wu Signed-off-by: Junnan Wu Signed-off-by: Ying Gao Message-Id: <20250812095118.3622717-1-ying01.gao@samsung.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index a5d63269f20b..d0728285b6ce 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -360,11 +360,15 @@ static int virtinput_freeze(struct virtio_device *vdev) { struct virtio_input *vi = vdev->priv; unsigned long flags; + void *buf; spin_lock_irqsave(&vi->lock, flags); vi->ready = false; spin_unlock_irqrestore(&vi->lock, flags); + virtio_reset_device(vdev); + while ((buf = virtqueue_detach_unused_buf(vi->sts)) != NULL) + kfree(buf); vdev->config->del_vqs(vdev); return 0; } From 45d8ef6322b8a828d3b1e2cfb8893e2ff882cb23 Mon Sep 17 00:00:00 2001 From: Junnan Wu Date: Tue, 12 Aug 2025 17:08:17 +0800 Subject: [PATCH 0803/1307] virtio_net: adjust the execution order of function `virtnet_close` during freeze "Use after free" issue appears in suspend once race occurs when napi poll scheduls after `netif_device_detach` and before napi disables. For details, during suspend flow of virtio-net, the tx queue state is set to "__QUEUE_STATE_DRV_XOFF" by CPU-A. And at some coincidental times, if a TCP connection is still working, CPU-B does `virtnet_poll` before napi disable. In this flow, the state "__QUEUE_STATE_DRV_XOFF" of tx queue will be cleared. This is not the normal process it expects. After that, CPU-A continues to close driver then virtqueue is removed. Sequence likes below: -------------------------------------------------------------------------- CPU-A CPU-B ----- ----- suspend is called A TCP based on virtio-net still work virtnet_freeze |- virtnet_freeze_down | |- netif_device_detach | | |- netif_tx_stop_all_queues | | |- netif_tx_stop_queue | | |- set_bit | | (__QUEUE_STATE_DRV_XOFF,...) | | softirq rasied | | |- net_rx_action | | |- napi_poll | | |- virtnet_poll | | |- virtnet_poll_cleantx | | |- netif_tx_wake_queue | | |- test_and_clear_bit | | (__QUEUE_STATE_DRV_XOFF,...) | |- virtnet_close | |- virtnet_disable_queue_pair | |- virtnet_napi_tx_disable |- remove_vq_common -------------------------------------------------------------------------- When TCP delayack timer is up, a cpu gets softirq and irq handler `tcp_delack_timer_handler` will be called, which will finally call `start_xmit` in virtio net driver. Then the access to tx virtq will cause panic. The root cause of this issue is that napi tx is not disable before `netif_tx_stop_queue`, once `virnet_poll` schedules in such coincidental time, the tx queue state will be cleared. To solve this issue, adjusts the order of function `virtnet_close` in `virtnet_freeze_down`. Co-developed-by: Ying Xu Signed-off-by: Ying Xu Signed-off-by: Junnan Wu Message-Id: <20250812090817.3463403-1-junnan01.wu@samsung.com> Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d14e6d602273..975bdc5dab84 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5758,14 +5758,15 @@ static void virtnet_freeze_down(struct virtio_device *vdev) disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); - netif_tx_lock_bh(vi->dev); - netif_device_detach(vi->dev); - netif_tx_unlock_bh(vi->dev); if (netif_running(vi->dev)) { rtnl_lock(); virtnet_close(vi->dev); rtnl_unlock(); } + + netif_tx_lock_bh(vi->dev); + netif_device_detach(vi->dev); + netif_tx_unlock_bh(vi->dev); } static int init_vqs(struct virtnet_info *vi); From b3dcc9d1d806fb1e175f85978713eef868531da4 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 26 Aug 2025 10:19:46 +0300 Subject: [PATCH 0804/1307] memblock: fix kernel-doc for MEMBLOCK_RSRV_NOINIT The kernel-doc description of MEMBLOCK_RSRV_NOINIT and memblock_reserved_mark_noinit() do not accurately describe their functionality. Expand their kernel doc to make it clear that the user of MEMBLOCK_RSRV_NOINIT is responsible to properly initialize the struct pages for such regions and add more details about effects of using this flag. Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/f8140a17-c4ec-489b-b314-d45abe48bf36@redhat.com Link: https://lore.kernel.org/r/20250826071947.1949725-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 5 +++-- mm/memblock.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. diff --git a/mm/memblock.c b/mm/memblock.c index 8a0ed3074af4..117d963e677c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1091,13 +1091,20 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) /** * memblock_reserved_mark_noinit - Mark a reserved memory region with flag - * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized - * for this region. + * MEMBLOCK_RSRV_NOINIT + * * @base: the base phys addr of the region * @size: the size of the region * - * struct pages will not be initialized for reserved memory regions marked with - * %MEMBLOCK_RSRV_NOINIT. + * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will + * not be fully initialized to allow the caller optimize their initialization. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag + * completely bypasses the initialization of struct pages for such region. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this + * region will be initialized with default values but won't be marked as + * reserved. * * Return: 0 on success, -errno on failure. */ From 1148bb0c5827e4b6fcc50358783608b0f2080302 Mon Sep 17 00:00:00 2001 From: Daniel Dadap Date: Mon, 25 Aug 2025 21:48:38 -0500 Subject: [PATCH 0805/1307] ALSA: hda/hdmi: Restore missing HDMI codec entries Commit ad781b550f9a ("ALSA: hda/hdmi: Rewrite to new probe method") rewrote the HDMI codec ID tables to a new format. In doing so, recently added codec IDs from commit e0a911ac868 ("ALSA: hda: Add missing NVIDIA HDA codec IDs") were dropped from the tables. These tables had recently been split from the unified table that existed in patch_hdmi.c, and did contain the entries in question after the split but before the codec ID entries were rewritten to the new format. Restore the missing codec ID entries to nvhdmi.c and tegrahdmi.c. There do not appear to be any additional missing entries in any of the other codec ID tables when compared to the patch_hdmi.c at the final revision before the split. Fixes: ad781b550f9a ("ALSA: hda/hdmi: Rewrite to new probe method") Signed-off-by: Daniel Dadap Link: https://patch.msgid.link/aK0ghvagXy740rxd@ddadap-lakeline.nvidia.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/hdmi/nvhdmi.c | 17 +++++++++++++++++ sound/hda/codecs/hdmi/tegrahdmi.c | 2 ++ 2 files changed, 19 insertions(+) diff --git a/sound/hda/codecs/hdmi/nvhdmi.c b/sound/hda/codecs/hdmi/nvhdmi.c index b513253b1101..94671ad24b5e 100644 --- a/sound/hda/codecs/hdmi/nvhdmi.c +++ b/sound/hda/codecs/hdmi/nvhdmi.c @@ -198,15 +198,32 @@ static const struct hda_device_id snd_hda_id_nvhdmi[] = { HDA_CODEC_ID_MODEL(0x10de0098, "GPU 98 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de0099, "GPU 99 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de009a, "GPU 9a HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de009b, "GPU 9b HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de009c, "GPU 9c HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de009d, "GPU 9d HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de009e, "GPU 9e HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de009f, "GPU 9f HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a0, "GPU a0 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00a1, "GPU a1 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a3, "GPU a3 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a4, "GPU a4 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a5, "GPU a5 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a6, "GPU a6 HDMI/DP", MODEL_GENERIC), HDA_CODEC_ID_MODEL(0x10de00a7, "GPU a7 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00a8, "GPU a8 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00a9, "GPU a9 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00aa, "GPU aa HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00ab, "GPU ab HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00ad, "GPU ad HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00ae, "GPU ae HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00af, "GPU af HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00b0, "GPU b0 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00b1, "GPU b1 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00c0, "GPU c0 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00c1, "GPU c1 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00c3, "GPU c3 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00c4, "GPU c4 HDMI/DP", MODEL_GENERIC), + HDA_CODEC_ID_MODEL(0x10de00c5, "GPU c5 HDMI/DP", MODEL_GENERIC), {} /* terminator */ }; MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_nvhdmi); diff --git a/sound/hda/codecs/hdmi/tegrahdmi.c b/sound/hda/codecs/hdmi/tegrahdmi.c index f1f745187f68..5f6fe31aa202 100644 --- a/sound/hda/codecs/hdmi/tegrahdmi.c +++ b/sound/hda/codecs/hdmi/tegrahdmi.c @@ -299,7 +299,9 @@ static const struct hda_device_id snd_hda_id_tegrahdmi[] = { HDA_CODEC_ID_MODEL(0x10de002f, "Tegra194 HDMI/DP2", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0030, "Tegra194 HDMI/DP3", MODEL_TEGRA), HDA_CODEC_ID_MODEL(0x10de0031, "Tegra234 HDMI/DP", MODEL_TEGRA234), + HDA_CODEC_ID_MODEL(0x10de0033, "SoC 33 HDMI/DP", MODEL_TEGRA234), HDA_CODEC_ID_MODEL(0x10de0034, "Tegra264 HDMI/DP", MODEL_TEGRA234), + HDA_CODEC_ID_MODEL(0x10de0035, "SoC 35 HDMI/DP", MODEL_TEGRA234), {} /* terminator */ }; MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_tegrahdmi); From 16fdb3cc6af8460f23a706512c6f5e7dfdd4f338 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 10:45:39 +0200 Subject: [PATCH 0806/1307] Revert "drm/tegra: Use dma_buf from GEM object instance" This reverts commit 482c7e296edc0f594e8869a789a40be53c49bd6a. The dma_buf field in struct drm_gem_object is not stable over the object instance's lifetime. The field becomes NULL when user space releases the final GEM handle on the buffer object. This resulted in a NULL-pointer deref. Workarounds in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") and commit f6bfc9afc751 ("drm/framebuffer: Acquire internal references on GEM handles") only solved the problem partially. They especially don't work for buffer objects without a DRM framebuffer associated. Hence, this revert to going back to using .import_attach->dmabuf. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250715084549.41473-1-tzimmermann@suse.de --- drivers/gpu/drm/tegra/gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 41a285ec889f..8ede07fb7a21 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -526,7 +526,7 @@ void tegra_bo_free_object(struct drm_gem_object *gem) if (drm_gem_is_imported(gem)) { dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt, DMA_TO_DEVICE); - dma_buf_detach(gem->dma_buf, gem->import_attach); + dma_buf_detach(gem->import_attach->dmabuf, gem->import_attach); } } From 4717432dfd99bbd015b6782adca216c6f9340038 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 9 Aug 2025 21:04:19 +0800 Subject: [PATCH 0807/1307] sched/deadline: Fix dl_server_stopped() Commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") introduces dl_server_stopped(). But it is obvious that dl_server_stopped() should return true if dl_se->dl_server_active is 0. Fixes: cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") Signed-off-by: Huacai Chen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250809130419.1980742-1-chenhuacai@loongson.cn --- kernel/sched/deadline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e2d51f4306b3..bb813afe5b08 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1611,7 +1611,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se) static bool dl_server_stopped(struct sched_dl_entity *dl_se) { if (!dl_se->dl_server_active) - return false; + return true; if (dl_se->dl_server_idle) { dl_server_stop(dl_se); From bb4700adc3abec34c0a38b64f66258e4e233fc16 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 21 Jul 2025 15:01:42 +0200 Subject: [PATCH 0808/1307] sched/deadline: Always stop dl-server before changing parameters Commit cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") reduced dl-server overhead by delaying disabling servers only after there are no fair task around for a whole period, which means that deadline entities are not dequeued right away on a server stop event. However, the delay opens up a window in which a request for changing server parameters can break per-runqueue running_bw tracking, as reported by Yuri. Close the problematic window by unconditionally calling dl_server_stop() before applying the new parameters (ensuring deadline entities go through an actual dequeue). Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: Yuri Andriaccio Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20250721-upstream-fix-dlserver-lessaggressive-b4-v1-1-4ebc10c87e40@redhat.com --- kernel/sched/debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 3f06ab84d53f..02e16b70a790 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -376,10 +376,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu return -EINVAL; } - if (rq->cfs.h_nr_queued) { - update_rq_clock(rq); - dl_server_stop(&rq->fair_server); - } + update_rq_clock(rq); + dl_server_stop(&rq->fair_server); retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); if (retval) From 421fc59cf58c64f898cafbbbbda0bc705837e7df Mon Sep 17 00:00:00 2001 From: kuyo chang Date: Sun, 15 Jun 2025 21:10:56 +0800 Subject: [PATCH 0809/1307] sched/deadline: Fix RT task potential starvation when expiry time passed [Symptom] The fair server mechanism, which is intended to prevent fair starvation when higher-priority tasks monopolize the CPU. Specifically, RT tasks on the runqueue may not be scheduled as expected. [Analysis] The log "sched: DL replenish lagged too much" triggered. By memory dump of dl_server: curr = 0xFFFFFF80D6A0AC00 ( dl_server = 0xFFFFFF83CD5B1470( dl_runtime = 0x02FAF080, dl_deadline = 0x3B9ACA00, dl_period = 0x3B9ACA00, dl_bw = 0xCCCC, dl_density = 0xCCCC, runtime = 0x02FAF080, deadline = 0x0000082031EB0E80, flags = 0x0, dl_throttled = 0x0, dl_yielded = 0x0, dl_non_contending = 0x0, dl_overrun = 0x0, dl_server = 0x1, dl_server_active = 0x1, dl_defer = 0x1, dl_defer_armed = 0x0, dl_defer_running = 0x1, dl_timer = ( node = ( expires = 0x000008199756E700), _softexpires = 0x000008199756E700, function = 0xFFFFFFDB9AF44D30 = dl_task_timer, base = 0xFFFFFF83CD5A12C0, state = 0x0, is_rel = 0x0, is_soft = 0x0, clock_update_flags = 0x4, clock = 0x000008204A496900, - The timer expiration time (rq->curr->dl_server->dl_timer->expires) is already in the past, indicating the timer has expired. - The timer state (rq->curr->dl_server->dl_timer->state) is 0. [Suspected Root Cause] The relevant code flow in the throttle path of update_curr_dl_se() as follows: dequeue_dl_entity(dl_se, 0); // the DL entity is dequeued if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) { if (dl_server(dl_se)) // timer registration fails enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);//enqueue immediately ... } The failure of `start_dl_timer` is caused by attempting to register a timer with an expiration time that is already in the past. When this situation persists, the code repeatedly re-enqueues the DL entity without properly replenishing or restarting the timer, resulting in RT task may not be scheduled as expected. [Proposed Solution]: Instead of immediately re-enqueuing the DL entity on timer registration failure, this change ensures the DL entity is properly replenished and the timer is restarted, preventing RT potential starvation. Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: kuyo chang Signed-off-by: Peter Zijlstra (Intel) Closes: https://lore.kernel.org/CAMuHMdXn4z1pioTtBGMfQM0jsLviqS2jwysaWXpoLxWYoGa82w@mail.gmail.com Tested-by: Geert Uytterhoeven Tested-by: Jiri Slaby Tested-by: Diederik de Haas Link: https://lkml.kernel.org/r/20250615131129.954975-1-kuyo.chang@mediatek.com --- kernel/sched/deadline.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index bb813afe5b08..88c3bd64a8a0 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1496,10 +1496,12 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 } if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) { - if (dl_server(dl_se)) - enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); - else + if (dl_server(dl_se)) { + replenish_dl_new_period(dl_se, rq); + start_dl_timer(dl_se); + } else { enqueue_task_dl(rq, dl_task_of(dl_se), ENQUEUE_REPLENISH); + } } if (!is_leftmost(dl_se, &rq->dl)) From 52d15521eb75f9b521744db675bee61025d2fa52 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 27 Jun 2025 11:54:20 +0800 Subject: [PATCH 0810/1307] sched/deadline: Don't count nr_running for dl_server proxy tasks On CPU offline the kernel stalled with below call trace: INFO: task kworker/0:1:11 blocked for more than 120 seconds. cpuhp hold the cpu hotplug lock endless and stalled vmstat_shepherd. This is because we count nr_running twice on cpuhp enqueuing and failed the wait condition of cpuhp: enqueue_task_fair() // pick cpuhp from idle, rq->nr_running = 0 dl_server_start() [...] add_nr_running() // rq->nr_running = 1 add_nr_running() // rq->nr_running = 2 [switch to cpuhp, waiting on balance_hotplug_wait()] rcuwait_wait_event(rq->nr_running == 1 && ...) // failed, rq->nr_running=2 schedule() // wait again It doesn't make sense to count the dl_server towards runnable tasks, since it runs other tasks. Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: Yicong Yang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250627035420.37712-1-yangyicong@huawei.com --- kernel/sched/deadline.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 88c3bd64a8a0..f25301267e47 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1851,7 +1851,9 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) u64 deadline = dl_se->deadline; dl_rq->dl_nr_running++; - add_nr_running(rq_of_dl_rq(dl_rq), 1); + + if (!dl_server(dl_se)) + add_nr_running(rq_of_dl_rq(dl_rq), 1); inc_dl_deadline(dl_rq, deadline); } @@ -1861,7 +1863,9 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { WARN_ON(!dl_rq->dl_nr_running); dl_rq->dl_nr_running--; - sub_nr_running(rq_of_dl_rq(dl_rq), 1); + + if (!dl_server(dl_se)) + sub_nr_running(rq_of_dl_rq(dl_rq), 1); dec_dl_deadline(dl_rq, dl_se->deadline); } From ae668cd567a6a7622bc813ee0bb61c42bed61ba7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 22 Aug 2025 12:55:56 -0500 Subject: [PATCH 0811/1307] xfs: do not propagate ENODATA disk errors into xattr code ENODATA (aka ENOATTR) has a very specific meaning in the xfs xattr code; namely, that the requested attribute name could not be found. However, a medium error from disk may also return ENODATA. At best, this medium error may escape to userspace as "attribute not found" when in fact it's an IO (disk) error. At worst, we may oops in xfs_attr_leaf_get() when we do: error = xfs_attr_leaf_hasname(args, &bp); if (error == -ENOATTR) { xfs_trans_brelse(args->trans, bp); return error; } because an ENODATA/ENOATTR error from disk leaves us with a null bp, and the xfs_trans_brelse will then null-deref it. As discussed on the list, we really need to modify the lower level IO functions to trap all disk errors and ensure that we don't let unique errors like this leak up into higher xfs functions - many like this should be remapped to EIO. However, this patch directly addresses a reported bug in the xattr code, and should be safe to backport to stable kernels. A larger-scope patch to handle more unique errors at lower levels can follow later. (Note, prior to 07120f1abdff we did not oops, but we did return the wrong error code to userspace.) Signed-off-by: Eric Sandeen Fixes: 07120f1abdff ("xfs: Add xfs_has_attr and subroutines") Cc: stable@vger.kernel.org # v5.9+ Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_attr_remote.c | 7 +++++++ fs/xfs/libxfs/xfs_da_btree.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 4c44ce1c8a64..bff3dc226f81 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -435,6 +435,13 @@ xfs_attr_rmtval_get( 0, &bp, &xfs_attr3_rmt_buf_ops); if (xfs_metadata_is_sick(error)) xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK); + /* + * ENODATA from disk implies a disk medium failure; + * ENODATA for xattrs means attribute not found, so + * disambiguate that here. + */ + if (error == -ENODATA) + error = -EIO; if (error) return error; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 17d9e6154f19..723a0643b838 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2833,6 +2833,12 @@ xfs_da_read_buf( &bp, ops); if (xfs_metadata_is_sick(error)) xfs_dirattr_mark_sick(dp, whichfork); + /* + * ENODATA from disk implies a disk medium failure; ENODATA for + * xattrs means attribute not found, so disambiguate that here. + */ + if (error == -ENODATA && whichfork == XFS_ATTR_FORK) + error = -EIO; if (error) goto out_free; From cba70aff623b104085ab5613fedd21f6ea19095a Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Wed, 6 Aug 2025 14:09:26 +0200 Subject: [PATCH 0812/1307] USB: serial: option: add Telit Cinterion FN990A w/audio compositions Add the following Telit Cinterion FN990A w/audio compositions: 0x1077: tty (diag) + adb + rmnet + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1077 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 4 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=03(O) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 5 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1078: tty (diag) + adb + MBIM + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1078 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=11 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=10 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 5 Alt= 0 #EPs= 0 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio I: If#= 6 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1079: RNDIS + tty (diag) + adb + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 23 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1079 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=11 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host E: Ad=81(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=10 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 5 Alt= 0 #EPs= 0 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio I: If#= 6 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e5cd33093423..1f4da8120111 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1369,6 +1369,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */ .driver_info = RSVD(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1077, 0xff), /* Telit FN990A (rmnet + audio) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1078, 0xff), /* Telit FN990A (MBIM + audio) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1079, 0xff), /* Telit FN990A (RNDIS + audio) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990A (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990A (MBIM) */ From 57834ce5a6a47df282c8419019ba5495eac58fb9 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 21 Aug 2025 19:00:03 +0200 Subject: [PATCH 0813/1307] s390/mm: Prevent possible preempt_count overflow The s390 implementation of ptep_modify_prot_start() currently does preempt_disable(), and the preempt_enable() is done later in ptep_modify_prot_commit(). This logic is not really required, because the PTE lock must be held over the complete prot_start/commit transaction, as described in the comment of the generic implementation of ptep_modify_prot_start(). That comment also mentions that this interface should be batchable, and modify_prot_start_ptes() might start a transaction over a batch of PTEs, implemented as a simple loop over ptep_modify_prot_start(). In this case, the preempt_disable() in ptep_modify_prot_start() would be called multiple times, before the corresponding preempt_enable() calls happen, and this can lead to a preempt_count overflow. To fix this, simply remove the preempt_disable/enable() calls in ptep_modify_prot_start/commit(), and rely on the PTE lock being held. Commit cac1db8c3aad ("mm: optimize mprotect() by PTE batching") made use of this PTE batching for the first time, and triggers warnings like this: DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK - 10) BUG: sleeping function called from invalid context at mm/mprotect.c:576 Hence, add a Fixes tag on that commit. Not because it is broken, but to make sure that it won't get backported w/o also this fix for s390. Fixes: cac1db8c3aad ("mm: optimize mprotect() by PTE batching") Reviewed-by: Alexander Gordeev Signed-off-by: Gerald Schaefer Signed-off-by: Alexander Gordeev --- arch/s390/mm/pgtable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 60688be4e876..50eb57c976bc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -335,7 +335,6 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, int nodat; struct mm_struct *mm = vma->vm_mm; - preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); old = ptep_flush_lazy(mm, addr, ptep, nodat); @@ -360,7 +359,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, } else { set_pte(ptep, pte); } - preempt_enable(); } static inline void pmdp_idte_local(struct mm_struct *mm, From a5a261bea9bf8444300d1067b4a73bedee5b5227 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 22 Aug 2025 11:08:39 +0200 Subject: [PATCH 0814/1307] USB: serial: option: add Telit Cinterion LE910C4-WWX new compositions Add the following Telit Cinterion LE910C4-WWX new compositions: 0x1034: tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1034 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1036: tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1036 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1037: tty (diag) + tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 15 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1037 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1038: tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1038 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x103b: tty (diag) + tty (Telit custom) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=103b Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x103c: tty (Telit custom) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 11 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=103c Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1f4da8120111..fc869b7f803f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1322,7 +1322,18 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1033, 0xff), /* Telit LE910C1-EUX (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1034, 0xff), /* Telit LE910C4-WWX (rmnet) */ + .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1035, 0xff) }, /* Telit LE910C4-WWX (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1036, 0xff) }, /* Telit LE910C4-WWX */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1037, 0xff), /* Telit LE910C4-WWX (rmnet) */ + .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1038, 0xff), /* Telit LE910C4-WWX (rmnet) */ + .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x103b, 0xff), /* Telit LE910C4-WWX */ + .driver_info = NCTRL(0) | NCTRL(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x103c, 0xff), /* Telit LE910C4-WWX */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), From 810e154d90f44127239957b06ee51a55553a5815 Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Mon, 25 Aug 2025 17:08:50 +0800 Subject: [PATCH 0815/1307] gpio: timberdale: fix off-by-one in IRQ type boundary check timbgpio_irq_type() currently accepts offset == ngpio, violating gpiolib's [0..ngpio-1] contract. This can lead to undefined behavior when computing '1 << offset', and it is also inconsistent with users that iterate with for_each_set_bit(..., ngpio). Tighten the upper bound to reject offset == ngpio. No functional change for in-range offsets. Signed-off-by: Junjie Cao Link: https://lore.kernel.org/r/20250825090850.127163-1-junjie.cao@intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-timberdale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index 679e27f00ff6..f488939dd00a 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -137,7 +137,7 @@ static int timbgpio_irq_type(struct irq_data *d, unsigned trigger) u32 ver; int ret = 0; - if (offset < 0 || offset > tgpio->gpio.ngpio) + if (offset < 0 || offset >= tgpio->gpio.ngpio) return -EINVAL; ver = ioread32(tgpio->membase + TGPIO_VER); From 6fe31c8b53003134e5573cfb89aea85f96a43afd Mon Sep 17 00:00:00 2001 From: Adrian Ng Ho Yin Date: Mon, 25 Aug 2025 15:16:37 +0800 Subject: [PATCH 0816/1307] MAINTAINERS: Change Altera-PIO driver maintainer Update Altera-PIO Driver maintainer from to as Mun Yew is no longer with Altera. Signed-off-by: Adrian Ng Ho Yin Acked-by: Mun Yew Tham Link: https://lore.kernel.org/r/20250825071637.40441-1-adrianhoyin.ng@altera.com Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..5b3f80f42f3c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -931,7 +931,7 @@ F: Documentation/devicetree/bindings/dma/altr,msgdma.yaml F: drivers/dma/altera-msgdma.c ALTERA PIO DRIVER -M: Mun Yew Tham +M: Adrian Ng L: linux-gpio@vger.kernel.org S: Maintained F: drivers/gpio/gpio-altera.c From 832e5777143e799a97e8f9b96f002a90f06ba548 Mon Sep 17 00:00:00 2001 From: Martin Hilgendorf Date: Sat, 2 Aug 2025 13:45:55 +0000 Subject: [PATCH 0817/1307] HID: elecom: add support for ELECOM M-DT2DRBK The DT2DRBK trackball has 8 buttons, but the report descriptor only specifies 5. This patch adds the device ID and performs a similar fixup as for other ELECOM devices to enable the remaining 3 buttons. Signed-off-by: Martin Hilgendorf Signed-off-by: Jiri Kosina --- drivers/hid/hid-elecom.c | 2 ++ drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 0ad7d25d9864..69771fd35006 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -101,6 +101,7 @@ static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, */ mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8); break; + case USB_DEVICE_ID_ELECOM_M_DT2DRBK: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C: /* * Report descriptor format: @@ -123,6 +124,7 @@ static const struct hid_device_id elecom_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT2DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 37dc42380373..5d18b6ac81e7 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -451,6 +451,7 @@ #define USB_DEVICE_ID_ELECOM_M_XT4DRBK 0x00fd #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff +#define USB_DEVICE_ID_ELECOM_M_DT2DRBK 0x018d #define USB_DEVICE_ID_ELECOM_M_HT1URBK_010C 0x010c #define USB_DEVICE_ID_ELECOM_M_HT1URBK_019B 0x019b #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index ff11f1ad344d..5c15af9afa98 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -411,6 +411,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT2DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, From e9c8da670e749f7dedc53e3af54a87b041918092 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jul 2025 12:08:30 +0200 Subject: [PATCH 0818/1307] fuse: do not allow mapping a non-regular backing file We do not support passthrough operations other than read/write on regular file, so allowing non-regular backing files makes no sense. Fixes: efad7153bf93 ("fuse: allow O_PATH fd for FUSE_DEV_IOC_BACKING_OPEN") Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- fs/fuse/passthrough.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 607ef735ad4a..eb97ac009e75 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -237,6 +237,11 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) if (!file) goto out; + /* read/write/splice/mmap passthrough only relevant for regular files */ + res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL; + if (!d_is_reg(file->f_path.dentry)) + goto out_fput; + backing_sb = file_inode(file)->i_sb; res = -ELOOP; if (backing_sb->s_stack_depth >= fc->max_stack_depth) From e5203209b3935041dac541bc5b37efb44220cc0b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Aug 2025 14:07:54 +0200 Subject: [PATCH 0819/1307] fuse: check if copy_file_range() returns larger than requested size Just like write(), copy_file_range() should check if the return value is less or equal to the requested number of bytes. Reported-by: Chunsheng Luo Closes: https://lore.kernel.org/all/20250807062425.694-1-luochunsheng@ustc.edu/ Fixes: 88bc7d5097a1 ("fuse: add support for copy_file_range()") Cc: # v4.20 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5525a4520b0f..45207a6bb85f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3026,6 +3026,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, fc->no_copy_file_range = 1; err = -EOPNOTSUPP; } + if (!err && outarg.size > len) + err = -EIO; + if (err) goto out; From 1e08938c3694f707bb165535df352ac97a8c75c9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Aug 2025 14:46:34 +0200 Subject: [PATCH 0820/1307] fuse: prevent overflow in copy_file_range return value The FUSE protocol uses struct fuse_write_out to convey the return value of copy_file_range, which is restricted to uint32_t. But the COPY_FILE_RANGE interface supports a 64-bit size copies. Currently the number of bytes copied is silently truncated to 32-bit, which may result in poor performance or even failure to copy in case of truncation to zero. Reported-by: Florian Weimer Closes: https://lore.kernel.org/all/lhuh5ynl8z5.fsf@oldenburg.str.redhat.com/ Fixes: 88bc7d5097a1 ("fuse: add support for copy_file_range()") Cc: # v4.20 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 45207a6bb85f..4adcf09d4b01 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2960,7 +2960,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, .nodeid_out = ff_out->nodeid, .fh_out = ff_out->fh, .off_out = pos_out, - .len = len, + .len = min_t(size_t, len, UINT_MAX & PAGE_MASK), .flags = flags }; struct fuse_write_out outarg; From 79569946502258ef53984f3000bffa77e469d8dc Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 15 Aug 2025 11:25:38 -0700 Subject: [PATCH 0821/1307] fuse: reflect cached blocksize if blocksize was changed As pointed out by Miklos[1], in the fuse_update_get_attr() path, the attributes returned to stat may be cached values instead of fresh ones fetched from the server. In the case where the server returned a modified blocksize value, we need to cache it and reflect it back to stat if values are not re-fetched since we now no longer directly change inode->i_blkbits. Link: https://lore.kernel.org/linux-fsdevel/CAJfpeguCOxeVX88_zPd1hqziB_C+tmfuDhZP5qO2nKmnb-dTUA@mail.gmail.com/ [1] Fixes: 542ede096e48 ("fuse: keep inode->i_blkbits constant") Signed-off-by: Joanne Koong Reviewed-by: Darrick J. Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 1 + fs/fuse/fuse_i.h | 6 ++++++ fs/fuse/inode.c | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2d817d7cab26..ebee7e0b1cd3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1377,6 +1377,7 @@ static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode, generic_fillattr(idmap, request_mask, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; + stat->blksize = 1 << fi->cached_i_blkbits; if (test_bit(FUSE_I_BTIME, &fi->state)) { stat->btime = fi->i_btime; stat->result_mask |= STATX_BTIME; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ec248d13c8bf..1647eb7ca6fa 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -210,6 +210,12 @@ struct fuse_inode { /** Reference to backing file in passthrough mode */ struct fuse_backing *fb; #endif + + /* + * The underlying inode->i_blkbits value will not be modified, + * so preserve the blocksize specified by the server. + */ + u8 cached_i_blkbits; }; /** FUSE inode state bits */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 67c2318bfc42..3bfd83469d9f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -289,6 +289,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, } } + if (attr->blksize) + fi->cached_i_blkbits = ilog2(attr->blksize); + else + fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; + /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the From bd24d2108e9c8459d2c9f3d6d910b0053887df57 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 15 Aug 2025 11:25:39 -0700 Subject: [PATCH 0822/1307] fuse: fix fuseblk i_blkbits for iomap partial writes On regular fuse filesystems, i_blkbits is set to PAGE_SHIFT which means any iomap partial writes will mark the entire folio as uptodate. However fuseblk filesystems work differently and allow the blocksize to be less than the page size. As such, this may lead to data corruption if fuseblk sets its blocksize to less than the page size, uses the writeback cache, and does a partial write, then a read and the read happens before the write has undergone writeback, since the folio will not be marked uptodate from the partial write so the read will read in the entire folio from disk, which will overwrite the partial write. The long-term solution for this, which will also be needed for fuse to enable large folios with the writeback cache on, is to have fuse also use iomap for folio reads, but until that is done, the cleanest workaround is to use the page size for fuseblk's internal kernel inode blksize/blkbits values while maintaining current behavior for stat(). This was verified using ntfs-3g: $ sudo mkfs.ntfs -f -c 512 /dev/vdd1 $ sudo ntfs-3g /dev/vdd1 ~/fuseblk $ stat ~/fuseblk/hi.txt IO Block: 512 Fixes: a4c9ab1d4975 ("fuse: use iomap for buffered writes") Signed-off-by: Joanne Koong Reviewed-by: Darrick J. Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 8 ++++++++ fs/fuse/inode.c | 13 ++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ebee7e0b1cd3..5c569c3cb53f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, if (attr->blksize != 0) blkbits = ilog2(attr->blksize); else - blkbits = inode->i_sb->s_blocksize_bits; + blkbits = fc->blkbits; stat->blksize = 1 << blkbits; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1647eb7ca6fa..cc428d04be3e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -975,6 +975,14 @@ struct fuse_conn { /* Request timeout (in jiffies). 0 = no timeout */ unsigned int req_timeout; } timeout; + + /* + * This is a workaround until fuse uses iomap for reads. + * For fuseblk servers, this represents the blocksize passed in at + * mount time and for regular fuse servers, this is equivalent to + * inode->i_blkbits. + */ + u8 blkbits; }; /* diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3bfd83469d9f..7ddfd2b3cc9c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -292,7 +292,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, if (attr->blksize) fi->cached_i_blkbits = ilog2(attr->blksize); else - fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; + fi->cached_i_blkbits = fc->blkbits; /* * Don't set the sticky bit in i_mode, unless we want the VFS @@ -1810,10 +1810,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; + /* + * This is a workaround until fuse hooks into iomap for reads. + * Use PAGE_SIZE for the blocksize else if the writeback cache + * is enabled, buffered writes go through iomap and a read may + * overwrite partially written data if blocksize < PAGE_SIZE + */ + fc->blkbits = sb->s_blocksize_bits; + if (ctx->blksize != PAGE_SIZE && + !sb_set_blocksize(sb, PAGE_SIZE)) + goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; + fc->blkbits = sb->s_blocksize_bits; } sb->s_subtype = ctx->subtype; From 1f3214aae9f49faf495f3836216afbc6c5400b2e Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Sun, 3 Aug 2025 18:02:53 +0200 Subject: [PATCH 0823/1307] HID: quirks: add support for Legion Go dual dinput modes The Legion Go features detachable controllers which support a dual dinput mode. In this mode, the controllers appear under a single HID device with two applications. Currently, both controllers appear under the same event device, causing their controls to be mixed up. This patch separates the two so that they can be used independently. In addition, the latest firmware update for the Legion Go swaps the IDs to the ones used by the Legion Go 2, so add those IDs as well. [jkosina@suse.com: improved shortlog] Signed-off-by: Antheas Kapenekakis Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 ++ drivers/hid/hid-quirks.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5d18b6ac81e7..149798754570 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -835,6 +835,8 @@ #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E 0x602e #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6093 0x6093 +#define USB_DEVICE_ID_LENOVO_LEGION_GO_DUAL_DINPUT 0x6184 +#define USB_DEVICE_ID_LENOVO_LEGION_GO2_DUAL_DINPUT 0x61ed #define USB_VENDOR_ID_LETSKETCH 0x6161 #define USB_DEVICE_ID_WP9620N 0x4d15 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 5c15af9afa98..f619ed10535d 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -124,6 +124,8 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_T609A), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_LEGION_GO_DUAL_DINPUT), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_LEGION_GO2_DUAL_DINPUT), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, From 2d52c9e43a48387a323ab6a4fed755d55040e625 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Jul 2025 17:29:21 +0200 Subject: [PATCH 0824/1307] wifi: rt2800: select CONFIG_RT2X00_LIB as needed The rt2800 specific code requires the more general library code: ERROR: modpost: "rt2x00queue_get_entry" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_dmastart" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_dmadone" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_rxdone" [drivers/net/wireless/ralink/rt2x00/rt2x00mmio.ko] undefined! ERROR: modpost: "rt2x00lib_txdone_nomatch" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_txdone" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00queue_get_entry" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_get_bssidx" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00mac_conf_tx" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! ERROR: modpost: "rt2x00lib_txdone_noinfo" [drivers/net/wireless/ralink/rt2x00/rt2800lib.ko] undefined! Select the symbol to avoid this build failure. Fixes: 7f6109086c9e ("wifi: rt2800: move 2x00soc to 2800soc") Signed-off-by: Arnd Bergmann Acked-by: Stanislaw Gruszka Reviewed-by: Sergio Paracuellos Link: https://patch.msgid.link/20250729152924.2462423-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ralink/rt2x00/Kconfig b/drivers/net/wireless/ralink/rt2x00/Kconfig index 4d98b7723c56..d66fc839c3ce 100644 --- a/drivers/net/wireless/ralink/rt2x00/Kconfig +++ b/drivers/net/wireless/ralink/rt2x00/Kconfig @@ -225,6 +225,7 @@ config RT2800_LIB_MMIO config RT2X00_LIB_MMIO tristate + select RT2X00_LIB config RT2X00_LIB_PCI tristate From f64768bec0d57988782d26d1ea7ae21f959309dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 31 Jul 2025 09:58:33 +0200 Subject: [PATCH 0825/1307] wifi: rt2x00: fix CRC_CCITT dependency Compile-testing this driver on Arm platforms shows a link failure when the CRC functions are not part of the kernel: x86_64-linux-ld: drivers/net/wireless/ralink/rt2x00/rt2800lib.o: in function `rt2800_check_firmware': rt2800lib.c:(.text+0x20e5): undefined reference to `crc_ccitt' Move the select statement to the correct Kconfig symbol to match the call site. Fixes: 311b05e235cf ("wifi: rt2x00: add COMPILE_TEST") Signed-off-by: Arnd Bergmann Acked-by: Stanislaw Gruszka Reviewed-by: Sergio Paracuellos Link: https://patch.msgid.link/20250731075837.1969136-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/Kconfig b/drivers/net/wireless/ralink/rt2x00/Kconfig index d66fc839c3ce..17f063fc0b57 100644 --- a/drivers/net/wireless/ralink/rt2x00/Kconfig +++ b/drivers/net/wireless/ralink/rt2x00/Kconfig @@ -66,7 +66,6 @@ config RT2800PCI select RT2X00_LIB_PCI select RT2X00_LIB_FIRMWARE select RT2X00_LIB_CRYPTO - select CRC_CCITT select EEPROM_93CX6 help This adds support for rt27xx/rt28xx/rt30xx wireless chipset family. @@ -142,7 +141,6 @@ config RT2800USB select RT2X00_LIB_USB select RT2X00_LIB_FIRMWARE select RT2X00_LIB_CRYPTO - select CRC_CCITT help This adds support for rt27xx/rt28xx/rt30xx wireless chipset family. Supported chips: RT2770, RT2870 & RT3070, RT3071 & RT3072 @@ -217,6 +215,7 @@ config RT2800SOC config RT2800_LIB tristate + select CRC_CCITT config RT2800_LIB_MMIO tristate From 26e84445f02ce6b2fe5f3e0e28ff7add77f35e08 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 13 Aug 2025 16:52:36 +0300 Subject: [PATCH 0826/1307] wifi: cfg80211: fix use-after-free in cmp_bss() Following bss_free() quirk introduced in commit 776b3580178f ("cfg80211: track hidden SSID networks properly"), adjust cfg80211_update_known_bss() to free the last beacon frame elements only if they're not shared via the corresponding 'hidden_beacon_bss' pointer. Reported-by: syzbot+30754ca335e6fb7e3092@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=30754ca335e6fb7e3092 Fixes: 3ab8227d3e7d ("cfg80211: refactor cfg80211_bss_update") Signed-off-by: Dmitry Antipov Link: https://patch.msgid.link/20250813135236.799384-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index a8339ed52404..6c7b7c3828a4 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1916,7 +1916,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, */ f = rcu_access_pointer(new->pub.beacon_ies); - kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); + if (!new->pub.hidden_beacon_bss) + kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); return false; } From 9cb83d4be0b9b697eae93d321e0da999f9cdfcfc Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Aug 2025 13:08:39 +0800 Subject: [PATCH 0827/1307] wifi: brcmfmac: fix use-after-free when rescheduling brcmf_btcoex_info work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The brcmf_btcoex_detach() only shuts down the btcoex timer, if the flag timer_on is false. However, the brcmf_btcoex_timerfunc(), which runs as timer handler, sets timer_on to false. This creates critical race conditions: 1.If brcmf_btcoex_detach() is called while brcmf_btcoex_timerfunc() is executing, it may observe timer_on as false and skip the call to timer_shutdown_sync(). 2.The brcmf_btcoex_timerfunc() may then reschedule the brcmf_btcoex_info worker after the cancel_work_sync() has been executed, resulting in use-after-free bugs. The use-after-free bugs occur in two distinct scenarios, depending on the timing of when the brcmf_btcoex_info struct is freed relative to the execution of its worker thread. Scenario 1: Freed before the worker is scheduled The brcmf_btcoex_info is deallocated before the worker is scheduled. A race condition can occur when schedule_work(&bt_local->work) is called after the target memory has been freed. The sequence of events is detailed below: CPU0 | CPU1 brcmf_btcoex_detach | brcmf_btcoex_timerfunc | bt_local->timer_on = false; if (cfg->btcoex->timer_on) | ... | cancel_work_sync(); | ... | kfree(cfg->btcoex); // FREE | | schedule_work(&bt_local->work); // USE Scenario 2: Freed after the worker is scheduled The brcmf_btcoex_info is freed after the worker has been scheduled but before or during its execution. In this case, statements within the brcmf_btcoex_handler() — such as the container_of macro and subsequent dereferences of the brcmf_btcoex_info object will cause a use-after-free access. The following timeline illustrates this scenario: CPU0 | CPU1 brcmf_btcoex_detach | brcmf_btcoex_timerfunc | bt_local->timer_on = false; if (cfg->btcoex->timer_on) | ... | cancel_work_sync(); | ... | schedule_work(); // Reschedule | kfree(cfg->btcoex); // FREE | brcmf_btcoex_handler() // Worker /* | btci = container_of(....); // USE The kfree() above could | ... also occur at any point | btci-> // USE during the worker's execution| */ | To resolve the race conditions, drop the conditional check and call timer_shutdown_sync() directly. It can deactivate the timer reliably, regardless of its current state. Once stopped, the timer_on state is then set to false. Fixes: 61730d4dfffc ("brcmfmac: support critical protocol API for DHCP") Acked-by: Arend van Spriel Signed-off-by: Duoming Zhou Link: https://patch.msgid.link/20250822050839.4413-1-duoming@zju.edu.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c index 69ef8cf203d2..67c0c5a92f99 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c @@ -393,10 +393,8 @@ void brcmf_btcoex_detach(struct brcmf_cfg80211_info *cfg) if (!cfg->btcoex) return; - if (cfg->btcoex->timer_on) { - cfg->btcoex->timer_on = false; - timer_shutdown_sync(&cfg->btcoex->timer); - } + timer_shutdown_sync(&cfg->btcoex->timer); + cfg->btcoex->timer_on = false; cancel_work_sync(&cfg->btcoex->work); From a33b375ab5b3a9897a0ab76be8258d9f6b748628 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 25 Aug 2025 10:29:11 +0800 Subject: [PATCH 0828/1307] wifi: mac80211: fix incorrect type for ret The variable ret is declared as a u32 type, but it is assigned a value of -EOPNOTSUPP. Since unsigned types cannot correctly represent negative values, the type of ret should be changed to int. Signed-off-by: Liao Yuanhong Link: https://patch.msgid.link/20250825022911.139377-1-liaoyuanhong@vivo.com Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 181bcb34b795..55105d238d6b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1416,7 +1416,7 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_ftm_responder_stats *ftm_stats) { - u32 ret = -EOPNOTSUPP; + int ret = -EOPNOTSUPP; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); From 7e2f3213e85eba00acb4cfe6d71647892d63c3a1 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 26 Aug 2025 18:54:37 +1000 Subject: [PATCH 0829/1307] wifi: mac80211: increase scan_ies_len for S1G Currently the S1G capability element is not taken into account for the scan_ies_len, which leads to a buffer length validation failure in ieee80211_prep_hw_scan() and subsequent WARN in __ieee80211_start_scan(). This prevents hw scanning from functioning. To fix ensure we accommodate for the S1G capability length. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250826085437.3493-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/mac80211/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9c8f18b258a6..3ae6104e5cb2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1111,7 +1111,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result, i; enum nl80211_band band; int channels, max_bitrates; - bool supp_ht, supp_vht, supp_he, supp_eht; + bool supp_ht, supp_vht, supp_he, supp_eht, supp_s1g; struct cfg80211_chan_def dflt_chandef = {}; if (ieee80211_hw_check(hw, QUEUE_CONTROL) && @@ -1227,6 +1227,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_vht = false; supp_he = false; supp_eht = false; + supp_s1g = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; @@ -1274,6 +1275,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; + supp_s1g = supp_s1g || sband->s1g_cap.s1g; for_each_sband_iftype_data(sband, i, iftd) { u8 he_40_mhz_cap; @@ -1406,6 +1408,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); + if (supp_s1g) + local->scan_ies_len += 2 + sizeof(struct ieee80211_s1g_cap); + /* * HE cap element is variable in size - set len to allow max size */ if (supp_he) { From c5e81e672699e0c5557b2b755cc8f7a69aa92bff Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 25 Aug 2025 18:07:10 +0200 Subject: [PATCH 0830/1307] efi: stmm: Fix incorrect buffer allocation method The communication buffer allocated by setup_mm_hdr() is later on passed to tee_shm_register_kernel_buf(). The latter expects those buffers to be contiguous pages, but setup_mm_hdr() just uses kmalloc(). That can cause various corruptions or BUGs, specifically since commit 9aec2fb0fd5e ("slab: allocate frozen pages"), though it was broken before as well. Fix this by using alloc_pages_exact() instead of kmalloc(). Fixes: c44b6be62e8d ("efi: Add tee-based EFI variable driver") Signed-off-by: Jan Kiszka Acked-by: Ilias Apalodimas Acked-by: Sumit Garg Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/stmm/tee_stmm_efi.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/stmm/tee_stmm_efi.c b/drivers/firmware/efi/stmm/tee_stmm_efi.c index f741ca279052..e15d11ed165e 100644 --- a/drivers/firmware/efi/stmm/tee_stmm_efi.c +++ b/drivers/firmware/efi/stmm/tee_stmm_efi.c @@ -143,6 +143,10 @@ static efi_status_t mm_communicate(u8 *comm_buf, size_t payload_size) return var_hdr->ret_status; } +#define COMM_BUF_SIZE(__payload_size) (MM_COMMUNICATE_HEADER_SIZE + \ + MM_VARIABLE_COMMUNICATE_SIZE + \ + (__payload_size)) + /** * setup_mm_hdr() - Allocate a buffer for StandAloneMM and initialize the * header data. @@ -173,9 +177,8 @@ static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func, return NULL; } - comm_buf = kzalloc(MM_COMMUNICATE_HEADER_SIZE + - MM_VARIABLE_COMMUNICATE_SIZE + payload_size, - GFP_KERNEL); + comm_buf = alloc_pages_exact(COMM_BUF_SIZE(payload_size), + GFP_KERNEL | __GFP_ZERO); if (!comm_buf) { *ret = EFI_OUT_OF_RESOURCES; return NULL; @@ -239,7 +242,7 @@ static efi_status_t get_max_payload(size_t *size) */ *size -= 2; out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } @@ -282,7 +285,7 @@ static efi_status_t get_property_int(u16 *name, size_t name_size, memcpy(var_property, &smm_property->property, sizeof(*var_property)); out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } @@ -347,7 +350,7 @@ static efi_status_t tee_get_variable(u16 *name, efi_guid_t *vendor, memcpy(data, (u8 *)var_acc->name + var_acc->name_size, var_acc->data_size); out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } @@ -404,7 +407,7 @@ static efi_status_t tee_get_next_variable(unsigned long *name_size, memcpy(name, var_getnext->name, var_getnext->name_size); out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } @@ -467,7 +470,7 @@ static efi_status_t tee_set_variable(efi_char16_t *name, efi_guid_t *vendor, ret = mm_communicate(comm_buf, payload_size); dev_dbg(pvt_data.dev, "Set Variable %s %d %lx\n", __FILE__, __LINE__, ret); out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } @@ -507,7 +510,7 @@ static efi_status_t tee_query_variable_info(u32 attributes, *max_variable_size = mm_query_info->max_variable_size; out: - kfree(comm_buf); + free_pages_exact(comm_buf, COMM_BUF_SIZE(payload_size)); return ret; } From 80c6c1048625712200ab9cdc665d792b85594e2c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 25 Aug 2025 18:07:11 +0200 Subject: [PATCH 0831/1307] efi: stmm: Do not return EFI_OUT_OF_RESOURCES on internal errors When we are low on memory or when the internal API is violated, we cannot return EFI_OUT_OF_RESOURCES. According to the UEFI standard, that error code is either related to persistent storage used for the variable or even not foreseen as possible error (GetVariable e.g.). Use the not fully accurate but compliant error code EFI_DEVICE_ERROR in those cases. Signed-off-by: Jan Kiszka Reviewed-by: Ilias Apalodimas Acked-by: Sumit Garg Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/stmm/tee_stmm_efi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/stmm/tee_stmm_efi.c b/drivers/firmware/efi/stmm/tee_stmm_efi.c index e15d11ed165e..8501056ade8a 100644 --- a/drivers/firmware/efi/stmm/tee_stmm_efi.c +++ b/drivers/firmware/efi/stmm/tee_stmm_efi.c @@ -218,7 +218,7 @@ static efi_status_t get_max_payload(size_t *size) SMM_VARIABLE_FUNCTION_GET_PAYLOAD_SIZE, &ret); if (!var_payload) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; ret = mm_communicate(comm_buf, payload_size); if (ret != EFI_SUCCESS) @@ -264,7 +264,7 @@ static efi_status_t get_property_int(u16 *name, size_t name_size, &comm_buf, payload_size, SMM_VARIABLE_FUNCTION_VAR_CHECK_VARIABLE_PROPERTY_GET, &ret); if (!smm_property) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; memcpy(&smm_property->guid, vendor, sizeof(smm_property->guid)); smm_property->name_size = name_size; @@ -320,7 +320,7 @@ static efi_status_t tee_get_variable(u16 *name, efi_guid_t *vendor, var_acc = setup_mm_hdr(&comm_buf, payload_size, SMM_VARIABLE_FUNCTION_GET_VARIABLE, &ret); if (!var_acc) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; /* Fill in contents */ memcpy(&var_acc->guid, vendor, sizeof(var_acc->guid)); @@ -386,7 +386,7 @@ static efi_status_t tee_get_next_variable(unsigned long *name_size, SMM_VARIABLE_FUNCTION_GET_NEXT_VARIABLE_NAME, &ret); if (!var_getnext) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; /* Fill in contents */ memcpy(&var_getnext->guid, guid, sizeof(var_getnext->guid)); @@ -442,7 +442,7 @@ static efi_status_t tee_set_variable(efi_char16_t *name, efi_guid_t *vendor, var_acc = setup_mm_hdr(&comm_buf, payload_size, SMM_VARIABLE_FUNCTION_SET_VARIABLE, &ret); if (!var_acc) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; /* * The API has the ability to override RO flags. If no RO check was @@ -498,7 +498,7 @@ static efi_status_t tee_query_variable_info(u32 attributes, SMM_VARIABLE_FUNCTION_QUERY_VARIABLE_INFO, &ret); if (!mm_query_info) - return EFI_OUT_OF_RESOURCES; + return EFI_DEVICE_ERROR; mm_query_info->attr = attributes; ret = mm_communicate(comm_buf, payload_size); From 01a3044af5d910e1f8b0bee53b2e0eccee8f9a5c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 25 Aug 2025 18:07:12 +0200 Subject: [PATCH 0832/1307] efi: stmm: Drop unused EFI error from setup_mm_hdr arguments No caller ever evaluates what we return in 'ret'. They only use the return code of the function. Signed-off-by: Jan Kiszka Reviewed-by: Ilias Apalodimas Acked-by: Sumit Garg Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/stmm/tee_stmm_efi.c | 25 ++++++++---------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/efi/stmm/tee_stmm_efi.c b/drivers/firmware/efi/stmm/tee_stmm_efi.c index 8501056ade8a..c2bc8467b099 100644 --- a/drivers/firmware/efi/stmm/tee_stmm_efi.c +++ b/drivers/firmware/efi/stmm/tee_stmm_efi.c @@ -154,11 +154,9 @@ static efi_status_t mm_communicate(u8 *comm_buf, size_t payload_size) * @dptr: pointer address to store allocated buffer * @payload_size: payload size * @func: standAloneMM function number - * @ret: EFI return code * Return: pointer to corresponding StandAloneMM function buffer or NULL */ -static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func, - efi_status_t *ret) +static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func) { const efi_guid_t mm_var_guid = EFI_MM_VARIABLE_GUID; struct efi_mm_communicate_header *mm_hdr; @@ -173,16 +171,13 @@ static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func, if (max_buffer_size && max_buffer_size < (MM_COMMUNICATE_HEADER_SIZE + MM_VARIABLE_COMMUNICATE_SIZE + payload_size)) { - *ret = EFI_INVALID_PARAMETER; return NULL; } comm_buf = alloc_pages_exact(COMM_BUF_SIZE(payload_size), GFP_KERNEL | __GFP_ZERO); - if (!comm_buf) { - *ret = EFI_OUT_OF_RESOURCES; + if (!comm_buf) return NULL; - } mm_hdr = (struct efi_mm_communicate_header *)comm_buf; memcpy(&mm_hdr->header_guid, &mm_var_guid, sizeof(mm_hdr->header_guid)); @@ -192,7 +187,6 @@ static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func, var_hdr->function = func; if (dptr) *dptr = comm_buf; - *ret = EFI_SUCCESS; return var_hdr->data; } @@ -215,8 +209,7 @@ static efi_status_t get_max_payload(size_t *size) payload_size = sizeof(*var_payload); var_payload = setup_mm_hdr(&comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_GET_PAYLOAD_SIZE, - &ret); + SMM_VARIABLE_FUNCTION_GET_PAYLOAD_SIZE); if (!var_payload) return EFI_DEVICE_ERROR; @@ -262,7 +255,7 @@ static efi_status_t get_property_int(u16 *name, size_t name_size, smm_property = setup_mm_hdr( &comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_VAR_CHECK_VARIABLE_PROPERTY_GET, &ret); + SMM_VARIABLE_FUNCTION_VAR_CHECK_VARIABLE_PROPERTY_GET); if (!smm_property) return EFI_DEVICE_ERROR; @@ -318,7 +311,7 @@ static efi_status_t tee_get_variable(u16 *name, efi_guid_t *vendor, payload_size = MM_VARIABLE_ACCESS_HEADER_SIZE + name_size + tmp_dsize; var_acc = setup_mm_hdr(&comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_GET_VARIABLE, &ret); + SMM_VARIABLE_FUNCTION_GET_VARIABLE); if (!var_acc) return EFI_DEVICE_ERROR; @@ -383,8 +376,7 @@ static efi_status_t tee_get_next_variable(unsigned long *name_size, payload_size = MM_VARIABLE_GET_NEXT_HEADER_SIZE + out_name_size; var_getnext = setup_mm_hdr(&comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_GET_NEXT_VARIABLE_NAME, - &ret); + SMM_VARIABLE_FUNCTION_GET_NEXT_VARIABLE_NAME); if (!var_getnext) return EFI_DEVICE_ERROR; @@ -440,7 +432,7 @@ static efi_status_t tee_set_variable(efi_char16_t *name, efi_guid_t *vendor, * the properties, if the allocation fails */ var_acc = setup_mm_hdr(&comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_SET_VARIABLE, &ret); + SMM_VARIABLE_FUNCTION_SET_VARIABLE); if (!var_acc) return EFI_DEVICE_ERROR; @@ -495,8 +487,7 @@ static efi_status_t tee_query_variable_info(u32 attributes, payload_size = sizeof(*mm_query_info); mm_query_info = setup_mm_hdr(&comm_buf, payload_size, - SMM_VARIABLE_FUNCTION_QUERY_VARIABLE_INFO, - &ret); + SMM_VARIABLE_FUNCTION_QUERY_VARIABLE_INFO); if (!mm_query_info) return EFI_DEVICE_ERROR; From 134ed1093907a79e5d98087513f13fd7652c4df9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 25 Aug 2025 18:07:13 +0200 Subject: [PATCH 0833/1307] efi: stmm: Drop unneeded null pointer check The API documenation of setup_mm_hdr does not mention that dptr can be NULL, this is a local function, and no caller passes NULL. So drop the unneeded check. Signed-off-by: Jan Kiszka Reviewed-by: Ilias Apalodimas Acked-by: Sumit Garg Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/stmm/tee_stmm_efi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/efi/stmm/tee_stmm_efi.c b/drivers/firmware/efi/stmm/tee_stmm_efi.c index c2bc8467b099..65c0fe1ba275 100644 --- a/drivers/firmware/efi/stmm/tee_stmm_efi.c +++ b/drivers/firmware/efi/stmm/tee_stmm_efi.c @@ -185,8 +185,7 @@ static void *setup_mm_hdr(u8 **dptr, size_t payload_size, size_t func) var_hdr = (struct smm_variable_communicate_header *)mm_hdr->data; var_hdr->function = func; - if (dptr) - *dptr = comm_buf; + *dptr = comm_buf; return var_hdr->data; } From e246518aa24f1460902725e97d0abf574aec6ade Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Aug 2025 13:43:47 +0200 Subject: [PATCH 0834/1307] PM: sleep: annotate RCU list iterations These iterations require the read lock, otherwise RCU lockdep will splat: ============================= WARNING: suspicious RCU usage 6.17.0-rc3-00014-g31419c045d64 #6 Tainted: G O ----------------------------- drivers/base/power/main.c:1333 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 5 locks held by rtcwake/547: #0: 00000000643ab418 (sb_writers#6){.+.+}-{0:0}, at: file_start_write+0x2b/0x3a #1: 0000000067a0ca88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x181/0x24b #2: 00000000631eac40 (kn->active#3){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x191/0x24b #3: 00000000609a1308 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0xaf/0x30b #4: 0000000060c0fdb0 (device_links_srcu){.+.+}-{0:0}, at: device_links_read_lock+0x75/0x98 stack backtrace: CPU: 0 UID: 0 PID: 547 Comm: rtcwake Tainted: G O 6.17.0-rc3-00014-g31419c045d64 #6 VOLUNTARY Tainted: [O]=OOT_MODULE Stack: 223721b3a80 6089eac6 00000001 00000001 ffffff00 6089eac6 00000535 6086e528 721b3ac0 6003c294 00000000 60031fc0 Call Trace: [<600407ed>] show_stack+0x10e/0x127 [<6003c294>] dump_stack_lvl+0x77/0xc6 [<6003c2fd>] dump_stack+0x1a/0x20 [<600bc2f8>] lockdep_rcu_suspicious+0x116/0x13e [<603d8ea1>] dpm_async_suspend_superior+0x117/0x17e [<603d980f>] device_suspend+0x528/0x541 [<603da24b>] dpm_suspend+0x1a2/0x267 [<603da837>] dpm_suspend_start+0x5d/0x72 [<600ca0c9>] suspend_devices_and_enter+0xab/0x736 [...] Add the fourth argument to the iteration to annotate this and avoid the splat. Fixes: 06799631d522 ("PM: sleep: Make async suspend handle suppliers like parents") Fixes: ed18738fff02 ("PM: sleep: Make async resume handle consumers like children") Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250826134348.aba79f6e6299.I9ecf55da46ccf33778f2c018a82e1819d815b348@changeid Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index dbf5456cd891..2ea6e05e6ec9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -675,7 +675,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" consumers. */ - list_for_each_entry_rcu(link, &dev->links.consumers, s_node) + list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->consumer, func); @@ -1330,7 +1330,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func) idx = device_links_read_lock(); /* Start processing the device's "async" suppliers. */ - list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_async_with_cleanup(link->supplier, func); From 04e1f683cd28dc9407b238543871a6e09a570dc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20Kempczy=C5=84ski?= Date: Wed, 20 Aug 2025 10:39:04 +0200 Subject: [PATCH 0835/1307] drm/xe/xe_sync: avoid race during ufence signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marking ufence as signalled after copy_to_user() is too late. Worker thread which signals ufence by memory write might be raced with another userspace vm-bind call. In map/unmap scenario unmap may still see ufence is not signalled causing -EBUSY. Change the order of marking / write to user-fence fixes this issue. Fixes: 977e5b82e090 ("drm/xe: Expose user fence from xe_sync_entry") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5536 Signed-off-by: Zbigniew Kempczyński Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250820083903.2109891-2-zbigniew.kempczynski@intel.com (cherry picked from commit 8ae04fe9ffc93d6bc3bc63ac08375427d69cee06) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..82872a51f098 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the UMD * to safely reuse the same ufence without encountering -EBUSY errors. */ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } From 7551865cd12af2dc47e5a174eebcfb0b94b5449b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 21 Aug 2025 16:30:43 +0200 Subject: [PATCH 0836/1307] drm/xe/vm: Don't pin the vm_resv during validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pinning has the odd side-effect that unlocking *any* resv during validation triggers an "unlocking pinned lock" warning. Cc: Matthew Brost Fixes: 5cc3325584c4 ("drm/xe: Rework eviction rejection of bound external bos") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821143045.106005-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 0a51bf3e54dd8b77e6f1febbbb66def0660862d2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 5 ++--- drivers/gpu/drm/xe/xe_vm.h | 15 ++------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 18f27da47a36..d3ef79ebceee 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2438,7 +2438,6 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; - struct pin_cookie cookie; int ret; if (vm) { @@ -2449,10 +2448,10 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } - cookie = xe_vm_set_validating(vm, allow_res_evict); + xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); - xe_vm_clear_validating(vm, allow_res_evict, cookie); + xe_vm_clear_validating(vm, allow_res_evict); return ret; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 2f213737c7e5..2ecb417c19a2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -315,22 +315,14 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. - * - * Return: A pin cookie that should be used for xe_vm_clear_validating(). */ -static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, - bool allow_res_evict) +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { - struct pin_cookie cookie = {}; - if (vm && !allow_res_evict) { xe_vm_assert_held(vm); - cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, current); } - - return cookie; } /** @@ -338,17 +330,14 @@ static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, * @vm: Pointer to the vm or NULL * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same * value as for xe_vm_set_validation(). - * @cookie: Cookie obtained from xe_vm_set_validating(). * * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. */ -static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, - struct pin_cookie cookie) +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) { if (vm && !allow_res_evict) { - lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, NULL); } From 2b55ddf36229e0278c956215784ab1feeff510aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 21 Aug 2025 16:30:45 +0200 Subject: [PATCH 0837/1307] drm/xe/vm: Clear the scratch_pt pointer on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid triggering a dereference of an error pointer on cleanup in xe_vm_free_scratch() by clearing any scratch_pt error pointer. Signed-off-by: Thomas Hellström Fixes: 06951c2ee72d ("drm/xe: Use NULL PTEs as scratch PTEs") Cc: Brian Welty Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821143045.106005-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 358ee50ab565f3c8ea32480e9d03127a81ba32f8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ec04bef8ae40..d60c4b115304 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1610,8 +1610,12 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } From 16ca06aa2c2218cb21907c0c45a746958c944def Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 25 Aug 2025 08:28:41 -0700 Subject: [PATCH 0838/1307] drm/xe: Don't trigger rebind on initial dma-buf validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the first validate of an imported dma-buf (initial bind), the device has no GPU mappings, so a rebind is unnecessary. Rebinding here is harmful in multi-GPU setups and for VMs using preempt-fence mode, as it would evict in-flight GPU work. v2: - Drop dma_buf_validated, check for XE_PL_SYSTEM (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250825152841.3837378-1-matthew.brost@intel.com (cherry picked from commit ffdf968762e4fb3cdae54e811ec3525e67440a60) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d3ef79ebceee..1be2415966df 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -812,7 +812,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; From 75671d90fde8c78e940e15a1366a50ece56c6b69 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 25 Aug 2025 15:57:42 +0000 Subject: [PATCH 0839/1307] drm/xe: switch to local xbasename() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b0a2ee5567ab ("drm/xe: prepare xe_gen_wa_oob to be multi-use") introduced a call to basename(). The GNU version of this function is not portable and fails to build with alternative libc implementations like musl or bionic. This causes the following build error: drivers/gpu/drm/xe/xe_gen_wa_oob.c:130:12: error: assignment to ‘const char *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] 130 | fn = basename(fn); | ^ While a POSIX version of basename() could be used, it would require a separate header plus the behavior differs from GNU version in that it might modify its argument. Not great. Instead, implement a local xbasename() helper based on strrchr() that provides the same functionality and avoids portability issues. Fixes: b0a2ee5567ab ("drm/xe: prepare xe_gen_wa_oob to be multi-use") Suggested-by: Lucas De Marchi Reviewed-by: Tiffany Yang Signed-off-by: Carlos Llamas Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250825155743.1132433-1-cmllamas@google.com Signed-off-by: Lucas De Marchi (cherry picked from commit 41be792f5baaf90d744a9a9e82994ce560ca9582) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gen_wa_oob.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 6581cb0f0e59..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -123,11 +123,19 @@ static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) return 0; } +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? p + 1 : s; +} + static int fn_to_prefix(const char *fn, char *prefix, size_t size) { size_t len; - fn = basename(fn); + fn = xbasename(fn); len = strlen(fn); if (len > size - 1) From 198f36f902ec7e99b645382505f74b87a4523ed9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Aug 2025 11:27:19 -0700 Subject: [PATCH 0840/1307] blk-zoned: Fix a lockdep complaint about recursive locking If preparing a write bio fails then blk_zone_wplug_bio_work() calls bio_endio() with zwplug->lock held. If a device mapper driver is stacked on top of the zoned block device then this results in nested locking of zwplug->lock. The resulting lockdep complaint is a false positive because this is nested locking and not recursive locking. Suppress this false positive by calling blk_zone_wplug_bio_io_error() without holding zwplug->lock. This is safe because no code in blk_zone_wplug_bio_io_error() depends on zwplug->lock being held. This patch suppresses the following lockdep complaint: WARNING: possible recursive locking detected -------------------------------------------- kworker/3:0H/46 is trying to acquire lock: ffffff882968b830 (&zwplug->lock){-...}-{2:2}, at: blk_zone_write_plug_bio_endio+0x64/0x1f0 but task is already holding lock: ffffff88315bc230 (&zwplug->lock){-...}-{2:2}, at: blk_zone_wplug_bio_work+0x8c/0x48c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&zwplug->lock); lock(&zwplug->lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/3:0H/46: #0: ffffff8809486758 ((wq_completion)sdd_zwplugs){+.+.}-{0:0}, at: process_one_work+0x1bc/0x65c #1: ffffffc085de3d70 ((work_completion)(&zwplug->bio_work)){+.+.}-{0:0}, at: process_one_work+0x1e4/0x65c #2: ffffff88315bc230 (&zwplug->lock){-...}-{2:2}, at: blk_zone_wplug_bio_work+0x8c/0x48c stack backtrace: CPU: 3 UID: 0 PID: 46 Comm: kworker/3:0H Tainted: G W OE 6.12.38-android16-5-maybe-dirty-4k #1 8b362b6f76e3645a58cd27d86982bce10d150025 Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Spacecraft board based on MALIBU (DT) Workqueue: sdd_zwplugs blk_zone_wplug_bio_work Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0xa0 dump_stack+0x18/0x24 print_deadlock_bug+0x38c/0x398 __lock_acquire+0x13e8/0x2e1c lock_acquire+0x134/0x2b4 _raw_spin_lock_irqsave+0x5c/0x80 blk_zone_write_plug_bio_endio+0x64/0x1f0 bio_endio+0x9c/0x240 __dm_io_complete+0x214/0x260 clone_endio+0xe8/0x214 bio_endio+0x218/0x240 blk_zone_wplug_bio_work+0x204/0x48c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 Cc: stable@vger.kernel.org Cc: Damien Le Moal Cc: Christoph Hellwig Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250825182720.1697203-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ef43aaca49f4..5e2a5788dc3b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1286,14 +1286,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) struct block_device *bdev; unsigned long flags; struct bio *bio; + bool prepared; /* * Submit the next plugged BIO. If we do not have any, clear * the plugged flag. */ - spin_lock_irqsave(&zwplug->lock, flags); - again: + spin_lock_irqsave(&zwplug->lock, flags); bio = bio_list_pop(&zwplug->bio_list); if (!bio) { zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; @@ -1304,13 +1304,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no, bio->bi_iter.bi_sector, bio_sectors(bio)); - if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { + prepared = blk_zone_wplug_prepare_bio(zwplug, bio); + spin_unlock_irqrestore(&zwplug->lock, flags); + + if (!prepared) { blk_zone_wplug_bio_io_error(zwplug, bio); goto again; } - spin_unlock_irqrestore(&zwplug->lock, flags); - bdev = bio->bi_bdev; /* From 051b02b17a8b383ee033db211f90f24b91ac7006 Mon Sep 17 00:00:00 2001 From: Aaron Erhardt Date: Tue, 26 Aug 2025 16:10:54 +0200 Subject: [PATCH 0841/1307] ALSA: hda/realtek: Fix headset mic for TongFang X6[AF]R5xxY Add a PCI quirk to enable microphone detection on the headphone jack of TongFang X6AR5xxY and X6FR5xxY devices. Signed-off-by: Aaron Erhardt Cc: Link: https://patch.msgid.link/20250826141054.1201482-1-aer@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 0323606b3d6d..85bb8c4d3b17 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7147,6 +7147,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d05, 0x300f, "TongFang X6AR5xxY", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d05, 0x3019, "TongFang X6FR5xxY", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), From e3ef9445cd9d90e43de0bd3cd55d437773dfd139 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 26 Aug 2025 22:00:32 +0530 Subject: [PATCH 0842/1307] block: validate QoS before calling __rq_qos_done_bio() If a bio has BIO_QOS_xxx set, it doesn't guarantee that q->rq_qos is also present at-least for stacked block devices. For instance, in case of NVMe when multipath is enabled, the bottom device may have QoS enabled but top device doesn't. So always validate QoS is enabled and q->rq_qos is present before calling __rq_qos_done_bio(). Fixes: 370ac285f23a ("block: avoid cpu_hotplug_lock depedency on freeze_lock") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/3a07b752-06a4-4eee-b302-f4669feb859d@linux.ibm.com/ Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250826163128.1952394-1-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 1fe22000a379..b538f2c0febc 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -149,12 +149,15 @@ static inline void rq_qos_done_bio(struct bio *bio) q = bdev_get_queue(bio->bi_bdev); /* - * If a bio has BIO_QOS_xxx set, it implicitly implies that - * q->rq_qos is present. So, we skip re-checking q->rq_qos - * here as an extra optimization and directly call - * __rq_qos_done_bio(). + * A BIO may carry BIO_QOS_* flags even if the associated request_queue + * does not have rq_qos enabled. This can happen with stacked block + * devices — for example, NVMe multipath, where it's possible that the + * bottom device has QoS enabled but the top device does not. Therefore, + * always verify that q->rq_qos is present and QoS is enabled before + * calling __rq_qos_done_bio(). */ - __rq_qos_done_bio(q->rq_qos, bio); + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) From e81a7f65288c7e2cfb7e7890f648e099fd885ab3 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 22 Aug 2025 11:13:24 +0200 Subject: [PATCH 0843/1307] net: usb: qmi_wwan: add Telit Cinterion LE910C4-WWX new compositions Add the following Telit Cinterion LE910C4-WWX new compositions: 0x1034: tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1034 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1037: tty (diag) + tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 15 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1037 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x1038: tty (Telit custom) + tty (AT) + tty (AT) + rmnet T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1038 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=93f617e7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Link: https://patch.msgid.link/20250822091324.39558-1-Fabio.Porcedda@telit.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e56901bb6ebc..11352d85475a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1355,6 +1355,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1034, 2)}, /* Telit LE910C4-WWX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1037, 4)}, /* Telit LE910C4-WWX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1038, 3)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ From 882e57cbc7204662f6c5672d5b04336c1d790b03 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 25 Aug 2025 08:55:43 +0200 Subject: [PATCH 0844/1307] phy: mscc: Fix when PTP clock is register and unregister It looks like that every time when the interface was set down and up the driver was creating a new ptp clock. On top of this the function ptp_clock_unregister was never called. Therefore fix this by calling ptp_clock_register and initialize the mii_ts struct inside the probe function and call ptp_clock_unregister when driver is removed. Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Reviewed-by: Vadim Fedorenko Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250825065543.2916334-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc.h | 4 ++++ drivers/net/phy/mscc/mscc_main.c | 4 +--- drivers/net/phy/mscc/mscc_ptp.c | 34 ++++++++++++++++++++------------ 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 58c6d47fbe04..2bfe314ef881 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -481,6 +481,7 @@ static inline void vsc8584_config_macsec_intr(struct phy_device *phydev) void vsc85xx_link_change_notify(struct phy_device *phydev); void vsc8584_config_ts_intr(struct phy_device *phydev); int vsc8584_ptp_init(struct phy_device *phydev); +void vsc8584_ptp_deinit(struct phy_device *phydev); int vsc8584_ptp_probe_once(struct phy_device *phydev); int vsc8584_ptp_probe(struct phy_device *phydev); irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev); @@ -495,6 +496,9 @@ static inline int vsc8584_ptp_init(struct phy_device *phydev) { return 0; } +static inline void vsc8584_ptp_deinit(struct phy_device *phydev) +{ +} static inline int vsc8584_ptp_probe_once(struct phy_device *phydev) { return 0; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index f1c9ce351ab4..24c75903f535 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2337,9 +2337,7 @@ static int vsc85xx_probe(struct phy_device *phydev) static void vsc85xx_remove(struct phy_device *phydev) { - struct vsc8531_private *priv = phydev->priv; - - skb_queue_purge(&priv->rx_skbs_list); + vsc8584_ptp_deinit(phydev); } /* Microsemi VSC85xx PHYs */ diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index de6c7312e8f2..72847320cb65 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1298,7 +1298,6 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev) static int __vsc8584_init_ptp(struct phy_device *phydev) { - struct vsc8531_private *vsc8531 = phydev->priv; static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; u32 val; @@ -1515,17 +1514,7 @@ static int __vsc8584_init_ptp(struct phy_device *phydev) vsc85xx_ts_eth_cmp1_sig(phydev); - vsc8531->mii_ts.rxtstamp = vsc85xx_rxtstamp; - vsc8531->mii_ts.txtstamp = vsc85xx_txtstamp; - vsc8531->mii_ts.hwtstamp = vsc85xx_hwtstamp; - vsc8531->mii_ts.ts_info = vsc85xx_ts_info; - phydev->mii_ts = &vsc8531->mii_ts; - - memcpy(&vsc8531->ptp->caps, &vsc85xx_clk_caps, sizeof(vsc85xx_clk_caps)); - - vsc8531->ptp->ptp_clock = ptp_clock_register(&vsc8531->ptp->caps, - &phydev->mdio.dev); - return PTR_ERR_OR_ZERO(vsc8531->ptp->ptp_clock); + return 0; } void vsc8584_config_ts_intr(struct phy_device *phydev) @@ -1552,6 +1541,16 @@ int vsc8584_ptp_init(struct phy_device *phydev) return 0; } +void vsc8584_ptp_deinit(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531 = phydev->priv; + + if (vsc8531->ptp->ptp_clock) { + ptp_clock_unregister(vsc8531->ptp->ptp_clock); + skb_queue_purge(&vsc8531->rx_skbs_list); + } +} + irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev) { struct vsc8531_private *priv = phydev->priv; @@ -1612,7 +1611,16 @@ int vsc8584_ptp_probe(struct phy_device *phydev) vsc8531->ptp->phydev = phydev; - return 0; + vsc8531->mii_ts.rxtstamp = vsc85xx_rxtstamp; + vsc8531->mii_ts.txtstamp = vsc85xx_txtstamp; + vsc8531->mii_ts.hwtstamp = vsc85xx_hwtstamp; + vsc8531->mii_ts.ts_info = vsc85xx_ts_info; + phydev->mii_ts = &vsc8531->mii_ts; + + memcpy(&vsc8531->ptp->caps, &vsc85xx_clk_caps, sizeof(vsc85xx_clk_caps)); + vsc8531->ptp->ptp_clock = ptp_clock_register(&vsc8531->ptp->caps, + &phydev->mdio.dev); + return PTR_ERR_OR_ZERO(vsc8531->ptp->ptp_clock); } int vsc8584_ptp_probe_once(struct phy_device *phydev) From 26c1f55f7ec8d1a4bde8c50e4ee04e3c8c6b27e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 25 Aug 2025 08:57:53 -0700 Subject: [PATCH 0845/1307] MAINTAINERS: retire Boris from TLS maintainers There's a steady stream of TLS changes and bugs. We need active maintainers in this area, and Boris hasn't been participating much in upstream work. Move him to CREDITS. While at it also add Dave Watson there who was the author of the initial SW implementation, AFAIU. Link: https://patch.msgid.link/20250825155753.2178045-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 7 +++++++ MAINTAINERS | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index a357f9cbb05d..a687c3c35c4c 100644 --- a/CREDITS +++ b/CREDITS @@ -3222,6 +3222,10 @@ D: AIC5800 IEEE 1394, RAW I/O on 1394 D: Starter of Linux1394 effort S: ask per mail for current address +N: Boris Pismenny +E: borisp@mellanox.com +D: Kernel TLS implementation and offload support. + N: Nicolas Pitre E: nico@fluxnic.net D: StrongARM SA1100 support integrator & hacker @@ -4168,6 +4172,9 @@ S: 1513 Brewster Dr. S: Carrollton, TX 75010 S: USA +N: Dave Watson +D: Kernel TLS implementation. + N: Tim Waugh E: tim@cyberelk.net D: Co-architect of the parallel-port sharing system diff --git a/MAINTAINERS b/MAINTAINERS index 2720544cd91f..1897d8b45df4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17818,7 +17818,6 @@ F: net/ipv6/syncookies.c F: net/ipv6/tcp*.c NETWORKING [TLS] -M: Boris Pismenny M: John Fastabend M: Jakub Kicinski L: netdev@vger.kernel.org From 16c8a3a67ec799fc731919e3e51be9af6cdf541d Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 25 Aug 2025 13:21:34 -0400 Subject: [PATCH 0846/1307] net: macb: Fix offset error in gem_update_stats hw_stats now has only one variable for tx_octets/rx_octets, so we should only increment p once, not twice. This would cause the statistics to be reported under the wrong categories in `ethtool -S --all-groups` (which uses hw_stats) but not `ethtool -S` (which uses ethtool_stats). Signed-off-by: Sean Anderson Fixes: f6af690a295a ("net: cadence: macb: Report standard stats") Link: https://patch.msgid.link/20250825172134.681861-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b29c3beae0b2..106885451147 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3090,7 +3090,7 @@ static void gem_update_stats(struct macb *bp) /* Add GEM_OCTTXH, GEM_OCTRXH */ val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; - *(p++) += ((u64)val) << 32; + *p += ((u64)val) << 32; } } From d9b0ca1334d8a9a03bef45e95825564c56ca3367 Mon Sep 17 00:00:00 2001 From: Boon Khai Ng Date: Mon, 25 Aug 2025 15:13:21 +0800 Subject: [PATCH 0847/1307] MAINTAINERS: Update maintainer information for Altera Triple Speed Ethernet Driver The previous maintainer, Joyce Ooi, is no longer with the company, and her email is no longer reachable. As a result, the maintainer information for the Altera Triple Speed Ethernet Driver has been updated. Changes: - Replaced Joyce Ooi's email with Boon Khai Ng's email address. - Kept the component's status as "Maintained". Signed-off-by: Boon Khai Ng Link: https://patch.msgid.link/20250825071321.30131-1-boon.khai.ng@altera.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1897d8b45df4..c5b47955d2a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -937,7 +937,7 @@ S: Maintained F: drivers/gpio/gpio-altera.c ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Joyce Ooi +M: Boon Khai Ng L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/altera/ From 2747328ba2714f1a7454208dbbc1dc0631990b4a Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 25 Aug 2025 10:59:25 -0700 Subject: [PATCH 0848/1307] bnxt_en: Fix memory corruption when FW resources change during ifdown bnxt_set_dflt_rings() assumes that it is always called before any TC has been created. So it doesn't take bp->num_tc into account and assumes that it is always 0 or 1. In the FW resource or capability change scenario, the FW will return flags in bnxt_hwrm_if_change() that will cause the driver to reinitialize and call bnxt_cancel_reservations(). This will lead to bnxt_init_dflt_ring_mode() calling bnxt_set_dflt_rings() and bp->num_tc may be greater than 1. This will cause bp->tx_ring[] to be sized too small and cause memory corruption in bnxt_alloc_cp_rings(). Fix it by properly scaling the TX rings by bp->num_tc in the code paths mentioned above. Add 2 helper functions to determine bp->tx_nr_rings and bp->tx_nr_rings_per_tc. Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.") Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 207a8bb36ae5..1f5c06f1296b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12851,6 +12851,17 @@ static int bnxt_set_xps_mapping(struct bnxt *bp) return rc; } +static int bnxt_tx_nr_rings(struct bnxt *bp) +{ + return bp->num_tc ? bp->tx_nr_rings_per_tc * bp->num_tc : + bp->tx_nr_rings_per_tc; +} + +static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) +{ + return bp->num_tc ? bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -16325,7 +16336,7 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->cp_nr_rings = min_t(int, bp->tx_nr_rings_per_tc, bp->rx_nr_rings); bp->rx_nr_rings = bp->cp_nr_rings; bp->tx_nr_rings_per_tc = bp->cp_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) @@ -16357,7 +16368,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) bnxt_trim_dflt_sh_rings(bp); else bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { @@ -16370,7 +16381,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); if (sh) bnxt_trim_dflt_sh_rings(bp); @@ -16379,7 +16390,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16413,7 +16424,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); bnxt_set_dflt_rfs(bp); From 1ee581c24dfdcbc6de25aac95a48c1f08e9a542c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Aug 2025 10:59:26 -0700 Subject: [PATCH 0849/1307] bnxt_en: Adjust TX rings if reservation is less than requested Before we accept an ethtool request to increase a resource (such as rings), we call the FW to check that the requested resource is likely available first before we commit. But it is still possible that the actual reservation or allocation can fail. The existing code is missing the logic to adjust the TX rings in case the reserved TX rings are less than requested. Add a warning message (a similar message for RX rings already exists) and add the logic to adjust the TX rings. Without this fix, the number of TX rings reported to the stack can exceed the actual TX rings and ethtool -l will report more than the actual TX rings. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f5c06f1296b..86fc9d340dab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8024,6 +8024,11 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.rx = rx_rings << 1; tx_cp = bnxt_num_tx_to_cp(bp, hwr.tx); hwr.cp = sh ? max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings; + if (hwr.tx != bp->tx_nr_rings) { + netdev_warn(bp->dev, + "Able to reserve only %d out of %d requested TX rings\n", + hwr.tx, bp->tx_nr_rings); + } bp->tx_nr_rings = hwr.tx; /* If we cannot reserve all the RX rings, reset the RSS map only @@ -12879,6 +12884,13 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) { + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; + } rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); From b4fc8faacfea2538184a1dbd616ae9447a361f3d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Aug 2025 10:59:27 -0700 Subject: [PATCH 0850/1307] bnxt_en: Fix stats context reservation logic The HW resource reservation logic allows the L2 driver to use the RoCE resources if the RoCE driver is not registered. When calculating the stats contexts available for L2, we should not blindly subtract the stats contexts reserved for RoCE unless the RoCE driver is registered. This bug may cause the L2 rings to be less than the number requested when we are close to running out of stats contexts. Fixes: 2e4592dc9bee ("bnxt_en: Change MSIX/NQs allocation policy") Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250825175927.459987-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86fc9d340dab..31e3d825b4bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8016,7 +8016,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) } rx_rings = min_t(int, rx_rings, hwr.grp); hwr.cp = min_t(int, hwr.cp, bp->cp_nr_rings); - if (hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) + if (bnxt_ulp_registered(bp->edev) && + hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) hwr.stat -= bnxt_get_ulp_stat_ctxs(bp); hwr.cp = min_t(int, hwr.cp, hwr.stat); rc = bnxt_trim_rings(bp, &rx_rings, &hwr.tx, hwr.cp, sh); From 2c0a959bebdc1ada13cf9a8242f177c5400299e6 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:24 +0300 Subject: [PATCH 0851/1307] net/mlx5: HWS, Fix memory leak in hws_pool_buddy_init error path In the error path of hws_pool_buddy_init(), the buddy allocator cleanup doesn't free the allocator structure itself, causing a memory leak. Add the missing kfree() to properly release all allocated memory. Fixes: c61afff94373 ("net/mlx5: HWS, added memory management handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index 7e37d6e9eb83..7b5071c3df36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -124,6 +124,7 @@ static int hws_pool_buddy_init(struct mlx5hws_pool *pool) mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); + kfree(buddy); return -ENOMEM; } From a630f83592cdad1253523a1b760cfe78fef6cd9c Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:25 +0300 Subject: [PATCH 0852/1307] net/mlx5: HWS, Fix memory leak in hws_action_get_shared_stc_nic error flow When an invalid stc_type is provided, the function allocates memory for shared_stc but jumps to unlock_and_out without freeing it, causing a memory leak. Fix by jumping to free_shared_stc label instead to ensure proper cleanup. Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index 396804369b00..6b36a4a7d895 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -117,7 +117,7 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type); pr_warn("HWS: Invalid stc_type: %d\n", stc_type); ret = -EINVAL; - goto unlock_and_out; + goto free_shared_stc; } ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, From 24b6e53140475b56cadcccd4e82a93aa5bacf1eb Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:26 +0300 Subject: [PATCH 0853/1307] net/mlx5: HWS, Fix uninitialized variables in mlx5hws_pat_calc_nop error flow In mlx5hws_pat_calc_nop(), src_field and dst_field are passed to hws_action_modify_get_target_fields() which should set their values. However, if an invalid action type is encountered, these variables remain uninitialized and are later used to update prev_src_field and prev_dst_field. Initialize both variables to INVALID_FIELD to ensure they have defined values in all code paths. Fixes: 01e035fd0380 ("net/mlx5: HWS, handle modify header actions dependency") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 51e4c551e0ef..622fd579f140 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -527,7 +527,6 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, u32 *nop_locations, __be64 *new_pat) { u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; - u16 src_field, dst_field; u8 action_type; bool dependent; size_t i, j; @@ -539,6 +538,9 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, return 0; for (i = 0, j = 0; i < num_actions; i++, j++) { + u16 src_field = INVALID_FIELD; + u16 dst_field = INVALID_FIELD; + if (j >= max_actions) return -EINVAL; From 00a50e4e8974cbf5d6a1dc91cfa5cce4aa7af05a Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 25 Aug 2025 17:34:27 +0300 Subject: [PATCH 0854/1307] net/mlx5: HWS, Fix pattern destruction in mlx5hws_pat_get_pattern error path In mlx5hws_pat_get_pattern(), when mlx5hws_pat_add_pattern_to_cache() fails, the function attempts to clean up the pattern created by mlx5hws_cmd_header_modify_pattern_create(). However, it incorrectly uses *pattern_id which hasn't been set yet, instead of the local ptrn_id variable that contains the actual pattern ID. This results in attempting to destroy a pattern using uninitialized data from the output parameter, rather than the valid pattern ID returned by the firmware. Use ptrn_id instead of *pattern_id in the cleanup path to properly destroy the created pattern. Fixes: aefc15a0fa1c ("net/mlx5: HWS, added modify header pattern and args handling") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 622fd579f140..d56271a9e4f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -279,7 +279,7 @@ int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx, return ret; clean_pattern: - mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id); + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id); out_unlock: mutex_unlock(&ctx->pattern_cache->lock); return ret; From 34cc6a54914f478c93e176450fae6313404f9f74 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:28 +0300 Subject: [PATCH 0855/1307] net/mlx5: Reload auxiliary drivers on fw_activate The devlink reload fw_activate command performs firmware activation followed by driver reload, while devlink reload driver_reinit triggers only driver reload. However, the driver reload logic differs between the two modes, as on driver_reinit mode mlx5 also reloads auxiliary drivers, while in fw_activate mode the auxiliary drivers are suspended where applicable. Additionally, following the cited commit, if the device has multiple PFs, the behavior during fw_activate may vary between PFs: one PF may suspend auxiliary drivers, while another reloads them. Align devlink dev reload fw_activate behavior with devlink dev reload driver_reinit, to reload all auxiliary drivers. Fixes: 72ed5d5624af ("net/mlx5: Suspend auxiliary devices only in case of PCI device suspend") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Akiva Goldberger Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3ffa3fbacd16..26091e7536d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -160,7 +160,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, true); + mlx5_unload_one_devl_locked(dev, false); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); From 902a8bc23a24882200f57cadc270e15a2cfaf2bb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:29 +0300 Subject: [PATCH 0856/1307] net/mlx5: Fix lockdep assertion on sync reset unload event Fix lockdep assertion triggered during sync reset unload event. When the sync reset flow is initiated using the devlink reload fw_activate option, the PF already holds the devlink lock while handling unload event. In this case, delegate sync reset unload event handling back to the devlink callback process to avoid double-locking and resolve the lockdep warning. Kernel log: WARNING: CPU: 9 PID: 1578 at devl_assert_locked+0x31/0x40 [...] Call Trace: mlx5_unload_one_devl_locked+0x2c/0xc0 [mlx5_core] mlx5_sync_reset_unload_event+0xaf/0x2f0 [mlx5_core] process_one_work+0x222/0x640 worker_thread+0x199/0x350 kthread+0x10b/0x230 ? __pfx_worker_thread+0x10/0x10 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x8e/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 7a9770f1bfea ("net/mlx5: Handle sync reset unload event") Signed-off-by: Moshe Shemesh Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- .../ethernet/mellanox/mlx5/core/fw_reset.c | 126 ++++++++++-------- .../ethernet/mellanox/mlx5/core/fw_reset.h | 1 + 3 files changed, 72 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 26091e7536d3..2c0e0c16ca90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -160,7 +160,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, false); + mlx5_sync_reset_unload_flow(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 69933addd921..38b9b184ae01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -12,7 +12,8 @@ enum { MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, - MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, + MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, }; struct mlx5_fw_reset { @@ -219,7 +220,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; struct devlink *devlink = priv_to_devlink(dev); @@ -228,8 +229,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - if (!unloaded) - mlx5_unload_one(dev, false); + mlx5_sync_reset_unload_flow(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -272,7 +272,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev, false); + mlx5_fw_reset_complete_reload(dev); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -586,65 +586,23 @@ static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method) return err; } -static void mlx5_sync_reset_now_event(struct work_struct *work) +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked) { - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_now_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; - - mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); - - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) { - mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); - goto done; - } - - err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); - if (err) { - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); - set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); - } - - mlx5_enter_error_state(dev, true); -done: - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev, false); -} - -static void mlx5_sync_reset_unload_event(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset; - struct mlx5_core_dev *dev; + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; unsigned long timeout; int poll_freq = 20; bool reset_action; u8 rst_state; int err; - fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); - dev = fw_reset->dev; - - if (mlx5_sync_reset_clear_reset_requested(dev, false)) - return; - - mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); - - err = mlx5_cmd_fast_teardown_hca(dev); - if (err) - mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); - else - mlx5_enter_error_state(dev, true); - - if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + if (locked) mlx5_unload_one_devl_locked(dev, false); else mlx5_unload_one(dev, false); + if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags)) + return; + mlx5_set_fw_rst_ack(dev); mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); @@ -672,17 +630,73 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) goto done; } - mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", + rst_state); if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); if (err) { - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err); + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", + err); fw_reset->ret = err; } } done: - mlx5_fw_reset_complete_reload(dev, true); + clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); + if (err) { + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + } + + mlx5_enter_error_state(dev, true); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); + mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + mlx5_fw_reset_complete_reload(dev); } static void mlx5_sync_reset_abort_event(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index ea527d06a85f..d5b28525c960 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -12,6 +12,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, struct netlink_ext_ack *extack); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); From 26e42ec7712d392d561964514b1f253b1a96f42d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:30 +0300 Subject: [PATCH 0857/1307] net/mlx5: Nack sync reset when SFs are present If PF (Physical Function) has SFs (Sub-Functions), since the SFs are not taking part in the synchronization flow, sync reset can lead to fatal error on the SFs, as the function will be closed unexpectedly from the SF point of view. Add a check to prevent sync reset when there are SFs on a PF device which is not ECPF, as ECPF is teardowned gracefully before reset. Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Moshe Shemesh Reviewed-by: Parav Pandit Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-8-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 38b9b184ae01..22995131824a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -6,6 +6,7 @@ #include "fw_reset.h" #include "diag/fw_tracer.h" #include "lib/tout.h" +#include "sf/sf.h" enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, @@ -428,6 +429,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev, return false; } + if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) { + mlx5_core_warn(dev, "SFs should be removed before reset\n"); + return false; + } + #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) { err = mlx5_check_hotplug_interrupt(dev, bridge); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 0864ba625c07..3304f25cc805 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -518,3 +518,13 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) WARN_ON(!xa_empty(&table->function_ids)); kfree(table); } + +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return true; + + return xa_empty(&table->function_ids); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 860f9ddb7107..89559a37997a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -17,6 +17,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); int mlx5_sf_table_init(struct mlx5_core_dev *dev); void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, @@ -61,6 +62,11 @@ static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) { } +static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + return true; +} + #endif #endif From cf9a8627b9a369ba01d37be6f71b297beb688faa Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 25 Aug 2025 17:34:31 +0300 Subject: [PATCH 0858/1307] net/mlx5: Prevent flow steering mode changes in switchdev mode Changing flow steering modes is not allowed when eswitch is in switchdev mode. This fix ensures that any steering mode change, including to firmware steering, is correctly blocked while eswitch mode is switchdev. Fixes: e890acd5ff18 ("net/mlx5: Add devlink flow_steering_mode parameter") Signed-off-by: Moshe Shemesh Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d87392360dbd..cb165085a4c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3734,6 +3734,13 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, char *value = val.vstr; u8 eswitch_mode; + eswitch_mode = mlx5_eswitch_mode(dev); + if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Changing fs mode is not supported when eswitch offloads enabled."); + return -EOPNOTSUPP; + } + if (!strcmp(value, "dmfs")) return 0; @@ -3759,14 +3766,6 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, return -EINVAL; } - eswitch_mode = mlx5_eswitch_mode(dev); - if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_FMT_MOD(extack, - "Moving to %s is not supported when eswitch offloads enabled.", - value); - return -EOPNOTSUPP; - } - return 0; } From ceddedc969f0532b7c62ca971ee50d519d2bc0cb Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:32 +0300 Subject: [PATCH 0859/1307] net/mlx5e: Update and set Xon/Xoff upon MTU set Xon/Xoff sizes are derived from calculation that include the MTU size. Set Xon/Xoff when MTU is set. If Xon/Xoff fails, set the previous MTU. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-10-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/port_buffer.h | 12 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index f4a19ffbb641..66d276a1be83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -66,11 +66,23 @@ struct mlx5e_port_buffer { struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; }; +#ifdef CONFIG_MLX5_CORE_EN_DCB int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, u32 *buffer_size, u8 *prio2buffer); +#else +static inline int +mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + void *pfc, + u32 *buffer_size, + u8 *prio2buffer) +{ + return 0; +} +#endif int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 21bb88c5d3dc..15eded36b872 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -49,6 +49,7 @@ #include "en.h" #include "en/dim.h" #include "en/txrx.h" +#include "en/port_buffer.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" @@ -3040,9 +3041,11 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - u16 mtu; + u16 mtu, prev_mtu; int err; + mlx5e_query_mtu(mdev, params, &prev_mtu); + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); if (err) return err; @@ -3052,6 +3055,18 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", __func__, mtu, params->sw_mtu); + if (mtu != prev_mtu && MLX5_BUFFER_SUPPORTED(mdev)) { + err = mlx5e_port_manual_buffer_config(priv, 0, mtu, + NULL, NULL, NULL); + if (err) { + netdev_warn(netdev, "%s: Failed to set Xon/Xoff values with MTU %d (err %d), setting back to previous MTU %d\n", + __func__, mtu, err, prev_mtu); + + mlx5e_set_mtu(mdev, params, prev_mtu); + return err; + } + } + params->sw_mtu = mtu; return 0; } From d24341740fe48add8a227a753e68b6eedf4b385a Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:33 +0300 Subject: [PATCH 0860/1307] net/mlx5e: Update and set Xon/Xoff upon port speed set Xon/Xoff sizes are derived from calculations that include the port speed. These settings need to be updated and applied whenever the port speed is changed. The port speed is typically set after the physical link goes down and is negotiated as part of the link-up process between the two connected interfaces. Xon/Xoff parameters being updated at the point where the new negotiated speed is established. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-11-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 15eded36b872..e680673ffb72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -139,6 +139,8 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv) if (up) { netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); + mlx5e_port_manual_buffer_config(priv, 0, priv->netdev->mtu, + NULL, NULL, NULL); } else { netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); From aca0c31af61e0d5cf1675a0cbd29460b95ae693c Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 25 Aug 2025 17:34:34 +0300 Subject: [PATCH 0861/1307] net/mlx5e: Set local Xoff after FW update The local Xoff value is being set before the firmware (FW) update. In case of a failure where the FW is not updated with the new value, there is no fallback to the previous value. Update the local Xoff value after the FW has been successfully set. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Alexei Lazar Reviewed-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250825143435.598584-12-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 3efa8bf1d14e..4720523813b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -575,7 +575,6 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; } - priv->dcbx.xoff = xoff; /* Apply the settings */ if (update_buffer) { @@ -584,6 +583,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return err; } + priv->dcbx.xoff = xoff; + if (update_prio2buffer) err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); From 4f23382841e67174211271a454811dd17c0ef3c5 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:52 +0800 Subject: [PATCH 0862/1307] net: stmmac: xgmac: Do not enable RX FIFO Overflow interrupts Enabling RX FIFO Overflow interrupts is counterproductive and causes an interrupt storm when RX FIFO overflows. Disabling this interrupt has no side effect and eliminates interrupt storms when the RX FIFO overflows. Commit 8a7cb245cf28 ("net: stmmac: Do not enable RX FIFO overflow interrupts") disables RX FIFO overflow interrupts for DWMAC4 IP and removes the corresponding handling of this interrupt. This patch is doing the same thing for XGMAC IP. Fixes: 2142754f8b9c ("net: stmmac: Add MAC related callbacks for XGMAC2") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-1-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 5dcc95bc0ad2..7201a3884265 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -203,10 +203,6 @@ static void dwxgmac2_dma_rx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, } writel(value, ioaddr + XGMAC_MTL_RXQ_OPMODE(channel)); - - /* Enable MTL RX overflow */ - value = readl(ioaddr + XGMAC_MTL_QINTEN(channel)); - writel(value | XGMAC_RXOIE, ioaddr + XGMAC_MTL_QINTEN(channel)); } static void dwxgmac2_dma_tx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, From 42ef11b2bff5b6a2910c28d2ea47cc00e0fbcaec Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:53 +0800 Subject: [PATCH 0863/1307] net: stmmac: xgmac: Correct supported speed modes Correct supported speed modes as per the XGMAC databook. Commit 9cb54af214a7 ("net: stmmac: Fix IP-cores specific MAC capabilities") removes support for 10M, 100M and 1000HD. 1000HD is not supported by XGMAC IP, but it does support 10M and 100M FD mode for XGMAC version >= 2_20, and it also supports 10M and 100M HD mode if the HDSEL bit is set in the MAC_HW_FEATURE0 reg. This commit enables support for 10M and 100M speed modes for XGMAC IP based on XGMAC version and MAC capabilities. Fixes: 9cb54af214a7 ("net: stmmac: Fix IP-cores specific MAC capabilities") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-2-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 13 +++++++++++-- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 6cadf8de4fdf..00e929bf280b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -49,6 +49,14 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } +static void dwxgmac2_update_caps(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.mbps_10_100) + priv->hw->link.caps &= ~(MAC_10 | MAC_100); + else if (!priv->dma_cap.half_duplex) + priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD); +} + static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable) { u32 tx = readl(ioaddr + XGMAC_TX_CONFIG); @@ -1424,6 +1432,7 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, + .update_caps = dwxgmac2_update_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxgmac2_rx_queue_enable, @@ -1532,8 +1541,8 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | - MAC_1000FD | MAC_2500FD | MAC_5000FD | - MAC_10000FD; + MAC_10 | MAC_100 | MAC_1000FD | + MAC_2500FD | MAC_5000FD | MAC_10000FD; mac->link.duplex = 0; mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7201a3884265..4d6bb995d8d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -382,8 +382,11 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, struct dma_features *dma_cap) { + struct stmmac_priv *priv; u32 hw_cap; + priv = container_of(dma_cap, struct stmmac_priv, dma_cap); + /* MAC HW feature 0 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE0); dma_cap->edma = (hw_cap & XGMAC_HWFEAT_EDMA) >> 31; @@ -406,6 +409,8 @@ static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->vlhash = (hw_cap & XGMAC_HWFEAT_VLHASH) >> 4; dma_cap->half_duplex = (hw_cap & XGMAC_HWFEAT_HDSEL) >> 3; dma_cap->mbps_1000 = (hw_cap & XGMAC_HWFEAT_GMIISEL) >> 1; + if (dma_cap->mbps_1000 && priv->synopsys_id >= DWXGMAC_CORE_2_20) + dma_cap->mbps_10_100 = 1; /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); From b1eded580ab28119de0b0f21efe37ee2b4419144 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Mon, 25 Aug 2025 12:36:54 +0800 Subject: [PATCH 0864/1307] net: stmmac: Set CIC bit only for TX queues with COE Currently, in the AF_XDP transmit paths, the CIC bit of TX Desc3 is set for all packets. Setting this bit for packets transmitting through queues that don't support checksum offloading causes the TX DMA to get stuck after transmitting some packets. This patch ensures the CIC bit of TX Desc3 is set only if the TX queue supports checksum offloading. Fixes: 132c32ee5bc0 ("net: stmmac: Add TX via XDP zero-copy socket") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20250825-xgmac-minor-fixes-v3-3-c225fe4444c0@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f1abf4242cd2..7b16d1207b80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2584,6 +2584,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; struct xsk_buff_pool *pool = tx_q->xsk_pool; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc = NULL; @@ -2671,7 +2672,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) } stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, - true, priv->mode, true, true, + csum, priv->mode, true, true, xdp_desc.len); stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); @@ -4983,6 +4984,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc; dma_addr_t dma_addr; @@ -5034,7 +5036,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_set_desc_addr(priv, tx_desc, dma_addr); stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, - true, priv->mode, true, true, + csum, priv->mode, true, true, xdpf->len); tx_q->tx_count_frames++; From 9448ccd853368582efa9db05db344f8bb9dffe0f Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 25 Aug 2025 04:56:27 -0700 Subject: [PATCH 0865/1307] net: hv_netvsc: fix loss of early receive events from host during channel open. The hv_netvsc driver currently enables NAPI after opening the primary and subchannels. This ordering creates a race: if the Hyper-V host places data in the host -> guest ring buffer and signals the channel before napi_enable() has been called, the channel callback will run but napi_schedule_prep() will return false. As a result, the NAPI poller never gets scheduled, the data in the ring buffer is not consumed, and the receive queue may remain permanently stuck until another interrupt happens to arrive. Fix this by enabling NAPI and registering it with the RX/TX queues before vmbus channel is opened. This guarantees that any early host signal after open will correctly trigger NAPI scheduling and the ring buffer will be drained. Fixes: 76bb5db5c749d ("netvsc: fix use after free on module removal") Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20250825115627.GA32189@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 17 ++++++++--------- drivers/net/hyperv/rndis_filter.c | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 720104661d7f..60a4629fe6ba 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1812,6 +1812,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Enable NAPI handler before init callbacks */ netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll); + napi_enable(&net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, + &net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, + &net_device->chan_table[0].napi); /* Open the channel */ device->channel->next_request_id_callback = vmbus_next_request_id; @@ -1831,12 +1836,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Channel is opened */ netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); - napi_enable(&net_device->chan_table[0].napi); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, - &net_device->chan_table[0].napi); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, - &net_device->chan_table[0].napi); - /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) { @@ -1854,14 +1853,14 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, close: RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); - netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); - napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ vmbus_close(device->channel); cleanup: + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); + napi_disable(&net_device->chan_table[0].napi); netif_napi_del(&net_device->chan_table[0].napi); cleanup2: diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 9e73959e61ee..c35f9685b6bf 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1252,17 +1252,26 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE; + /* Enable napi before opening the vmbus channel to avoid races + * as the host placing data on the host->guest ring may be left + * out if napi was not enabled. + */ + napi_enable(&nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + &nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + &nvchan->napi); + ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); - if (ret == 0) { - napi_enable(&nvchan->napi); - netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, - &nvchan->napi); - netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, - &nvchan->napi); - } else { + if (ret != 0) { netdev_notice(ndev, "sub channel open failed: %d\n", ret); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + NULL); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + NULL); + napi_disable(&nvchan->napi); } if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn) From 2c3ca8cc55a3afc7a4fa99ed8f5f5d05dd2e65b3 Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Wed, 27 Aug 2025 11:29:02 +0800 Subject: [PATCH 0866/1307] ALSA: usb-audio: move mixer_quirks' min_mute into common quirk We have found more and more devices that have the same problem, that the mixer's minimum value is muted. Accroding to pipewire's MR[1] and Arch Linux wiki[2], this should be a very common problem in USB audio devices. Move the quirk into common quirk,as a preparation of more devices' quirk's patch coming on the road[3]. 1. https://gitlab.freedesktop.org/pipewire/pipewire/-/merge_requests/2514 2. https://wiki.archlinux.org/index.php?title=PipeWire&oldid=804138#No_sound_from_USB_DAC_until_30%_volume 3. On the road, in the physical sense. We have been buying ton of these devices for testing the problem. Tested-by: Guoli An Signed-off-by: Cryolitia PukNgae Link: https://patch.msgid.link/20250827-sound-quirk-min-mute-v1-1-4717aa8a4f6a@uniontech.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 10 +++------- sound/usb/quirks.c | 12 ++++++++++-- sound/usb/usbaudio.h | 4 ++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 6b47b3145d2c..3df537fdb9f1 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -4608,16 +4608,12 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; + } + /* lowest playback value is muted on some devices */ - case USB_ID(0x0572, 0x1b09): /* Conexant Systems (Rockwell), Inc. */ - case USB_ID(0x0d8c, 0x000c): /* C-Media */ - case USB_ID(0x0d8c, 0x0014): /* C-Media */ - case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */ - case USB_ID(0x2d99, 0x0026): /* HECATE G2 GAMING HEADSET */ + if (mixer->chip->quirk_flags & QUIRK_FLAG_MIXER_MIN_MUTE) if (strstr(kctl->id.name, "Playback")) cval->min_mute = 1; - break; - } /* ALSA-ify some Plantronics headset control names */ if (USB_ID_VENDOR(mixer->chip->usb_id) == 0x047f && diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e75b0b1df6eb..8bc1e247cdf1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2199,6 +2199,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0572, 0x1b09, /* Conexant Systems (Rockwell), Inc. */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x05a3, 0x9420, /* ELP HD USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x05a7, 0x1020, /* Bose Companion 5 */ @@ -2245,8 +2247,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0c45, 0x636b, /* Microdia JP001 USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */ - QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0d8c, 0x000c, /* C-Media */ + QUIRK_FLAG_MIXER_MIN_MUTE), + DEVICE_FLG(0x0d8c, 0x0014, /* C-Media */ + QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ @@ -2293,6 +2297,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0003, /* RODE NT-USB */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1bcf, 0x2281, /* HD Webcam */ @@ -2353,6 +2359,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x2d99, 0x0026, /* HECATE G2 GAMING HEADSET */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x2fc6, 0xf0b7, /* iBasso DC07 Pro */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 158ec053dc44..1ef4d39978df 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -196,6 +196,9 @@ extern bool snd_usb_skip_validation; * for the given endpoint. * QUIRK_FLAG_MIC_RES_16 and QUIRK_FLAG_MIC_RES_384 * Set the fixed resolution for Mic Capture Volume (mostly for webcams) + * QUIRK_FLAG_MIXER_MIN_MUTE + * Set minimum volume control value as mute for devices where the lowest + * playback value represents muted state instead of minimum audible volume */ #define QUIRK_FLAG_GET_SAMPLE_RATE (1U << 0) @@ -222,5 +225,6 @@ extern bool snd_usb_skip_validation; #define QUIRK_FLAG_FIXED_RATE (1U << 21) #define QUIRK_FLAG_MIC_RES_16 (1U << 22) #define QUIRK_FLAG_MIC_RES_384 (1U << 23) +#define QUIRK_FLAG_MIXER_MIN_MUTE (1U << 24) #endif /* __USBAUDIO_H */ From fcf8239ad6a5de54fa7ce18e464c6b5951b982cb Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 20 Aug 2025 11:58:57 +0200 Subject: [PATCH 0867/1307] x86/microcode/AMD: Handle the case of no BIOS microcode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Machines can be shipped without any microcode in the BIOS. Which means, the microcode patch revision is 0. Handle that gracefully. Fixes: 94838d230a6c ("x86/microcode/AMD: Use the family,model,stepping encoded in the patch ID") Reported-by: Vítek Vávra Signed-off-by: Borislav Petkov (AMD) Cc: --- arch/x86/kernel/cpu/microcode/amd.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 097e39327942..514f63340880 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -171,8 +171,28 @@ static int cmp_id(const void *key, const void *elem) return 1; } +static u32 cpuid_to_ucode_rev(unsigned int val) +{ + union zen_patch_rev p = {}; + union cpuid_1_eax c; + + c.full = val; + + p.stepping = c.stepping; + p.model = c.model; + p.ext_model = c.ext_model; + p.ext_fam = c.ext_fam; + + return p.ucode_rev; +} + static bool need_sha_check(u32 cur_rev) { + if (!cur_rev) { + cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax); + pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev); + } + switch (cur_rev >> 8) { case 0x80012: return cur_rev <= 0x800126f; break; case 0x80082: return cur_rev <= 0x800820f; break; @@ -749,8 +769,6 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi n.equiv_cpu = equiv_cpu; n.patch_id = uci->cpu_sig.rev; - WARN_ON_ONCE(!n.patch_id); - list_for_each_entry(p, µcode_cache, plist) if (patch_cpus_equivalent(p, &n, false)) return p; From 87b07a1fbc6b5c23d3b3584ab4288bc9106d3274 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 15:33:25 -0700 Subject: [PATCH 0868/1307] wifi: mt76: mt7996: Initialize hdr before passing to skb_put_data() A new warning in clang [1] points out a couple of places where a hdr variable is not initialized then passed along to skb_put_data(). drivers/net/wireless/mediatek/mt76/mt7996/mcu.c:1894:21: warning: variable 'hdr' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] 1894 | skb_put_data(skb, &hdr, sizeof(hdr)); | ^~~ drivers/net/wireless/mediatek/mt76/mt7996/mcu.c:3386:21: warning: variable 'hdr' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] 3386 | skb_put_data(skb, &hdr, sizeof(hdr)); | ^~~ Zero initialize these headers as done in other places in the driver when there is nothing stored in the header. Cc: stable@vger.kernel.org Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Link: https://github.com/llvm/llvm-project/commit/00dacf8c22f065cb52efb14cd091d441f19b319e [1] Closes: https://github.com/ClangBuiltLinux/linux/issues/2104 Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20250715-mt7996-fix-uninit-const-pointer-v1-1-b5d8d11d7b78@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 3593fd40c51b..fe1b34386de2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1879,8 +1879,8 @@ mt7996_mcu_get_mmps_mode(enum ieee80211_smps_mode smps) int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, void *data, u16 version) { + struct uni_header hdr = {}; struct ra_fixed_rate *req; - struct uni_header hdr; struct sk_buff *skb; struct tlv *tlv; int len; @@ -3372,7 +3372,7 @@ int mt7996_mcu_set_hdr_trans(struct mt7996_dev *dev, bool hdr_trans) { struct { u8 __rsv[4]; - } __packed hdr; + } __packed hdr = {}; struct hdr_trans_blacklist *req_blacklist; struct hdr_trans_en *req_en; struct sk_buff *skb; From 87f38519d27a514c9909f84b8f1334125df9778e Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Wed, 16 Jul 2025 18:54:01 +0200 Subject: [PATCH 0869/1307] wifi: mt76: mt7921: don't disconnect when CSA to DFS chan When station mode, don't disconnect when we get channel switch from AP to DFS channel. Most APs send CSA request after pass background CAC. In other case we should disconnect after detect beacon miss. Without patch when we get CSA to DFS channel get: "kernel: wlo1: preparing for channel switch failed, disconnecting" Fixes: 8aa2f59260eb ("wifi: mt76: mt7921: introduce CSA support") Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20250716165443.28354-1-janusz.dziedzic@gmail.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 40954e64c7fc..5881040ac195 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1459,11 +1459,8 @@ static int mt7921_pre_channel_switch(struct ieee80211_hw *hw, if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) return -EOPNOTSUPP; - /* Avoid beacon loss due to the CAC(Channel Availability Check) time - * of the AP. - */ if (!cfg80211_chandef_usable(hw->wiphy, &chsw->chandef, - IEEE80211_CHAN_RADAR)) + IEEE80211_CHAN_DISABLED)) return -EOPNOTSUPP; return 0; From 9f15701370ec15fbf1f6a1cbbf584b0018d036b5 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 27 Jul 2025 07:04:13 -0700 Subject: [PATCH 0870/1307] wifi: mt76: mt7925: fix locking in mt7925_change_vif_links() &dev->mt76.mutex lock is taken using mt792x_mutex_acquire(dev) but not released in one of the error paths, add the unlock to fix it. Fixes: 5cd0bd815c8a ("wifi: mt76: mt7925: fix NULL deref check in mt7925_change_vif_links") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202503031055.3ZRqxhAl-lkp@intel.com/ Signed-off-by: Harshit Mogalapalli Link: https://patch.msgid.link/20250727140416.1153406-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index a8d25b7d47d0..103909307518 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -2069,8 +2069,10 @@ mt7925_change_vif_links(struct ieee80211_hw *hw, struct ieee80211_vif *vif, GFP_KERNEL); mlink = devm_kzalloc(dev->mt76.dev, sizeof(*mlink), GFP_KERNEL); - if (!mconf || !mlink) + if (!mconf || !mlink) { + mt792x_mutex_release(dev); return -ENOMEM; + } } mconfs[link_id] = mconf; From 55424e7b9eeb141d9c8d8a8740ee131c28490425 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 28 Jul 2025 13:26:12 +0800 Subject: [PATCH 0871/1307] wifi: mt76: mt7925: fix the wrong bss cleanup for SAP When in SAP mode, if a STA disconnect, the SAP's BSS should not be cleared. Fixes: 0ebb60da8416 ("wifi: mt76: mt7925: adjust rm BSS flow to prevent next connection failure") Cc: stable@vger.kernel.org Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250728052612.39751-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 103909307518..b0e053b15227 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1191,6 +1191,9 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct mt792x_bss_conf *mconf; struct mt792x_link_sta *mlink; + if (vif->type == NL80211_IFTYPE_AP) + break; + link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); if (!link_sta) continue; From c22769de25095c6777e8acb68a1349a3257fc955 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 18 Aug 2025 10:02:03 +0800 Subject: [PATCH 0872/1307] wifi: mt76: mt7925u: use connac3 tx aggr check in tx complete MT7925 is a connac3 device; using the connac2 helper mis-parses TXWI and breaks AMPDU/BA accounting. Use the connac3-specific helper mt7925_tx_check_aggr() instead, Cc: stable@vger.kernel.org Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Reported-by: Nick Morrow Tested-by: Nick Morrow Tested-on: Netgear A9000 USB WiFi adapter Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250818020203.992338-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 75823c9fd3a1..b581ab9427f2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1449,7 +1449,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) - mt76_connac2_tx_check_aggr(sta, txwi); + mt7925_tx_check_aggr(sta, e->skb, wcid); skb_pull(e->skb, headroom); mt76_tx_complete_skb(mdev, e->wcid, e->skb); From dd6e89cad9951acef3723f3f21b2e892a23b371b Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 18 Aug 2025 11:02:01 +0800 Subject: [PATCH 0873/1307] wifi: mt76: mt7925: skip EHT MLD TLV on non-MLD and pass conn_state for sta_cmd Return early in mt7925_mcu_sta_eht_mld_tlv() for non-MLD vifs to avoid bogus MLD TLVs, and pass the proper connection state to sta_basic TLV. Cc: stable@vger.kernel.org Fixes: cb1353ef3473 ("wifi: mt76: mt7925: integrate *mlo_sta_cmd and *sta_cmd") Reported-by: Tal Inbar Tested-by: Tal Inbar Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250818030201.997940-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 300c863f0e3e..cd457be26523 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1834,13 +1834,13 @@ mt7925_mcu_sta_eht_mld_tlv(struct sk_buff *skb, struct tlv *tlv; u16 eml_cap; + if (!ieee80211_vif_is_mld(vif)) + return; + tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_EHT_MLD, sizeof(*eht_mld)); eht_mld = (struct sta_rec_eht_mld *)tlv; eht_mld->mld_type = 0xff; - if (!ieee80211_vif_is_mld(vif)) - return; - ext_capa = cfg80211_get_iftype_ext_capa(wiphy, ieee80211_vif_type_p2p(vif)); if (!ext_capa) @@ -1912,6 +1912,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, struct mt76_dev *dev = phy->dev; struct mt792x_bss_conf *mconf; struct sk_buff *skb; + int conn_state; mconf = mt792x_vif_to_link(mvif, info->wcid->link_id); @@ -1920,10 +1921,13 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, if (IS_ERR(skb)) return PTR_ERR(skb); + conn_state = info->enable ? CONN_STATE_PORT_SECURE : + CONN_STATE_DISCONNECT; + if (info->enable && info->link_sta) { mt76_connac_mcu_sta_basic_tlv(dev, skb, info->link_conf, info->link_sta, - info->enable, info->newly); + conn_state, info->newly); mt7925_mcu_sta_phy_tlv(skb, info->vif, info->link_sta); mt7925_mcu_sta_ht_tlv(skb, info->link_sta); mt7925_mcu_sta_vht_tlv(skb, info->link_sta); From 4c2334587b0a13b8f4eda1336ae657297fcd743b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Aug 2025 14:11:06 +0200 Subject: [PATCH 0874/1307] wifi: mt76: prevent non-offchannel mgmt tx during scan/roc Only put probe request packets in the offchannel queue if IEEE80211_TX_CTRL_DONT_USE_RATE_MASK is set and IEEE80211_TX_CTL_TX_OFFCHAN is unset. Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Reported-by: Chad Monroe Link: https://patch.msgid.link/20250813121106.81559-2-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index e6cf16706667..03b042fdf997 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -332,6 +332,7 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta, struct mt76_wcid *wcid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; struct sk_buff_head *head; if (mt76_testmode_enabled(phy)) { @@ -349,7 +350,8 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta, info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->band_idx); if ((info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) || - (info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) + ((info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK) && + ieee80211_is_probe_req(hdr->frame_control))) head = &wcid->tx_offchannel; else head = &wcid->tx_pending; From f30906c55a400a9b7fc677e3f4c614b9069bd4a8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Aug 2025 14:11:05 +0200 Subject: [PATCH 0875/1307] wifi: mt76: mt7996: disable beacons when going offchannel Avoid leaking beacons on unrelated channels during scanning/roc Fixes: c56d6edebc1f ("wifi: mt76: mt7996: use emulated hardware scan support") Reported-by: Chad Monroe Link: https://patch.msgid.link/20250813121106.81559-1-nbd@nbd.name Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 46 +++++++++++-------- .../net/wireless/mediatek/mt76/mt7996/main.c | 5 ++ .../net/wireless/mediatek/mt76/mt7996/mcu.c | 11 +++-- .../wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 226534490792..a9f7e5626dcd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1694,17 +1694,37 @@ mt7996_wait_reset_state(struct mt7996_dev *dev, u32 state) static void mt7996_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif) { - struct ieee80211_hw *hw = priv; + struct ieee80211_bss_conf *link_conf; + struct mt7996_phy *phy = priv; + struct mt7996_dev *dev = phy->dev; + unsigned int link_id; + switch (vif->type) { case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: - mt7996_mcu_add_beacon(hw, vif, &vif->bss_conf); break; default: - break; + return; } + + for_each_vif_active_link(vif, link_conf, link_id) { + struct mt7996_vif_link *link; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link || link->phy != phy) + continue; + + mt7996_mcu_add_beacon(dev->mt76.hw, vif, link_conf); + } +} + +void mt7996_mac_update_beacons(struct mt7996_phy *phy) +{ + ieee80211_iterate_active_interfaces(phy->mt76->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7996_update_vif_beacon, phy); } static void @@ -1712,25 +1732,15 @@ mt7996_update_beacons(struct mt7996_dev *dev) { struct mt76_phy *phy2, *phy3; - ieee80211_iterate_active_interfaces(dev->mt76.hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, dev->mt76.hw); + mt7996_mac_update_beacons(&dev->phy); phy2 = dev->mt76.phys[MT_BAND1]; - if (!phy2) - return; - - ieee80211_iterate_active_interfaces(phy2->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, phy2->hw); + if (phy2) + mt7996_mac_update_beacons(phy2->priv); phy3 = dev->mt76.phys[MT_BAND2]; - if (!phy3) - return; - - ieee80211_iterate_active_interfaces(phy3->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_update_vif_beacon, phy3->hw); + if (phy3) + mt7996_mac_update_beacons(phy3->priv); } void mt7996_tx_token_put(struct mt7996_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 92b57bcce749..84f731b387d2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -516,6 +516,9 @@ int mt7996_set_channel(struct mt76_phy *mphy) struct mt7996_phy *phy = mphy->priv; int ret; + if (mphy->offchannel) + mt7996_mac_update_beacons(phy); + ret = mt7996_mcu_set_chan_info(phy, UNI_CHANNEL_SWITCH); if (ret) goto out; @@ -533,6 +536,8 @@ int mt7996_set_channel(struct mt76_phy *mphy) mt7996_mac_reset_counters(phy); phy->noise = 0; + if (!mphy->offchannel) + mt7996_mac_update_beacons(phy); out: ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index fe1b34386de2..0be03eb3cf46 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2755,13 +2755,15 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { struct mt7996_dev *dev = mt7996_hw_dev(hw); - struct mt76_vif_link *mlink = mt76_vif_conf_link(&dev->mt76, vif, link_conf); + struct mt7996_vif_link *link = mt7996_vif_conf_link(dev, vif, link_conf); + struct mt76_vif_link *mlink = link ? &link->mt76 : NULL; struct ieee80211_mutable_offsets offs; struct ieee80211_tx_info *info; struct sk_buff *skb, *rskb; struct tlv *tlv; struct bss_bcn_content_tlv *bcn; int len, extra_len = 0; + bool enabled = link_conf->enable_beacon; if (link_conf->nontransmitted) return 0; @@ -2769,13 +2771,16 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!mlink) return -EINVAL; + if (link->phy && link->phy->mt76->offchannel) + enabled = false; + rskb = __mt7996_mcu_alloc_bss_req(&dev->mt76, mlink, MT7996_MAX_BSS_OFFLOAD_SIZE); if (IS_ERR(rskb)) return PTR_ERR(rskb); skb = ieee80211_beacon_get_template(hw, vif, &offs, link_conf->link_id); - if (link_conf->enable_beacon && !skb) { + if (enabled && !skb) { dev_kfree_skb(rskb); return -EINVAL; } @@ -2794,7 +2799,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, len = ALIGN(sizeof(*bcn) + MT_TXD_SIZE + extra_len, 4); tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_BCN_CONTENT, len); bcn = (struct bss_bcn_content_tlv *)tlv; - bcn->enable = link_conf->enable_beacon; + bcn->enable = enabled; if (!bcn->enable) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 33ac16b64ef1..8509d508e1e1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -732,6 +732,7 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, struct ieee80211_key_conf *key, int pid, enum mt76_txq_id qid, u32 changed); +void mt7996_mac_update_beacons(struct mt7996_phy *phy); void mt7996_mac_set_coverage_class(struct mt7996_phy *phy); void mt7996_mac_work(struct work_struct *work); void mt7996_mac_reset_work(struct work_struct *work); From 4be3b46ec5190dc79cd38e3750480b2c66a791ad Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Fri, 8 Aug 2025 13:29:48 +0000 Subject: [PATCH 0876/1307] wifi: mt76: mt7996: use the correct vif link for scanning/roc restore fix which was dropped during MLO rework Fixes: f0b0b239b8f3 ("wifi: mt76: mt7996: rework mt7996_mac_write_txwi() for MLO support") Signed-off-by: Chad Monroe Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/180fffd409aa57f535a3d2c1951e41ae398ce09e.1754659732.git.chad@monroe.io Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index a9f7e5626dcd..d6531b74be1f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -903,8 +903,12 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, IEEE80211_TX_CTRL_MLO_LINK); mvif = vif ? (struct mt7996_vif *)vif->drv_priv : NULL; - if (mvif) - mlink = rcu_dereference(mvif->mt76.link[link_id]); + if (mvif) { + if (wcid->offchannel) + mlink = rcu_dereference(mvif->mt76.offchannel_link); + if (!mlink) + mlink = rcu_dereference(mvif->mt76.link[link_id]); + } if (mlink) { omac_idx = mlink->omac_idx; From 0300545b8a113e96ee260a7c142be846d391a620 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:47 +0200 Subject: [PATCH 0877/1307] wifi: mt76: mt7996: fix crash on some tx status reports When a wcid can't be found, link_sta can be stale from a previous batch. The code currently assumes that if link_sta is set, wcid is also non-zero. Fix wcid NULL pointer dereference by resetting link_sta when a wcid entry can't be found. Fixes: 62da647a2b20 ("wifi: mt76: mt7996: Add MLO support to mt7996_tx_check_aggr()") Link: https://patch.msgid.link/20250827085352.51636-1-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index d6531b74be1f..837deb41ae13 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1247,8 +1247,10 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); - if (!sta) + if (!sta) { + link_sta = NULL; goto next; + } link_sta = rcu_dereference(sta->link[wcid->link_id]); if (!link_sta) From a3c99ef88a084e1c2b99dd56bbfa7f89c9be3e92 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:48 +0200 Subject: [PATCH 0878/1307] wifi: mt76: do not add non-sta wcid entries to the poll list Polling and airtime reporting is valid for station entries only Link: https://patch.msgid.link/20250827085352.51636-2-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 3afe4c4cd7bb..6b2641a9ae9a 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1690,7 +1690,7 @@ EXPORT_SYMBOL_GPL(mt76_wcid_cleanup); void mt76_wcid_add_poll(struct mt76_dev *dev, struct mt76_wcid *wcid) { - if (test_bit(MT76_MCU_RESET, &dev->phy.state)) + if (test_bit(MT76_MCU_RESET, &dev->phy.state) || !wcid->sta) return; spin_lock_bh(&dev->sta_poll_lock); From 4a522b01e368eec58d182ecc47d24f49a39e440d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:49 +0200 Subject: [PATCH 0879/1307] wifi: mt76: mt7996: add missing check for rx wcid entries Non-station wcid entries must not be passed to the rx functions. In case of the global wcid entry, it could even lead to corruption in the wcid array due to pointer being casted to struct mt7996_sta_link using container_of. Fixes: 7464b12b7d92 ("wifi: mt76: mt7996: rework mt7996_rx_get_wcid to support MLO") Link: https://patch.msgid.link/20250827085352.51636-3-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 837deb41ae13..b3fcca9bbb95 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -62,7 +62,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, int i; wcid = mt76_wcid_ptr(dev, idx); - if (!wcid) + if (!wcid || !wcid->sta) return NULL; if (!mt7996_band_valid(dev, band_idx)) From 065c79df595af21d6d1b27d642860faa1d938774 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:50 +0200 Subject: [PATCH 0880/1307] wifi: mt76: mt7915: fix list corruption after hardware restart Since stations are recreated from scratch, all lists that wcids are added to must be cleared before calling ieee80211_restart_hw. Set wcid->sta = 0 for each wcid entry in order to ensure that they are not added again before they are ready. Fixes: 8a55712d124f ("wifi: mt76: mt7915: enable full system reset support") Link: https://patch.msgid.link/20250827085352.51636-4-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 37 +++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + .../net/wireless/mediatek/mt76/mt7915/mac.c | 12 +++--- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 6b2641a9ae9a..0e0d7b3bfe42 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -818,6 +818,43 @@ void mt76_free_device(struct mt76_dev *dev) } EXPORT_SYMBOL_GPL(mt76_free_device); +static void mt76_reset_phy(struct mt76_phy *phy) +{ + if (!phy) + return; + + INIT_LIST_HEAD(&phy->tx_list); +} + +void mt76_reset_device(struct mt76_dev *dev) +{ + int i; + + rcu_read_lock(); + for (i = 0; i < ARRAY_SIZE(dev->wcid); i++) { + struct mt76_wcid *wcid; + + wcid = rcu_dereference(dev->wcid[i]); + if (!wcid) + continue; + + wcid->sta = 0; + mt76_wcid_cleanup(dev, wcid); + rcu_assign_pointer(dev->wcid[i], NULL); + } + rcu_read_unlock(); + + INIT_LIST_HEAD(&dev->wcid_list); + INIT_LIST_HEAD(&dev->sta_poll_list); + dev->vif_mask = 0; + memset(dev->wcid_mask, 0, sizeof(dev->wcid_mask)); + + mt76_reset_phy(&dev->phy); + for (i = 0; i < ARRAY_SIZE(dev->phys); i++) + mt76_reset_phy(dev->phys[i]); +} +EXPORT_SYMBOL_GPL(mt76_reset_device); + struct mt76_phy *mt76_vif_phy(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 8dd5c29fb75b..127637454c82 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1243,6 +1243,7 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, struct ieee80211_rate *rates, int n_rates); void mt76_unregister_device(struct mt76_dev *dev); void mt76_free_device(struct mt76_dev *dev); +void mt76_reset_device(struct mt76_dev *dev); void mt76_unregister_phy(struct mt76_phy *phy); struct mt76_phy *mt76_alloc_radio_phy(struct mt76_dev *dev, unsigned int size, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 6639976afcee..1c0d310146d6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1460,17 +1460,15 @@ mt7915_mac_full_reset(struct mt7915_dev *dev) if (i == 10) dev_err(dev->mt76.dev, "chip full reset failed\n"); - spin_lock_bh(&dev->mt76.sta_poll_lock); - while (!list_empty(&dev->mt76.sta_poll_list)) - list_del_init(dev->mt76.sta_poll_list.next); - spin_unlock_bh(&dev->mt76.sta_poll_lock); - - memset(dev->mt76.wcid_mask, 0, sizeof(dev->mt76.wcid_mask)); - dev->mt76.vif_mask = 0; dev->phy.omac_mask = 0; if (phy2) phy2->omac_mask = 0; + mt76_reset_device(&dev->mt76); + + INIT_LIST_HEAD(&dev->sta_rc_list); + INIT_LIST_HEAD(&dev->twt_list); + i = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA); dev->mt76.global_wcid.idx = i; dev->recovery.hw_full_reset = false; From bdeac7815629c1a32b8784922368742e183747ea Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:51 +0200 Subject: [PATCH 0881/1307] wifi: mt76: free pending offchannel tx frames on wcid cleanup Avoid leaking them or keeping the wcid on the tx list Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Link: https://patch.msgid.link/20250827085352.51636-5-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 0e0d7b3bfe42..59adf3312617 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1716,6 +1716,10 @@ void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) skb_queue_splice_tail_init(&wcid->tx_pending, &list); spin_unlock(&wcid->tx_pending.lock); + spin_lock(&wcid->tx_offchannel.lock); + skb_queue_splice_tail_init(&wcid->tx_offchannel, &list); + spin_unlock(&wcid->tx_offchannel.lock); + spin_unlock_bh(&phy->tx_lock); while ((skb = __skb_dequeue(&list)) != NULL) { From 49fba87205bec14a0f6bd997635bf3968408161e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Aug 2025 10:53:52 +0200 Subject: [PATCH 0882/1307] wifi: mt76: fix linked list corruption Never leave scheduled wcid entries on the temporary on-stack list Fixes: 0b3be9d1d34e ("wifi: mt76: add separate tx scheduling queue for off-channel tx") Link: https://patch.msgid.link/20250827085352.51636-6-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 03b042fdf997..8ab5840fee57 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -646,6 +646,7 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, static void mt76_txq_schedule_pending(struct mt76_phy *phy) { LIST_HEAD(tx_list); + int ret = 0; if (list_empty(&phy->tx_list)) return; @@ -657,13 +658,13 @@ static void mt76_txq_schedule_pending(struct mt76_phy *phy) list_splice_init(&phy->tx_list, &tx_list); while (!list_empty(&tx_list)) { struct mt76_wcid *wcid; - int ret; wcid = list_first_entry(&tx_list, struct mt76_wcid, tx_list); list_del_init(&wcid->tx_list); spin_unlock(&phy->tx_lock); - ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_offchannel); + if (ret >= 0) + ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_offchannel); if (ret >= 0 && !phy->offchannel) ret = mt76_txq_schedule_pending_wcid(phy, wcid, &wcid->tx_pending); spin_lock(&phy->tx_lock); @@ -672,9 +673,6 @@ static void mt76_txq_schedule_pending(struct mt76_phy *phy) !skb_queue_empty(&wcid->tx_offchannel) && list_empty(&wcid->tx_list)) list_add_tail(&wcid->tx_list, &phy->tx_list); - - if (ret < 0) - break; } spin_unlock(&phy->tx_lock); From c2415c407a2cde01290d52ce2a1f81b0616379a3 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Mon, 25 Aug 2025 07:57:29 +0000 Subject: [PATCH 0883/1307] x86/cpu/topology: Use initial APIC ID from XTOPOLOGY leaf on AMD/HYGON Prior to the topology parsing rewrite and the switchover to the new parsing logic for AMD processors in c749ce393b8f ("x86/cpu: Use common topology code for AMD"), the initial_apicid on these platforms was: - First initialized to the LocalApicId from CPUID leaf 0x1 EBX[31:24]. - Then overwritten by the ExtendedLocalApicId in CPUID leaf 0xb EDX[31:0] on processors that supported topoext. With the new parsing flow introduced in f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser"), parse_8000_001e() now unconditionally overwrites the initial_apicid already parsed during cpu_parse_topology_ext(). Although this has not been a problem on baremetal platforms, on virtualized AMD guests that feature more than 255 cores, QEMU zeros out the CPUID leaf 0x8000001e on CPUs with CoreID > 255 to prevent collision of these IDs in EBX[7:0] which can only represent a maximum of 255 cores [1]. This results in the following FW_BUG being logged when booting a guest with more than 255 cores: [Firmware Bug]: CPU 512: APIC ID mismatch. CPUID: 0x0000 APIC: 0x0200 AMD64 Architecture Programmer's Manual Volume 2: System Programming Pub. 24593 Rev. 3.42 [2] Section 16.12 "x2APIC_ID" mentions the Extended Enumeration leaf 0xb (Fn0000_000B_EDX[31:0])(which was later superseded by the extended leaf 0x80000026) provides the full x2APIC ID under all circumstances unlike the one reported by CPUID leaf 0x8000001e EAX which depends on the mode in which APIC is configured. Rely on the APIC ID parsed during cpu_parse_topology_ext() from CPUID leaf 0x80000026 or 0xb and only use the APIC ID from leaf 0x8000001e if cpu_parse_topology_ext() failed (has_topoext is false). On platforms that support the 0xb leaf (Zen2 or later, AMD guests on QEMU) or the extended leaf 0x80000026 (Zen4 or later), the initial_apicid is now set to the value parsed from EDX[31:0]. On older AMD/Hygon platforms that do not support the 0xb leaf but support the TOPOEXT extension (families 0x15, 0x16, 0x17[Zen1], and Hygon), retain current behavior where the initial_apicid is set using the 0x8000001e leaf. Issue debugged by Naveen N Rao (AMD) and Sairaj Kodilkar . [ bp: Massage commit message. ] Fixes: c749ce393b8f ("x86/cpu: Use common topology code for AMD") Suggested-by: Thomas Gleixner Signed-off-by: K Prateek Nayak Signed-off-by: Borislav Petkov (AMD) Tested-by: Naveen N Rao (AMD) Cc: stable@vger.kernel.org Link: https://github.com/qemu/qemu/commit/35ac5dfbcaa4b [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 [2] Link: https://lore.kernel.org/20250825075732.10694-2-kprateek.nayak@amd.com --- arch/x86/kernel/cpu/topology_amd.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 843b1655ab45..827dd0dbb6e9 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -81,20 +81,25 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext) cpuid_leaf(0x8000001e, &leaf); - tscan->c->topo.initial_apicid = leaf.ext_apic_id; - /* - * If leaf 0xb is available, then the domain shifts are set - * already and nothing to do here. Only valid for family >= 0x17. + * If leaf 0xb/0x26 is available, then the APIC ID and the domain + * shifts are set already. */ - if (!has_topoext && tscan->c->x86 >= 0x17) { - /* - * Leaf 0x80000008 set the CORE domain shift already. - * Update the SMT domain, but do not propagate it. - */ - unsigned int nthreads = leaf.core_nthreads + 1; + if (!has_topoext) { + tscan->c->topo.initial_apicid = leaf.ext_apic_id; - topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); + /* + * Leaf 0x8000008 sets the CORE domain shift but not the + * SMT domain shift. On CPUs with family >= 0x17, there + * might be hyperthreads. + */ + if (tscan->c->x86 >= 0x17) { + /* Update the SMT domain, but do not propagate it. */ + unsigned int nthreads = leaf.core_nthreads + 1; + + topology_update_dom(tscan, TOPO_SMT_DOMAIN, + get_count_order(nthreads), nthreads); + } } store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id); From 479a54ab92087318514c82428a87af2d7af1a576 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Fri, 22 Aug 2025 11:52:19 +0800 Subject: [PATCH 0884/1307] netfilter: br_netfilter: do not check confirmed bit in br_nf_local_in() after confirm When send a broadcast packet to a tap device, which was added to a bridge, br_nf_local_in() is called to confirm the conntrack. If another conntrack with the same hash value is added to the hash table, which can be triggered by a normal packet to a non-bridge device, the below warning may happen. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 96 at net/bridge/br_netfilter_hooks.c:632 br_nf_local_in+0x168/0x200 CPU: 1 UID: 0 PID: 96 Comm: tap_send Not tainted 6.17.0-rc2-dirty #44 PREEMPT(voluntary) RIP: 0010:br_nf_local_in+0x168/0x200 Call Trace: nf_hook_slow+0x3e/0xf0 br_pass_frame_up+0x103/0x180 br_handle_frame_finish+0x2de/0x5b0 br_nf_hook_thresh+0xc0/0x120 br_nf_pre_routing_finish+0x168/0x3a0 br_nf_pre_routing+0x237/0x5e0 br_handle_frame+0x1ec/0x3c0 __netif_receive_skb_core+0x225/0x1210 __netif_receive_skb_one_core+0x37/0xa0 netif_receive_skb+0x36/0x160 tun_get_user+0xa54/0x10c0 tun_chr_write_iter+0x65/0xb0 vfs_write+0x305/0x410 ksys_write+0x60/0xd0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f ---[ end trace 0000000000000000 ]--- To solve the hash conflict, nf_ct_resolve_clash() try to merge the conntracks, and update skb->_nfct. However, br_nf_local_in() still use the old ct from local variable 'nfct' after confirm(), which leads to this warning. If confirm() does not insert the conntrack entry and return NF_DROP, the warning may also occur. There is no need to reserve the WARN_ON_ONCE, just remove it. Link: https://lore.kernel.org/netdev/20250820043329.2902014-1-wangliang74@huawei.com/ Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") Suggested-by: Florian Westphal Signed-off-by: Wang Liang Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_hooks.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 94cbe967d1c1..083e2fe96441 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -626,9 +626,6 @@ static unsigned int br_nf_local_in(void *priv, break; } - ct = container_of(nfct, struct nf_conn, ct_general); - WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); - return ret; } #endif From 54416fd76770bd04fc3c501810e8d673550bab26 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 18 Aug 2025 13:22:20 +0200 Subject: [PATCH 0885/1307] netfilter: conntrack: helper: Replace -EEXIST by -EBUSY The helper registration return value is passed-through by module_init callbacks which modprobe confuses with the harmless -EEXIST returned when trying to load an already loaded module. Make sure modprobe fails so users notice their helper has not been registered and won't work. Suggested-by: Christophe Leroy Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4ed5878cb25b..ceb48c3ca0a4 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -368,7 +368,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { - ret = -EEXIST; + ret = -EBUSY; goto out; } } @@ -379,7 +379,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { - ret = -EEXIST; + ret = -EBUSY; goto out; } } From 9d81ba6d49a7457784f0b6a71046818b86ec7e44 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 27 Aug 2025 09:45:55 +0800 Subject: [PATCH 0886/1307] fuse: Block access to folio overlimit syz reported a slab-out-of-bounds Write in fuse_dev_do_write. When the number of bytes to be retrieved is truncated to the upper limit by fc->max_pages and there is an offset, the oob is triggered. Add a loop termination condition to prevent overruns. Fixes: 3568a9569326 ("fuse: support large folios for retrieves") Reported-by: syzbot+2d215d165f9354b9c4ea@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2d215d165f9354b9c4ea Tested-by: syzbot+2d215d165f9354b9c4ea@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Reviewed-by: Joanne Koong Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e80cd8f2c049..5150aa25e64b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1893,7 +1893,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, index = outarg->offset >> PAGE_SHIFT; - while (num) { + while (num && ap->num_folios < num_pages) { struct folio *folio; unsigned int folio_offset; unsigned int nr_bytes; From f600bddbcf79acd13d4a0d93aed4ee2fe29f927d Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Wed, 27 Aug 2025 12:34:03 +0800 Subject: [PATCH 0887/1307] ALSA: hda/tas2781: Fix EFI name for calibration beginning with 1 instead of 0 A bug reported by one of my customers that EFI name beginning with 0 instead of 1. Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250827043404.644-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index e34b17f0c9b9..7143926c2c30 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -327,8 +327,8 @@ static int tas2563_save_calibration(struct tas2781_hda *h) data[offset] = i; offset++; for (j = 0; j < TASDEV_CALIB_N; ++j) { - ret = snprintf(var8, sizeof(var8), vars[j], i); - + /* EFI name for calibration started with 1, not 0 */ + ret = snprintf(var8, sizeof(var8), vars[j], i + 1); if (ret < 0 || ret >= sizeof(var8) - 1) { dev_err(p->dev, "%s: Read %s failed\n", __func__, var8); From c64eff368ac676e8540344d27a3de47e0ad90d21 Mon Sep 17 00:00:00 2001 From: Qingyue Zhang Date: Wed, 27 Aug 2025 19:43:39 +0800 Subject: [PATCH 0888/1307] io_uring/kbuf: fix signedness in this_len calculation When importing and using buffers, buf->len is considered unsigned. However, buf->len is converted to signed int when committing. This can lead to unexpected behavior if the buffer is large enough to be interpreted as a negative value. Make min_t calculation unsigned. Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Co-developed-by: Suoxing Zhang Signed-off-by: Suoxing Zhang Signed-off-by: Qingyue Zhang Link: https://lore.kernel.org/r/tencent_4DBB3674C0419BEC2C0C525949DA410CA307@qq.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index f2d2cc319faa..81a13338dfab 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -39,7 +39,7 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) u32 this_len; buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - this_len = min_t(int, len, buf->len); + this_len = min_t(u32, len, buf->len); buf->len -= this_len; if (buf->len) { buf->addr += this_len; From dcb34659028f856c423a29ef9b4e2571d203444d Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:55 +0900 Subject: [PATCH 0889/1307] net: rose: split remove and free operations in rose_remove_neigh() The current rose_remove_neigh() performs two distinct operations: 1. Removes rose_neigh from rose_neigh_list 2. Frees the rose_neigh structure Split these operations into separate functions to improve maintainability and prepare for upcoming refcount_t conversion. The timer cleanup remains in rose_remove_neigh() because free operations can be called from timer itself. This patch introduce rose_neigh_put() to handle the freeing of rose_neigh structures and modify rose_remove_neigh() to handle removal only. Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-2-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 8 ++++++++ net/rose/rose_route.c | 15 ++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index 23267b4efcfa..174b4f605d84 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -151,6 +151,14 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_put(struct rose_neigh *rose_neigh) +{ + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); +} + /* af_rose.c */ extern ax25_address rose_callsign; extern int sysctl_rose_restart_request_timeout; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b72bf8a08d48..0c44c416f485 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -234,20 +234,12 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); return; } @@ -331,8 +323,10 @@ static int rose_del_node(struct rose_route_struct *rose_route, if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; - if (rose_neigh->count == 0 && rose_neigh->use == 0) + if (rose_neigh->count == 0 && rose_neigh->use == 0) { rose_remove_neigh(rose_neigh); + rose_neigh_put(rose_neigh); + } rose_node->count--; @@ -513,6 +507,7 @@ void rose_rt_device_down(struct net_device *dev) } rose_remove_neigh(s); + rose_neigh_put(s); } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); @@ -569,6 +564,7 @@ static int rose_clear_routes(void) if (s->use == 0 && !s->loopback) { s->count = 0; rose_remove_neigh(s); + rose_neigh_put(s); } } @@ -1301,6 +1297,7 @@ void __exit rose_rt_free(void) rose_neigh = rose_neigh->next; rose_remove_neigh(s); + rose_neigh_put(s); } while (rose_node != NULL) { From d860d1faa6b2ce3becfdb8b0c2b048ad31800061 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:56 +0900 Subject: [PATCH 0890/1307] net: rose: convert 'use' field to refcount_t The 'use' field in struct rose_neigh is used as a reference counter but lacks atomicity. This can lead to race conditions where a rose_neigh structure is freed while still being referenced by other code paths. For example, when rose_neigh->use becomes zero during an ioctl operation via rose_rt_ioctl(), the structure may be removed while its timer is still active, potentially causing use-after-free issues. This patch changes the type of 'use' from unsigned short to refcount_t and updates all code paths to use rose_neigh_hold() and rose_neigh_put() which operate reference counts atomically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-3-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 18 +++++++++++++----- net/rose/af_rose.c | 13 +++++++------ net/rose/rose_in.c | 12 ++++++------ net/rose/rose_route.c | 33 ++++++++++++++++++--------------- net/rose/rose_timer.c | 2 +- 5 files changed, 45 insertions(+), 33 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index 174b4f605d84..2b5491bbf39a 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -8,6 +8,7 @@ #ifndef _ROSE_H #define _ROSE_H +#include #include #include #include @@ -96,7 +97,7 @@ struct rose_neigh { ax25_cb *ax25; struct net_device *dev; unsigned short count; - unsigned short use; + refcount_t use; unsigned int number; char restarted; char dce_mode; @@ -151,12 +152,19 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_hold(struct rose_neigh *rose_neigh) +{ + refcount_inc(&rose_neigh->use); +} + static inline void rose_neigh_put(struct rose_neigh *rose_neigh) { - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); + if (refcount_dec_and_test(&rose_neigh->use)) { + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); + } } /* af_rose.c */ diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4e72b636a46a..543f9e8ebb69 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -170,7 +170,7 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) if (rose->neighbour == neigh) { rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose->neighbour = NULL; } } @@ -212,7 +212,7 @@ static void rose_kill_by_device(struct net_device *dev) if (rose->device == dev) { rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } @@ -655,7 +655,7 @@ static int rose_release(struct socket *sock) break; case ROSE_STATE_2: - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); release_sock(sk); rose_disconnect(sk, 0, -1, -1); lock_sock(sk); @@ -823,6 +823,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose->lci = rose_new_lci(rose->neighbour); if (!rose->lci) { err = -ENETUNREACH; + rose_neigh_put(rose->neighbour); goto out_release; } @@ -834,12 +835,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le dev = rose_dev_first(); if (!dev) { err = -ENETUNREACH; + rose_neigh_put(rose->neighbour); goto out_release; } user = ax25_findbyuid(current_euid()); if (!user) { err = -EINVAL; + rose_neigh_put(rose->neighbour); dev_put(dev); goto out_release; } @@ -874,8 +877,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose->state = ROSE_STATE_1; - rose->neighbour->use++; - rose_write_internal(sk, ROSE_CALL_REQUEST); rose_start_heartbeat(sk); rose_start_t1timer(sk); @@ -1077,7 +1078,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros GFP_ATOMIC); make_rose->facilities = facilities; - make_rose->neighbour->use++; + rose_neigh_hold(make_rose->neighbour); if (rose_sk(sk)->defer) { make_rose->state = ROSE_STATE_5; diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 3e99181e759f..0276b393f0e5 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -56,7 +56,7 @@ static int rose_state1_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -79,12 +79,12 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; case ROSE_CLEAR_CONFIRMATION: rose_disconnect(sk, 0, -1, -1); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -121,7 +121,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; case ROSE_RR: @@ -234,7 +234,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety case ROSE_CLEAR_REQUEST: rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); break; default: @@ -254,7 +254,7 @@ static int rose_state5_machine(struct sock *sk, struct sk_buff *skb, int framety if (frametype == ROSE_CLEAR_REQUEST) { rose_write_internal(sk, ROSE_CLEAR_CONFIRMATION); rose_disconnect(sk, 0, skb->data[3], skb->data[4]); - rose_sk(sk)->neighbour->use--; + rose_neigh_put(rose_sk(sk)->neighbour); } return 0; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 0c44c416f485..8efb9033c057 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -93,11 +93,11 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, rose_neigh->ax25 = NULL; rose_neigh->dev = dev; rose_neigh->count = 0; - rose_neigh->use = 0; rose_neigh->dce_mode = 0; rose_neigh->loopback = 0; rose_neigh->number = rose_neigh_no++; rose_neigh->restarted = 0; + refcount_set(&rose_neigh->use, 1); skb_queue_head_init(&rose_neigh->queue); @@ -255,10 +255,10 @@ static void rose_remove_route(struct rose_route *rose_route) struct rose_route *s; if (rose_route->neigh1 != NULL) - rose_route->neigh1->use--; + rose_neigh_put(rose_route->neigh1); if (rose_route->neigh2 != NULL) - rose_route->neigh2->use--; + rose_neigh_put(rose_route->neigh2); if ((s = rose_route_list) == rose_route) { rose_route_list = rose_route->next; @@ -323,7 +323,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; - if (rose_neigh->count == 0 && rose_neigh->use == 0) { + if (rose_neigh->count == 0) { rose_remove_neigh(rose_neigh); rose_neigh_put(rose_neigh); } @@ -375,11 +375,11 @@ void rose_add_loopback_neigh(void) sn->ax25 = NULL; sn->dev = NULL; sn->count = 0; - sn->use = 0; sn->dce_mode = 1; sn->loopback = 1; sn->number = rose_neigh_no++; sn->restarted = 1; + refcount_set(&sn->use, 1); skb_queue_head_init(&sn->queue); @@ -561,8 +561,7 @@ static int rose_clear_routes(void) s = rose_neigh; rose_neigh = rose_neigh->next; - if (s->use == 0 && !s->loopback) { - s->count = 0; + if (!s->loopback) { rose_remove_neigh(s); rose_neigh_put(s); } @@ -680,6 +679,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, for (i = 0; i < node->count; i++) { if (node->neighbour[i]->restarted) { res = node->neighbour[i]; + rose_neigh_hold(node->neighbour[i]); goto out; } } @@ -691,6 +691,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; + rose_neigh_hold(node->neighbour[i]); goto out; } failed = 1; @@ -780,13 +781,13 @@ static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh) } if (rose_route->neigh1 == rose_neigh) { - rose_route->neigh1->use--; + rose_neigh_put(rose_route->neigh1); rose_route->neigh1 = NULL; rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0); } if (rose_route->neigh2 == rose_neigh) { - rose_route->neigh2->use--; + rose_neigh_put(rose_route->neigh2); rose_route->neigh2 = NULL; rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0); } @@ -915,7 +916,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_clear_queues(sk); rose->cause = ROSE_NETWORK_CONGESTION; rose->diagnostic = 0; - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose->neighbour = NULL; rose->lci = 0; rose->state = ROSE_STATE_0; @@ -1040,12 +1041,12 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if ((new_lci = rose_new_lci(new_neigh)) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71); - goto out; + goto put_neigh; } if ((rose_route = kmalloc(sizeof(*rose_route), GFP_ATOMIC)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120); - goto out; + goto put_neigh; } rose_route->lci1 = lci; @@ -1058,8 +1059,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_route->lci2 = new_lci; rose_route->neigh2 = new_neigh; - rose_route->neigh1->use++; - rose_route->neigh2->use++; + rose_neigh_hold(rose_route->neigh1); + rose_neigh_hold(rose_route->neigh2); rose_route->next = rose_route_list; rose_route_list = rose_route; @@ -1071,6 +1072,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) rose_transmit_link(skb, rose_route->neigh2); res = 1; +put_neigh: + rose_neigh_put(new_neigh); out: spin_unlock_bh(&rose_route_list_lock); spin_unlock_bh(&rose_neigh_list_lock); @@ -1186,7 +1189,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, - rose_neigh->use, + refcount_read(&rose_neigh->use) - 1, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? "yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 020369c49587..bb60a1654d61 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -180,7 +180,7 @@ static void rose_timer_expiry(struct timer_list *t) break; case ROSE_STATE_2: /* T3 */ - rose->neighbour->use--; + rose_neigh_put(rose->neighbour); rose_disconnect(sk, ETIMEDOUT, -1, -1); break; From da9c9c877597170b929a6121a68dcd3dd9a80f45 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:57 +0900 Subject: [PATCH 0891/1307] net: rose: include node references in rose_neigh refcount Current implementation maintains two separate reference counting mechanisms: the 'count' field in struct rose_neigh tracks references from rose_node structures, while the 'use' field (now refcount_t) tracks references from rose_sock. This patch merges these two reference counting systems using 'use' field for proper reference management. Specifically, this patch adds incrementing and decrementing of rose_neigh->use when rose_neigh->count is incremented or decremented. This patch also modifies rose_rt_free(), rose_rt_device_down() and rose_clear_route() to properly release references to rose_neigh objects before freeing a rose_node through rose_remove_node(). These changes ensure rose_neigh structures are properly freed only when all references, including those from rose_node structures, are released. As a result, this resolves a slab-use-after-free issue reported by Syzbot. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+942297eecf7d2d61d1f1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=942297eecf7d2d61d1f1 Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-4-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/rose/rose_route.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 8efb9033c057..1adee1fbc2ed 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -178,6 +178,7 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, } } rose_neigh->count++; + rose_neigh_hold(rose_neigh); goto out; } @@ -187,6 +188,7 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, rose_node->neighbour[rose_node->count] = rose_neigh; rose_node->count++; rose_neigh->count++; + rose_neigh_hold(rose_neigh); } out: @@ -322,6 +324,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, for (i = 0; i < rose_node->count; i++) { if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; + rose_neigh_put(rose_neigh); if (rose_neigh->count == 0) { rose_remove_neigh(rose_neigh); @@ -430,6 +433,7 @@ int rose_add_loopback_node(const rose_address *address) rose_node_list = rose_node; rose_loopback_neigh->count++; + rose_neigh_hold(rose_loopback_neigh); out: spin_unlock_bh(&rose_node_list_lock); @@ -461,6 +465,7 @@ void rose_del_loopback_node(const rose_address *address) rose_remove_node(rose_node); rose_loopback_neigh->count--; + rose_neigh_put(rose_loopback_neigh); out: spin_unlock_bh(&rose_node_list_lock); @@ -500,6 +505,7 @@ void rose_rt_device_down(struct net_device *dev) memmove(&t->neighbour[i], &t->neighbour[i + 1], sizeof(t->neighbour[0]) * (t->count - i)); + rose_neigh_put(s); } if (t->count <= 0) @@ -543,6 +549,7 @@ static int rose_clear_routes(void) { struct rose_neigh *s, *rose_neigh; struct rose_node *t, *rose_node; + int i; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); @@ -553,8 +560,12 @@ static int rose_clear_routes(void) while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; - if (!t->loopback) + + if (!t->loopback) { + for (i = 0; i < rose_node->count; i++) + rose_neigh_put(t->neighbour[i]); rose_remove_node(t); + } } while (rose_neigh != NULL) { @@ -1189,7 +1200,7 @@ static int rose_neigh_show(struct seq_file *seq, void *v) (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, - refcount_read(&rose_neigh->use) - 1, + refcount_read(&rose_neigh->use) - rose_neigh->count - 1, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? "yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, @@ -1294,6 +1305,7 @@ void __exit rose_rt_free(void) struct rose_neigh *s, *rose_neigh = rose_neigh_list; struct rose_node *t, *rose_node = rose_node_list; struct rose_route *u, *rose_route = rose_route_list; + int i; while (rose_neigh != NULL) { s = rose_neigh; @@ -1307,6 +1319,8 @@ void __exit rose_rt_free(void) t = rose_node; rose_node = rose_node->next; + for (i = 0; i < t->count; i++) + rose_neigh_put(t->neighbour[i]); rose_remove_node(t); } From 8b3641dfb6f902407495c63b9b64482b32319b66 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Tue, 19 Aug 2025 14:21:59 -0500 Subject: [PATCH 0892/1307] x86/bugs: Add attack vector controls for SSB Attack vector controls for SSB were missed in the initial attack vector series. The default mitigation for SSB requires user-space opt-in so it is only relevant for user->user attacks. Check with attack vector controls when the command is auto - i.e., no explicit user selection has been done. Fixes: 2d31d2874663 ("x86/bugs: Define attack vectors relevant for each bug") Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250819192200.2003074-5-david.kaplan@amd.com --- .../admin-guide/hw-vuln/attack_vector_controls.rst | 5 +---- arch/x86/kernel/cpu/bugs.c | 9 +++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst index 6dd0800146f6..5964901d66e3 100644 --- a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst +++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst @@ -215,7 +215,7 @@ Spectre_v2 X X Spectre_v2_user X X * (Note 1) SRBDS X X X X SRSO X X X X -SSB (Note 4) +SSB X TAA X X X X * (Note 2) TSA X X X X =============== ============== ============ ============= ============== ============ ======== @@ -229,9 +229,6 @@ Notes: 3 -- Disables SMT if cross-thread mitigations are fully enabled, the CPU is vulnerable, and STIBP is not supported - 4 -- Speculative store bypass is always enabled by default (no kernel - mitigation applied) unless overridden with spec_store_bypass_disable option - When an attack-vector is disabled, all mitigations for the vulnerabilities listed in the above table are disabled, unless mitigation is required for a different enabled attack-vector or a mitigation is explicitly selected via a diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 49ef1b832c1a..af838b8d845c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -416,6 +416,10 @@ static bool __init should_mitigate_vuln(unsigned int bug) cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST) || (smt_mitigations != SMT_MITIGATIONS_OFF); + + case X86_BUG_SPEC_STORE_BYPASS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER); + default: WARN(1, "Unknown bug %x\n", bug); return false; @@ -2710,6 +2714,11 @@ static void __init ssb_select_mitigation(void) ssb_mode = SPEC_STORE_BYPASS_DISABLE; break; case SPEC_STORE_BYPASS_CMD_AUTO: + if (should_mitigate_vuln(X86_BUG_SPEC_STORE_BYPASS)) + ssb_mode = SPEC_STORE_BYPASS_PRCTL; + else + ssb_mode = SPEC_STORE_BYPASS_NONE; + break; case SPEC_STORE_BYPASS_CMD_PRCTL: ssb_mode = SPEC_STORE_BYPASS_PRCTL; break; From 27f5e0c1321ee280189cea16044de2e157dc4bb9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:37:16 -0400 Subject: [PATCH 0893/1307] drm/amdgpu/gfx11: set MQD as appriopriate for queue types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the MQD as appropriate for the kernel vs user queues. Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 063d6683208722b1875f888a45084e3d112701ac) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c01c241a1b06..456ba758fa94 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4129,6 +4129,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4281,8 +4283,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; From 29f155c5e82fe35ff85b1f13612cb8c2dbe1dca3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:38:14 -0400 Subject: [PATCH 0894/1307] drm/amdgpu/gfx12: set MQD as appriopriate for queue types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the MQD as appropriate for the kernel vs user queues. Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 7b9110f2897957efd9715b52fc01986509729db3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 3e138527d534..fd44d5503e28 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3026,6 +3026,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -3175,8 +3177,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; From ac4ed2da4c1305a1a002415058aa7deaf49ffe3e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 25 Aug 2025 13:40:22 -0400 Subject: [PATCH 0895/1307] Revert "drm/amdgpu: fix incorrect vm flags to map bo" This reverts commit b08425fa77ad2f305fe57a33dceb456be03b653f. Revert this to align with 6.17 because the fixes tag was wrong on this commit. Signed-off-by: Alex Deucher (cherry picked from commit be33e8a239aac204d7e9e673c4220ef244eb1ba3) --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index dfb6cfd83760..02138aa55793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); From 5dff50802b285da8284a7bf17ae2fdc6f1357023 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 25 Aug 2025 12:54:01 +0800 Subject: [PATCH 0896/1307] drm/amd/amdgpu: disable hwmon power1_cap* for gfx 11.0.3 on vf mode the PPSMC_MSG_GetPptLimit msg is not valid for gfx 11.0.3 on vf mode, so skiped to create power1_cap* hwmon sysfs node. Signed-off-by: Yang Wang Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit e82a8d441038d8cb10b63047a9e705c42479d156) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4b64851fdb42..5fbfe7333b54 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3458,14 +3458,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */ - if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) && - (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) - return 0; + if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr) { + if (adev->family == AMDGPU_FAMILY_SI || + ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) && + (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) || + (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3))) + return 0; + } /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || From ee38ea0ae4ed13fe33e033dc98d11e76bc7167cd Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Tue, 26 Aug 2025 17:30:58 +0800 Subject: [PATCH 0897/1307] drm/amdgpu: update firmware version checks for user queue support The minimum firmware versions required for user queue functionality have been increased to address an issue where the queue privilege state was lost during queue connect operations. The problem occurred because the privilege state was being restored to its initial value at the beginning of the function, overwriting the state that was properly set during the queue connect case. This commit updates the minimum version requirements: - ME firmware from 2390 to 2420 - PFP firmware from 2530 to 2580 - MEC firmware from 2600 to 2650 - MES firmware remains at 120 These updated firmware versions contain the necessary fixes to properly maintain queue privilege state throughout connect operations. Fixes: 61ca97e9590c ("drm/amdgpu: Add fw minimum version check for usermode queue") Acked-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 5f976c9939f0d5916d2b8ef3156a6d1799781df1) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 456ba758fa94..c85de8c8f6f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1612,9 +1612,9 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if (!adev->gfx.disable_uq && - adev->gfx.me_fw_version >= 2390 && - adev->gfx.pfp_fw_version >= 2530 && - adev->gfx.mec_fw_version >= 2600 && + adev->gfx.me_fw_version >= 2420 && + adev->gfx.pfp_fw_version >= 2580 && + adev->gfx.mec_fw_version >= 2650 && adev->mes.fw_version[0] >= 120) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; From c767d74a9cdd1042046d02319d16b85d9aa8a8aa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Aug 2025 12:12:37 -0400 Subject: [PATCH 0898/1307] drm/amdgpu/userq: fix error handling of invalid doorbell If the doorbell is invalid, be sure to set the r to an error state so the function returns an error. Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit 7e2a5b0a9a165a7c51274aa01b18be29491b4345) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index c3ace8030530..8190c24a649a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -471,6 +471,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); kfree(queue); + r = -EINVAL; goto unlock; } From ab529e6ca1f67bcf31f3ea80c72bffde2e9e053e Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Thu, 28 Aug 2025 02:24:19 +0800 Subject: [PATCH 0899/1307] fs/smb: Fix inconsistent refcnt update A possible inconsistent update of refcount was identified in `smb2_compound_op`. Such inconsistent update could lead to possible resource leaks. Why it is a possible bug: 1. In the comment section of the function, it clearly states that the reference to `cfile` should be dropped after calling this function. 2. Every control flow path would check and drop the reference to `cfile`, except the patched one. 3. Existing callers would not handle refcount update of `cfile` if -ENOMEM is returned. To fix the bug, an extra goto label "out" is added, to make sure that the cleanup logic would always be respected. As the problem is caused by the allocation failure of `vars`, the cleanup logic between label "finished" and "out" can be safely ignored. According to the definition of function `is_replayable_error`, the error code of "-ENOMEM" is not recoverable. Therefore, the replay logic also gets ignored. Signed-off-by: Shuhao Fu Acked-by: Paulo Alcantara (Red Hat) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 2a0316c514e4..31c13fb5b85b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -207,8 +207,10 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); - if (vars == NULL) - return -ENOMEM; + if (vars == NULL) { + rc = -ENOMEM; + goto out; + } rqst = &vars->rqst[0]; rsp_iov = &vars->rsp_iov[0]; @@ -864,6 +866,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, smb2_should_replay(tcon, &retries, &cur_sleep)) goto replay_again; +out: if (cfile) cifsFileInfo_put(cfile); From bcd6f8954dc4a3aa32edda5602e43a0174dc8f0f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 26 Aug 2025 14:50:46 -0700 Subject: [PATCH 0900/1307] MAINTAINERS: rmnet: Update email addresses Switch to oss.qualcomm.com ids. Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Link: https://patch.msgid.link/20250826215046.865530-1-subash.a.kasiviswanathan@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c5b47955d2a6..3337577ce545 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20847,8 +20847,8 @@ S: Maintained F: drivers/firmware/qcom/qcom_qseecom_uefisecapp.c QUALCOMM RMNET DRIVER -M: Subash Abhinov Kasiviswanathan -M: Sean Tranchetti +M: Subash Abhinov Kasiviswanathan +M: Sean Tranchetti L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst From 2e8750469242cad8f01f320131fd5a6f540dbb99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 14:13:14 +0000 Subject: [PATCH 0901/1307] sctp: initialize more fields in sctp_v6_from_sk() syzbot found that sin6_scope_id was not properly initialized, leading to undefined behavior. Clear sin6_scope_id and sin6_flowinfo. BUG: KMSAN: uninit-value in __sctp_v6_cmp_addr+0x887/0x8c0 net/sctp/ipv6.c:649 __sctp_v6_cmp_addr+0x887/0x8c0 net/sctp/ipv6.c:649 sctp_inet6_cmp_addr+0x4f2/0x510 net/sctp/ipv6.c:983 sctp_bind_addr_conflict+0x22a/0x3b0 net/sctp/bind_addr.c:390 sctp_get_port_local+0x21eb/0x2440 net/sctp/socket.c:8452 sctp_get_port net/sctp/socket.c:8523 [inline] sctp_listen_start net/sctp/socket.c:8567 [inline] sctp_inet_listen+0x710/0xfd0 net/sctp/socket.c:8636 __sys_listen_socket net/socket.c:1912 [inline] __sys_listen net/socket.c:1927 [inline] __do_sys_listen net/socket.c:1932 [inline] __se_sys_listen net/socket.c:1930 [inline] __x64_sys_listen+0x343/0x4c0 net/socket.c:1930 x64_sys_call+0x271d/0x3e20 arch/x86/include/generated/asm/syscalls_64.h:51 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd9/0x210 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable addr.i.i created at: sctp_get_port net/sctp/socket.c:8515 [inline] sctp_listen_start net/sctp/socket.c:8567 [inline] sctp_inet_listen+0x650/0xfd0 net/sctp/socket.c:8636 __sys_listen_socket net/socket.c:1912 [inline] __sys_listen net/socket.c:1927 [inline] __do_sys_listen net/socket.c:1932 [inline] __se_sys_listen net/socket.c:1930 [inline] __x64_sys_listen+0x343/0x4c0 net/socket.c:1930 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+e69f06a0f30116c68056@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68adc0a2.050a0220.37038e.00c4.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Acked-by: Xin Long Link: https://patch.msgid.link/20250826141314.1802610-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3336dcfb4515..568ff8797c39 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -547,7 +547,9 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; + addr->v6.sin6_scope_id = 0; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ From 9b8c88f875c04d4cb9111bd5dd9291c7e9691bf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 13:44:35 +0000 Subject: [PATCH 0902/1307] l2tp: do not use sock_hold() in pppol2tp_session_get_sock() pppol2tp_session_get_sock() is using RCU, it must be ready for sk_refcnt being zero. Commit ee40fb2e1eb5 ("l2tp: protect sock pointer of struct pppol2tp_session with RCU") was correct because it had a call_rcu(..., pppol2tp_put_sk) which was later removed in blamed commit. pppol2tp_recv() can use pppol2tp_session_get_sock() as well. Fixes: c5cbaef992d6 ("l2tp: refactor ppp socket/session relationship") Signed-off-by: Eric Dumazet Cc: James Chapman Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20250826134435.1683435-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_ppp.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index fc5c2fd8f34c..5e12e7ce17d8 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -129,22 +129,12 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = { static const struct proto_ops pppol2tp_ops; -/* Retrieves the pppol2tp socket associated to a session. - * A reference is held on the returned socket, so this function must be paired - * with sock_put(). - */ +/* Retrieves the pppol2tp socket associated to a session. */ static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk; - rcu_read_lock(); - sk = rcu_dereference(ps->sk); - if (sk) - sock_hold(sk); - rcu_read_unlock(); - - return sk; + return rcu_dereference(ps->sk); } /* Helpers to obtain tunnel/session contexts from sockets. @@ -206,14 +196,13 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) { - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk = NULL; + struct sock *sk; /* If the socket is bound, send it in to PPP's input queue. Otherwise * queue it on the session socket. */ rcu_read_lock(); - sk = rcu_dereference(ps->sk); + sk = pppol2tp_session_get_sock(session); if (!sk) goto no_sock; @@ -510,13 +499,14 @@ static void pppol2tp_show(struct seq_file *m, void *arg) struct l2tp_session *session = arg; struct sock *sk; + rcu_read_lock(); sk = pppol2tp_session_get_sock(session); if (sk) { struct pppox_sock *po = pppox_sk(sk); seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); - sock_put(sk); } + rcu_read_unlock(); } static void pppol2tp_session_init(struct l2tp_session *session) @@ -1530,6 +1520,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) port = ntohs(inet->inet_sport); } + rcu_read_lock(); sk = pppol2tp_session_get_sock(session); if (sk) { state = sk->sk_state; @@ -1565,8 +1556,8 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) struct pppox_sock *po = pppox_sk(sk); seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); - sock_put(sk); } + rcu_read_unlock(); } static int pppol2tp_seq_show(struct seq_file *m, void *v) From 1cc8a5b534e5f9b5e129e54ee2e63c9f5da4f39a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 17:21:49 +0000 Subject: [PATCH 0903/1307] net: rose: fix a typo in rose_clear_routes() syzbot crashed in rose_clear_routes(), after a recent patch typo. KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 0 UID: 0 PID: 10591 Comm: syz.3.1856 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:rose_clear_routes net/rose/rose_route.c:565 [inline] RIP: 0010:rose_rt_ioctl+0x162/0x1250 net/rose/rose_route.c:760 rose_ioctl+0x3ce/0x8b0 net/rose/af_rose.c:1381 sock_do_ioctl+0xd9/0x300 net/socket.c:1238 sock_ioctl+0x576/0x790 net/socket.c:1359 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: da9c9c877597 ("net: rose: include node references in rose_neigh refcount") Reported-by: syzbot+2eb8d1719f7cfcfa6840@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68af3e29.a70a0220.3cafd4.002e.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250827172149.5359-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/rose/rose_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 1adee1fbc2ed..a1e9b05ef6f5 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -562,7 +562,7 @@ static int rose_clear_routes(void) rose_node = rose_node->next; if (!t->loopback) { - for (i = 0; i < rose_node->count; i++) + for (i = 0; i < t->count; i++) rose_neigh_put(t->neighbour[i]); rose_remove_node(t); } From 2ddaa562b465921a5d1da3fc939993b92b953e20 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Aug 2025 15:56:06 -0700 Subject: [PATCH 0904/1307] fbnic: Fixup rtnl_lock and devl_lock handling related to mailbox code The exception handling path for the __fbnic_pm_resume function had a bug in that it was taking the devlink lock and then exiting to exception handling instead of waiting until after it released the lock to do so. In order to handle that I am swapping the placement of the unlock and the exception handling jump to label so that we don't trigger a deadlock by holding the lock longer than we need to. In addition this change applies the same ordering to the rtnl_lock/unlock calls in the same function as it should make the code easier to follow if it adheres to a consistent pattern. Fixes: 82534f446daa ("eth: fbnic: Add devlink dev flash support") Signed-off-by: Alexander Duyck Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/175616256667.1963577.5543500806256052549.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index b70e4cadb37b..a7784deea88f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -443,11 +443,10 @@ static int __fbnic_pm_resume(struct device *dev) /* Re-enable mailbox */ err = fbnic_fw_request_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); if (err) goto err_free_irqs; - devl_unlock(priv_to_devlink(fbd)); - /* Only send log history if log buffer is empty to prevent duplicate * log entries. */ @@ -464,20 +463,20 @@ static int __fbnic_pm_resume(struct device *dev) rtnl_lock(); - if (netif_running(netdev)) { + if (netif_running(netdev)) err = __fbnic_open(fbn); - if (err) - goto err_free_mbx; - } rtnl_unlock(); + if (err) + goto err_free_mbx; return 0; err_free_mbx: fbnic_fw_log_disable(fbd); - rtnl_unlock(); + devl_lock(priv_to_devlink(fbd)); fbnic_fw_free_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: From 6ede14a2c6365e7e5d855643c7c8390b5268c467 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Aug 2025 15:56:13 -0700 Subject: [PATCH 0905/1307] fbnic: Move phylink resume out of service_task and into open/close The fbnic driver was presenting with the following locking assert coming out of a PM resume: [ 42.208116][ T164] RTNL: assertion failed at drivers/net/phy/phylink.c (2611) [ 42.208492][ T164] WARNING: CPU: 1 PID: 164 at drivers/net/phy/phylink.c:2611 phylink_resume+0x190/0x1e0 [ 42.208872][ T164] Modules linked in: [ 42.209140][ T164] CPU: 1 UID: 0 PID: 164 Comm: bash Not tainted 6.17.0-rc2-virtme #134 PREEMPT(full) [ 42.209496][ T164] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.17.0-5.fc42 04/01/2014 [ 42.209861][ T164] RIP: 0010:phylink_resume+0x190/0x1e0 [ 42.210057][ T164] Code: 83 e5 01 0f 85 b0 fe ff ff c6 05 1c cd 3e 02 01 90 ba 33 0a 00 00 48 c7 c6 20 3a 1d a5 48 c7 c7 e0 3e 1d a5 e8 21 b8 90 fe 90 <0f> 0b 90 90 e9 86 fe ff ff e8 42 ea 1f ff e9 e2 fe ff ff 48 89 ef [ 42.210708][ T164] RSP: 0018:ffffc90000affbd8 EFLAGS: 00010296 [ 42.210983][ T164] RAX: 0000000000000000 RBX: ffff8880078d8400 RCX: 0000000000000000 [ 42.211235][ T164] RDX: 0000000000000000 RSI: 1ffffffff4f10938 RDI: 0000000000000001 [ 42.211466][ T164] RBP: 0000000000000000 R08: ffffffffa2ae79ea R09: fffffbfff4b3eb84 [ 42.211707][ T164] R10: 0000000000000003 R11: 0000000000000000 R12: ffff888007ad8000 [ 42.211997][ T164] R13: 0000000000000002 R14: ffff888006a18800 R15: ffffffffa34c59e0 [ 42.212234][ T164] FS: 00007f0dc8e39740(0000) GS:ffff88808f51f000(0000) knlGS:0000000000000000 [ 42.212505][ T164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.212704][ T164] CR2: 00007f0dc8e9fe10 CR3: 000000000b56d003 CR4: 0000000000772ef0 [ 42.213227][ T164] PKRU: 55555554 [ 42.213366][ T164] Call Trace: [ 42.213483][ T164] [ 42.213565][ T164] __fbnic_pm_attach.isra.0+0x8e/0xa0 [ 42.213725][ T164] pci_reset_function+0x116/0x1d0 [ 42.213895][ T164] reset_store+0xa0/0x100 [ 42.214025][ T164] ? pci_dev_reset_attr_is_visible+0x50/0x50 [ 42.214221][ T164] ? sysfs_file_kobj+0xc1/0x1e0 [ 42.214374][ T164] ? sysfs_kf_write+0x65/0x160 [ 42.214526][ T164] kernfs_fop_write_iter+0x2f8/0x4c0 [ 42.214677][ T164] ? kernfs_vma_page_mkwrite+0x1f0/0x1f0 [ 42.214836][ T164] new_sync_write+0x308/0x6f0 [ 42.214987][ T164] ? __lock_acquire+0x34c/0x740 [ 42.215135][ T164] ? new_sync_read+0x6f0/0x6f0 [ 42.215288][ T164] ? lock_acquire.part.0+0xbc/0x260 [ 42.215440][ T164] ? ksys_write+0xff/0x200 [ 42.215590][ T164] ? perf_trace_sched_switch+0x6d0/0x6d0 [ 42.215742][ T164] vfs_write+0x65e/0xbb0 [ 42.215876][ T164] ksys_write+0xff/0x200 [ 42.215994][ T164] ? __ia32_sys_read+0xc0/0xc0 [ 42.216141][ T164] ? do_user_addr_fault+0x269/0x9f0 [ 42.216292][ T164] ? rcu_is_watching+0x15/0xd0 [ 42.216442][ T164] do_syscall_64+0xbb/0x360 [ 42.216591][ T164] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 42.216784][ T164] RIP: 0033:0x7f0dc8ea9986 A bit of digging showed that we were invoking the phylink_resume as a part of the fbnic_up path when we were enabling the service task while not holding the RTNL lock. We should be enabling this sooner as a part of the ndo_open path and then just letting the service task come online later. This will help to enforce the correct locking and brings the phylink interface online at the same time as the network interface, instead of at a later time. I tested this on QEMU to verify this was working by putting the system to sleep using "echo mem > /sys/power/state" to put the system to sleep in the guest and then using the command "system_wakeup" in the QEMU monitor. Fixes: 69684376eed5 ("eth: fbnic: Add link detection") Signed-off-by: Alexander Duyck Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/175616257316.1963577.12238158800417771119.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 4 ++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index e67e99487a27..40581550da1a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -52,6 +52,8 @@ int __fbnic_open(struct fbnic_net *fbn) fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); + phylink_resume(fbn->phylink); + return 0; time_stop: fbnic_time_stop(fbn); @@ -84,6 +86,8 @@ static int fbnic_stop(struct net_device *netdev) { struct fbnic_net *fbn = netdev_priv(netdev); + phylink_suspend(fbn->phylink, fbnic_bmc_present(fbn->fbd)); + fbnic_down(fbn); fbnic_pcs_free_irq(fbn->fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index a7784deea88f..28e23e3ffca8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -118,14 +118,12 @@ static void fbnic_service_task_start(struct fbnic_net *fbn) struct fbnic_dev *fbd = fbn->fbd; schedule_delayed_work(&fbd->service_task, HZ); - phylink_resume(fbn->phylink); } static void fbnic_service_task_stop(struct fbnic_net *fbn) { struct fbnic_dev *fbd = fbn->fbd; - phylink_suspend(fbn->phylink, fbnic_bmc_present(fbd)); cancel_delayed_work(&fbd->service_task); } From 131897c65e2b86cf14bec7379f44aa8fbb407526 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 24 Aug 2025 23:11:57 +0800 Subject: [PATCH 0906/1307] erofs: fix invalid algorithm for encoded extents The current algorithm sanity checks do not properly apply to new encoded extents. Unify the algorithm check with Z_EROFS_COMPRESSION(_RUNTIME)_MAX and ensure consistency with sbi->available_compr_algs. Reported-and-tested-by: syzbot+5a398eb460ddaa6f242f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/68a8bd20.050a0220.37038e.005a.GAE@google.com Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Thanks-to: Edward Adam Davis Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 67 +++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index a93efd95c555..798223e6da9c 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -394,10 +394,10 @@ static int z_erofs_map_blocks_fo(struct inode *inode, .map = map, .in_mbox = erofs_inode_in_metabox(inode), }; - int err = 0; - unsigned int endoff, afmt; + unsigned int endoff; unsigned long initial_lcn; unsigned long long ofs, end; + int err; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la; if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) && @@ -482,20 +482,15 @@ static int z_erofs_map_blocks_fo(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? - Z_EROFS_COMPRESSION_INTERLACED : - Z_EROFS_COMPRESSION_SHIFTED; + if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) + map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; + else + map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; + } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) { + map->m_algorithmformat = vi->z_algorithmtype[1]; } else { - afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ? - vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; - if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { - erofs_err(sb, "inconsistent algorithmtype %u for nid %llu", - afmt, vi->nid); - err = -EFSCORRUPTED; - goto unmap_out; - } + map->m_algorithmformat = vi->z_algorithmtype[0]; } - map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && @@ -626,9 +621,9 @@ static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; - int err, headnr; - erofs_off_t pos; struct z_erofs_map_header *h; + erofs_off_t pos; + int err = 0; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { /* @@ -642,7 +637,6 @@ static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) return -ERESTARTSYS; - err = 0; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; @@ -679,15 +673,6 @@ static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) vi->z_idata_size = le16_to_cpu(h->h_idata_size); - headnr = 0; - if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX || - vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) { - erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", - headnr + 1, vi->z_algorithmtype[headnr], vi->nid); - err = -EOPNOTSUPP; - goto out_unlock; - } - if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { @@ -726,6 +711,30 @@ static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) return err; } +static int z_erofs_map_sanity_check(struct inode *inode, + struct erofs_map_blocks *map) +{ + struct erofs_sb_info *sbi = EROFS_I_SB(inode); + + if (!(map->m_flags & EROFS_MAP_ENCODED)) + return 0; + if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) { + erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel", + map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid); + return -EOPNOTSUPP; + } + if (unlikely(map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX && + !(sbi->available_compr_algs & (1 << map->m_algorithmformat)))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + map->m_algorithmformat, EROFS_I(inode)->nid); + return -EFSCORRUPTED; + } + if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || + map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) + return -EOPNOTSUPP; + return 0; +} + int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) { @@ -746,10 +755,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, else err = z_erofs_map_blocks_fo(inode, map, flags); } - if (!err && (map->m_flags & EROFS_MAP_ENCODED) && - unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || - map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) - err = -EOPNOTSUPP; + if (!err) + err = z_erofs_map_sanity_check(inode, map); if (err) map->m_llen = 0; } From 82b8166171bdebbc74717e4a0cfb4b89cd0510aa Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 26 Aug 2025 15:54:18 +0800 Subject: [PATCH 0907/1307] ata: ahci_xgene: Use int type for 'rc' to store error codes Use int instead of u32 for the 'rc' variable in xgene_ahci_softreset() to store negative error codes returned by ahci_do_softreset(). In xgene_ahci_pmp_softreset(), remove the redundant 'rc' variable and directly return the result of the ahci_do_softreset() call instead. Signed-off-by: Qianfeng Rong Signed-off-by: Damien Le Moal --- drivers/ata/ahci_xgene.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 5d5a51a77f5d..6b8844646fcd 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -450,7 +450,6 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class, { int pmp = sata_srst_pmp(link); struct ata_port *ap = link->ap; - u32 rc; void __iomem *port_mmio = ahci_port_base(ap); u32 port_fbs; @@ -463,9 +462,7 @@ static int xgene_ahci_pmp_softreset(struct ata_link *link, unsigned int *class, port_fbs |= pmp << PORT_FBS_DEV_OFFSET; writel(port_fbs, port_mmio + PORT_FBS); - rc = ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); - - return rc; + return ahci_do_softreset(link, class, pmp, deadline, ahci_check_ready); } /** @@ -500,7 +497,7 @@ static int xgene_ahci_softreset(struct ata_link *link, unsigned int *class, u32 port_fbs; u32 port_fbs_save; u32 retry = 1; - u32 rc; + int rc; port_fbs_save = readl(port_mmio + PORT_FBS); From ee4d098cbc9160f573b5c1b5a51d6158efdb2896 Mon Sep 17 00:00:00 2001 From: Yin Tirui Date: Tue, 19 Aug 2025 15:55:10 +0800 Subject: [PATCH 0908/1307] of_numa: fix uninitialized memory nodes causing kernel panic When there are memory-only nodes (nodes without CPUs), these nodes are not properly initialized, causing kernel panic during boot. of_numa_init of_numa_parse_cpu_nodes node_set(nid, numa_nodes_parsed); of_numa_parse_memory_nodes In of_numa_parse_cpu_nodes, numa_nodes_parsed gets updated only for nodes containing CPUs. Memory-only nodes should have been updated in of_numa_parse_memory_nodes, but they weren't. Subsequently, when free_area_init() attempts to access NODE_DATA() for these uninitialized memory nodes, the kernel panics due to NULL pointer dereference. This can be reproduced on ARM64 QEMU with 1 CPU and 2 memory nodes: qemu-system-aarch64 \ -cpu host -nographic \ -m 4G -smp 1 \ -machine virt,accel=kvm,gic-version=3,iommu=smmuv3 \ -object memory-backend-ram,size=2G,id=mem0 \ -object memory-backend-ram,size=2G,id=mem1 \ -numa node,nodeid=0,memdev=mem0 \ -numa node,nodeid=1,memdev=mem1 \ -kernel $IMAGE \ -hda $DISK \ -append "console=ttyAMA0 root=/dev/vda rw earlycon" [ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x481fd010] [ 0.000000] Linux version 6.17.0-rc1-00001-gabb4b3daf18c-dirty (yintirui@local) (gcc (GCC) 12.3.1, GNU ld (GNU Binutils) 2.41) #52 SMP PREEMPT Mon Aug 18 09:49:40 CST 2025 [ 0.000000] KASLR enabled [ 0.000000] random: crng init done [ 0.000000] Machine model: linux,dummy-virt [ 0.000000] efi: UEFI not found. [ 0.000000] earlycon: pl11 at MMIO 0x0000000009000000 (options '') [ 0.000000] printk: legacy bootconsole [pl11] enabled [ 0.000000] OF: reserved mem: Reserved memory: No reserved-memory node in the DT [ 0.000000] NODE_DATA(0) allocated [mem 0xbfffd9c0-0xbfffffff] [ 0.000000] node 1 must be removed before remove section 23 [ 0.000000] Zone ranges: [ 0.000000] DMA [mem 0x0000000040000000-0x00000000ffffffff] [ 0.000000] DMA32 empty [ 0.000000] Normal [mem 0x0000000100000000-0x000000013fffffff] [ 0.000000] Movable zone start for each node [ 0.000000] Early memory node ranges [ 0.000000] node 0: [mem 0x0000000040000000-0x00000000bfffffff] [ 0.000000] node 1: [mem 0x00000000c0000000-0x000000013fffffff] [ 0.000000] Initmem setup node 0 [mem 0x0000000040000000-0x00000000bfffffff] [ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 0.000000] Mem abort info: [ 0.000000] ESR = 0x0000000096000004 [ 0.000000] EC = 0x25: DABT (current EL), IL = 32 bits [ 0.000000] SET = 0, FnV = 0 [ 0.000000] EA = 0, S1PTW = 0 [ 0.000000] FSC = 0x04: level 0 translation fault [ 0.000000] Data abort info: [ 0.000000] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 0.000000] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 0.000000] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 0.000000] [00000000000000a0] user address but active_mm is swapper [ 0.000000] Internal error: Oops: 0000000096000004 [#1] SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.17.0-rc1-00001-g760c6dabf762-dirty #54 PREEMPT [ 0.000000] Hardware name: linux,dummy-virt (DT) [ 0.000000] pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 0.000000] pc : free_area_init+0x50c/0xf9c [ 0.000000] lr : free_area_init+0x5c0/0xf9c [ 0.000000] sp : ffffa02ca0f33c00 [ 0.000000] x29: ffffa02ca0f33cb0 x28: 0000000000000000 x27: 0000000000000000 [ 0.000000] x26: 4ec4ec4ec4ec4ec5 x25: 00000000000c0000 x24: 00000000000c0000 [ 0.000000] x23: 0000000000040000 x22: 0000000000000000 x21: ffffa02ca0f3b368 [ 0.000000] x20: ffffa02ca14c7b98 x19: 0000000000000000 x18: 0000000000000002 [ 0.000000] x17: 000000000000cacc x16: 0000000000000001 x15: 0000000000000001 [ 0.000000] x14: 0000000080000000 x13: 0000000000000018 x12: 0000000000000002 [ 0.000000] x11: ffffa02ca0fd4f00 x10: ffffa02ca14bab20 x9 : ffffa02ca14bab38 [ 0.000000] x8 : 00000000000c0000 x7 : 0000000000000001 x6 : 0000000000000002 [ 0.000000] x5 : 0000000140000000 x4 : ffffa02ca0f33c90 x3 : ffffa02ca0f33ca0 [ 0.000000] x2 : ffffa02ca0f33c98 x1 : 0000000080000000 x0 : 0000000000000001 [ 0.000000] Call trace: [ 0.000000] free_area_init+0x50c/0xf9c (P) [ 0.000000] bootmem_init+0x110/0x1dc [ 0.000000] setup_arch+0x278/0x60c [ 0.000000] start_kernel+0x70/0x748 [ 0.000000] __primary_switched+0x88/0x90 [ 0.000000] Code: d503201f b98093e0 52800016 f8607a93 (f9405260) [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! [ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- Link: https://lkml.kernel.org/r/20250819075510.2079961-1-yintirui@huawei.com Fixes: 767507654c22 ("arch_numa: switch over to numa_memblks") Signed-off-by: Yin Tirui Acked-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Kefeng Wang Cc: Chen Jun Cc: Dan Williams Cc: Joanthan Cameron Cc: Rob Herring Cc: Saravana Kannan Cc: Signed-off-by: Andrew Morton --- drivers/of/of_numa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 230d5f628c1b..cd2dc8e825c9 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -59,8 +59,11 @@ static int __init of_numa_parse_memory_nodes(void) r = -EINVAL; } - for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) + for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) { r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); + if (!r) + node_set(nid, numa_nodes_parsed); + } if (!i || r) { of_node_put(np); From 5cc5e030bce2ec97ae5cdb2c1b94a98b1047b3fa Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Tue, 12 Aug 2025 15:26:56 +0200 Subject: [PATCH 0909/1307] rust: mm: mark VmaNew as transparent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unsafe code in VmaNew's methods assumes that the type has the same layout as the inner `bindings::vm_area_struct`. This is not guaranteed by the default struct representation in Rust, but requires specifying the `transparent` representation. Link: https://lkml.kernel.org/r/20250812132712.61007-1-baptiste.lepers@gmail.com Fixes: dcb81aeab406 ("mm: rust: add VmaNew for f_ops->mmap()") Signed-off-by: Baptiste Lepers Reviewed-by: Alice Ryhl Cc: Alex Gaynor Cc: Andreas Hindborg Cc: Björn Roy Baron Cc: Boqun Feng Cc: Danilo Krummrich Cc: Gary Guo Cc: Jann Horn Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Miguel Ojeda Cc: Trevor Gross Cc: Signed-off-by: Andrew Morton --- rust/kernel/mm/virt.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs index 6086ca981b06..a1bfa4e19293 100644 --- a/rust/kernel/mm/virt.rs +++ b/rust/kernel/mm/virt.rs @@ -209,6 +209,7 @@ pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result { /// /// For the duration of 'a, the referenced vma must be undergoing initialization in an /// `f_ops->mmap()` hook. +#[repr(transparent)] pub struct VmaNew { vma: VmaRef, } From f46e8ef8bb7b452584f2e75337b619ac51a7cadf Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 19 Aug 2025 21:41:02 +0800 Subject: [PATCH 0910/1307] ocfs2: prevent release journal inode after journal shutdown Before calling ocfs2_delete_osb(), ocfs2_journal_shutdown() has already been executed in ocfs2_dismount_volume(), so osb->journal must be NULL. Therefore, the following calltrace will inevitably fail when it reaches jbd2_journal_release_jbd_inode(). ocfs2_dismount_volume()-> ocfs2_delete_osb()-> ocfs2_free_slot_info()-> __ocfs2_free_slot_info()-> evict()-> ocfs2_evict_inode()-> ocfs2_clear_inode()-> jbd2_journal_release_jbd_inode(osb->journal->j_journal, Adding osb->journal checks will prevent null-ptr-deref during the above execution path. Link: https://lkml.kernel.org/r/tencent_357489BEAEE4AED74CBD67D246DBD2C4C606@qq.com Fixes: da5e7c87827e ("ocfs2: cleanup journal init and shutdown") Signed-off-by: Edward Adam Davis Reported-by: syzbot+47d8cb2f2cc1517e515a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=47d8cb2f2cc1517e515a Tested-by: syzbot+47d8cb2f2cc1517e515a@syzkaller.appspotmail.com Reviewed-by: Mark Tinguely Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 14bf440ea4df..6c4f78f473fb 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1281,6 +1281,9 @@ static void ocfs2_clear_inode(struct inode *inode) * the journal is flushed before journal shutdown. Thus it is safe to * have inodes get cleaned up after journal shutdown. */ + if (!osb->journal) + return; + jbd2_journal_release_jbd_inode(osb->journal->j_journal, &oi->ip_jinode); } From 9614d8bee66387501f48718fa306e17f2aa3f2f3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 31 Jul 2025 10:44:31 -0400 Subject: [PATCH 0911/1307] mm/userfaultfd: fix kmap_local LIFO ordering for CONFIG_HIGHPTE With CONFIG_HIGHPTE on 32-bit ARM, move_pages_pte() maps PTE pages using kmap_local_page(), which requires unmapping in Last-In-First-Out order. The current code maps dst_pte first, then src_pte, but unmaps them in the same order (dst_pte, src_pte), violating the LIFO requirement. This causes the warning in kunmap_local_indexed(): WARNING: CPU: 0 PID: 604 at mm/highmem.c:622 kunmap_local_indexed+0x178/0x17c addr \!= __fix_to_virt(FIX_KMAP_BEGIN + idx) Fix this by reversing the unmap order to respect LIFO ordering. This issue follows the same pattern as similar fixes: - commit eca6828403b8 ("crypto: skcipher - fix mismatch between mapping and unmapping order") - commit 8cf57c6df818 ("nilfs2: eliminate staggered calls to kunmap in nilfs_rename") Both of which addressed the same fundamental requirement that kmap_local operations must follow LIFO ordering. Link: https://lkml.kernel.org/r/20250731144431.773923-1-sashal@kernel.org Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Sasha Levin Acked-by: David Hildenbrand Reviewed-by: Suren Baghdasaryan Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 45e6290e2e8b..aefdf3a812a1 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1453,10 +1453,15 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, folio_unlock(src_folio); folio_put(src_folio); } - if (dst_pte) - pte_unmap(dst_pte); + /* + * Unmap in reverse order (LIFO) to maintain proper kmap_local + * index ordering when CONFIG_HIGHPTE is enabled. We mapped dst_pte + * first, then src_pte, so we must unmap src_pte first, then dst_pte. + */ if (src_pte) pte_unmap(src_pte); + if (dst_pte) + pte_unmap(dst_pte); mmu_notifier_invalidate_range_end(&range); if (si) put_swap_device(si); From 5bbc2b785e63699cfcaa7adbf739f6e9b771028a Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 5 Aug 2025 13:51:40 -0400 Subject: [PATCH 0912/1307] selftests/mm: fix FORCE_READ to read input value correctly FORCE_READ() converts input value x to its pointer type then reads from address x. This is wrong. If x is a non-pointer, it would be caught it easily. But all FORCE_READ() callers are trying to read from a pointer and FORCE_READ() basically reads a pointer to a pointer instead of the original typed pointer. Almost no access violation was found, except the one from split_huge_page_test. Fix it by implementing a simplified READ_ONCE() instead. Link: https://lkml.kernel.org/r/20250805175140.241656-1-ziy@nvidia.com Fixes: 3f6bfd4789a0 ("selftests/mm: reuse FORCE_READ to replace "asm volatile("" : "+r" (XXX));"") Signed-off-by: Zi Yan Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: wang lian Reviewed-by: Wei Yang Cc: Christian Brauner Cc: Jann Horn Cc: Kairui Song Cc: Liam Howlett Cc: Mark Brown Cc: SeongJae Park Cc: Shuah Khan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 4 ++-- tools/testing/selftests/mm/guard-regions.c | 2 +- tools/testing/selftests/mm/hugetlb-madvise.c | 4 +++- tools/testing/selftests/mm/migration.c | 2 +- tools/testing/selftests/mm/pagemap_ioctl.c | 2 +- tools/testing/selftests/mm/split_huge_page_test.c | 7 +++++-- tools/testing/selftests/mm/vm_util.h | 2 +- 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index d30625c18259..c744c603d688 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1554,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. */ - FORCE_READ(mem); - FORCE_READ(smem); + FORCE_READ(*mem); + FORCE_READ(*smem); fn(mem, smem, pagesize); munmap: diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b0d42eb04e3a..8dd81c0a4a5a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -145,7 +145,7 @@ static bool try_access_buf(char *ptr, bool write) if (write) *ptr = 'x'; else - FORCE_READ(ptr); + FORCE_READ(*ptr); } signal_jump_set = false; diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index 1afe14b9dc0c..c5940c0595be 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -50,8 +50,10 @@ void read_fault_pages(void *addr, unsigned long nr_pages) unsigned long i; for (i = 0; i < nr_pages; i++) { + unsigned long *addr2 = + ((unsigned long *)(addr + (i * huge_page_size))); /* Prevent the compiler from optimizing out the entire loop: */ - FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); + FORCE_READ(*addr2); } } diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index c5a73617796a..ea945eebec2f 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -110,7 +110,7 @@ void *access_mem(void *ptr) * the memory access actually happens and prevents the compiler * from optimizing away this entire loop. */ - FORCE_READ((uint64_t *)ptr); + FORCE_READ(*(uint64_t *)ptr); } return NULL; diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0d4209eef0c3..e6face7c0166 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1525,7 +1525,7 @@ void zeropfn_tests(void) ret = madvise(mem, hpage_size, MADV_HUGEPAGE); if (!ret) { - FORCE_READ(mem); + FORCE_READ(*mem); ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 05de1fc0005b..44a3f8a58806 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -439,8 +439,11 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, } madvise(*addr, fd_size, MADV_HUGEPAGE); - for (size_t i = 0; i < fd_size; i++) - FORCE_READ((*addr + i)); + for (size_t i = 0; i < fd_size; i++) { + char *addr2 = *addr + i; + + FORCE_READ(*addr2); + } if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c20298ae98ea..b55d1809debc 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -23,7 +23,7 @@ * anything with it in order to trigger a read page fault. We therefore must use * volatile to stop the compiler from optimising this away. */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) +#define FORCE_READ(x) (*(const volatile typeof(x) *)&(x)) extern unsigned int __page_size; extern unsigned int __page_shift; From 7a19afee6fb39df63ddea7ce78976d8c521178c6 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Fri, 1 Aug 2025 13:02:36 +0100 Subject: [PATCH 0913/1307] kunit: kasan_test: disable fortify string checker on kasan_strings() test Similar to commit 09c6304e38e4 ("kasan: test: fix compatibility with FORTIFY_SOURCE") the kernel is panicing in kasan_string(). This is due to the `src` and `ptr` not being hidden from the optimizer which would disable the runtime fortify string checker. Call trace: __fortify_panic+0x10/0x20 (P) kasan_strings+0x980/0x9b0 kunit_try_run_case+0x68/0x190 kunit_generic_run_threadfn_adapter+0x34/0x68 kthread+0x1c4/0x228 ret_from_fork+0x10/0x20 Code: d503233f a9bf7bfd 910003fd 9424b243 (d4210000) ---[ end trace 0000000000000000 ]--- note: kunit_try_catch[128] exited with irqs disabled note: kunit_try_catch[128] exited with preempt_count 1 # kasan_strings: try faulted: last ** replaying previous printk message ** # kasan_strings: try faulted: last line seen mm/kasan/kasan_test_c.c:1600 # kasan_strings: internal error occurred preventing test case from running: -4 Link: https://lkml.kernel.org/r/20250801120236.2962642-1-yeoreum.yun@arm.com Fixes: 73228c7ecc5e ("KASAN: port KASAN Tests to KUnit") Signed-off-by: Yeoreum Yun Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index e0968acc03aa..f4b17984b627 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1578,9 +1578,11 @@ static void kasan_strings(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO); strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE); + OPTIMIZER_HIDE_VAR(src); /* * Make sure that strscpy() does not trigger KASAN if it overreads into From 08c7c253e032863199da4f089bd0ccab5d1a4876 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 18 Aug 2025 18:39:12 +0200 Subject: [PATCH 0914/1307] mm/kasan: fix vmalloc shadow memory (de-)population races While working on the lazy MMU mode enablement for s390 I hit pretty curious issues in the kasan code. The first is related to a custom kasan-based sanitizer aimed at catching invalid accesses to PTEs and is inspired by [1] conversation. The kasan complains on valid PTE accesses, while the shadow memory is reported as unpoisoned: [ 102.783993] ================================================================== [ 102.784008] BUG: KASAN: out-of-bounds in set_pte_range+0x36c/0x390 [ 102.784016] Read of size 8 at addr 0000780084cf9608 by task vmalloc_test/0/5542 [ 102.784019] [ 102.784040] CPU: 1 UID: 0 PID: 5542 Comm: vmalloc_test/0 Kdump: loaded Tainted: G OE 6.16.0-gcc-ipte-kasan-11657-gb2d930c4950e #340 PREEMPT [ 102.784047] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 102.784049] Hardware name: IBM 8561 T01 703 (LPAR) [ 102.784052] Call Trace: [ 102.784054] [<00007fffe0147ac0>] dump_stack_lvl+0xe8/0x140 [ 102.784059] [<00007fffe0112484>] print_address_description.constprop.0+0x34/0x2d0 [ 102.784066] [<00007fffe011282c>] print_report+0x10c/0x1f8 [ 102.784071] [<00007fffe090785a>] kasan_report+0xfa/0x220 [ 102.784078] [<00007fffe01d3dec>] set_pte_range+0x36c/0x390 [ 102.784083] [<00007fffe01d41c2>] leave_ipte_batch+0x3b2/0xb10 [ 102.784088] [<00007fffe07d3650>] apply_to_pte_range+0x2f0/0x4e0 [ 102.784094] [<00007fffe07e62e4>] apply_to_pmd_range+0x194/0x3e0 [ 102.784099] [<00007fffe07e820e>] __apply_to_page_range+0x2fe/0x7a0 [ 102.784104] [<00007fffe07e86d8>] apply_to_page_range+0x28/0x40 [ 102.784109] [<00007fffe090a3ec>] __kasan_populate_vmalloc+0xec/0x310 [ 102.784114] [<00007fffe090aa36>] kasan_populate_vmalloc+0x96/0x130 [ 102.784118] [<00007fffe0833a04>] alloc_vmap_area+0x3d4/0xf30 [ 102.784123] [<00007fffe083a8ba>] __get_vm_area_node+0x1aa/0x4c0 [ 102.784127] [<00007fffe083c4f6>] __vmalloc_node_range_noprof+0x126/0x4e0 [ 102.784131] [<00007fffe083c980>] __vmalloc_node_noprof+0xd0/0x110 [ 102.784135] [<00007fffe083ca32>] vmalloc_noprof+0x32/0x40 [ 102.784139] [<00007fff608aa336>] fix_size_alloc_test+0x66/0x150 [test_vmalloc] [ 102.784147] [<00007fff608aa710>] test_func+0x2f0/0x430 [test_vmalloc] [ 102.784153] [<00007fffe02841f8>] kthread+0x3f8/0x7a0 [ 102.784159] [<00007fffe014d8b4>] __ret_from_fork+0xd4/0x7d0 [ 102.784164] [<00007fffe299c00a>] ret_from_fork+0xa/0x30 [ 102.784173] no locks held by vmalloc_test/0/5542. [ 102.784176] [ 102.784178] The buggy address belongs to the physical page: [ 102.784186] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x84cf9 [ 102.784198] flags: 0x3ffff00000000000(node=0|zone=1|lastcpupid=0x1ffff) [ 102.784212] page_type: f2(table) [ 102.784225] raw: 3ffff00000000000 0000000000000000 0000000000000122 0000000000000000 [ 102.784234] raw: 0000000000000000 0000000000000000 f200000000000001 0000000000000000 [ 102.784248] page dumped because: kasan: bad access detected [ 102.784250] [ 102.784252] Memory state around the buggy address: [ 102.784260] 0000780084cf9500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 102.784274] 0000780084cf9580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 102.784277] >0000780084cf9600: fd 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 102.784290] ^ [ 102.784293] 0000780084cf9680: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 102.784303] 0000780084cf9700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 102.784306] ================================================================== The second issue hits when the custom sanitizer above is not implemented, but the kasan itself is still active: [ 1554.438028] Unable to handle kernel pointer dereference in virtual kernel address space [ 1554.438065] Failing address: 001c0ff0066f0000 TEID: 001c0ff0066f0403 [ 1554.438076] Fault in home space mode while using kernel ASCE. [ 1554.438103] AS:00000000059d400b R2:0000000ffec5c00b R3:00000000c6c9c007 S:0000000314470001 P:00000000d0ab413d [ 1554.438158] Oops: 0011 ilc:2 [#1]SMP [ 1554.438175] Modules linked in: test_vmalloc(E+) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) nf_tables(E) sunrpc(E) pkey_pckmo(E) uvdevice(E) s390_trng(E) rng_core(E) eadm_sch(E) vfio_ccw(E) mdev(E) vfio_iommu_type1(E) vfio(E) sch_fq_codel(E) drm(E) loop(E) i2c_core(E) drm_panel_orientation_quirks(E) nfnetlink(E) ctcm(E) fsm(E) zfcp(E) scsi_transport_fc(E) diag288_wdt(E) watchdog(E) ghash_s390(E) prng(E) aes_s390(E) des_s390(E) libdes(E) sha3_512_s390(E) sha3_256_s390(E) sha512_s390(E) sha1_s390(E) sha_common(E) pkey(E) autofs4(E) [ 1554.438319] Unloaded tainted modules: pkey_uv(E):1 hmac_s390(E):2 [ 1554.438354] CPU: 1 UID: 0 PID: 1715 Comm: vmalloc_test/0 Kdump: loaded Tainted: G E 6.16.0-gcc-ipte-kasan-11657-gb2d930c4950e #350 PREEMPT [ 1554.438368] Tainted: [E]=UNSIGNED_MODULE [ 1554.438374] Hardware name: IBM 8561 T01 703 (LPAR) [ 1554.438381] Krnl PSW : 0704e00180000000 00007fffe1d3d6ae (memset+0x5e/0x98) [ 1554.438396] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 [ 1554.438409] Krnl GPRS: 0000000000000001 001c0ff0066f0000 001c0ff0066f0000 00000000000000f8 [ 1554.438418] 00000000000009fe 0000000000000009 0000000000000000 0000000000000002 [ 1554.438426] 0000000000005000 000078031ae655c8 00000feffdcf9f59 0000780258672a20 [ 1554.438433] 0000780243153500 00007f8033780000 00007fffe083a510 00007f7fee7cfa00 [ 1554.438452] Krnl Code: 00007fffe1d3d6a0: eb540008000c srlg %r5,%r4,8 00007fffe1d3d6a6: b9020055 ltgr %r5,%r5 #00007fffe1d3d6aa: a784000b brc 8,00007fffe1d3d6c0 >00007fffe1d3d6ae: 42301000 stc %r3,0(%r1) 00007fffe1d3d6b2: d2fe10011000 mvc 1(255,%r1),0(%r1) 00007fffe1d3d6b8: 41101100 la %r1,256(%r1) 00007fffe1d3d6bc: a757fff9 brctg %r5,00007fffe1d3d6ae 00007fffe1d3d6c0: 42301000 stc %r3,0(%r1) [ 1554.438539] Call Trace: [ 1554.438545] [<00007fffe1d3d6ae>] memset+0x5e/0x98 [ 1554.438552] ([<00007fffe083a510>] remove_vm_area+0x220/0x400) [ 1554.438562] [<00007fffe083a9d6>] vfree.part.0+0x26/0x810 [ 1554.438569] [<00007fff6073bd50>] fix_align_alloc_test+0x50/0x90 [test_vmalloc] [ 1554.438583] [<00007fff6073c73a>] test_func+0x46a/0x6c0 [test_vmalloc] [ 1554.438593] [<00007fffe0283ac8>] kthread+0x3f8/0x7a0 [ 1554.438603] [<00007fffe014d8b4>] __ret_from_fork+0xd4/0x7d0 [ 1554.438613] [<00007fffe299ac0a>] ret_from_fork+0xa/0x30 [ 1554.438622] INFO: lockdep is turned off. [ 1554.438627] Last Breaking-Event-Address: [ 1554.438632] [<00007fffe1d3d65c>] memset+0xc/0x98 [ 1554.438644] Kernel panic - not syncing: Fatal exception: panic_on_oops This series fixes the above issues and is a pre-requisite for the s390 lazy MMU mode implementation. test_vmalloc was used to stress-test the fixes. This patch (of 2): When vmalloc shadow memory is established the modification of the corresponding page tables is not protected by any locks. Instead, the locking is done per-PTE. This scheme however has defects. kasan_populate_vmalloc_pte() - while ptep_get() read is atomic the sequence pte_none(ptep_get()) is not. Doing that outside of the lock might lead to a concurrent PTE update and what could be seen as a shadow memory corruption as result. kasan_depopulate_vmalloc_pte() - by the time a page whose address was extracted from ptep_get() read and cached in a local variable outside of the lock is attempted to get free, could actually be freed already. To avoid these put ptep_get() itself and the code that manipulates the result of the read under lock. In addition, move freeing of the page out of the atomic context. Link: https://lkml.kernel.org/r/cover.1755528662.git.agordeev@linux.ibm.com Link: https://lkml.kernel.org/r/adb258634194593db294c0d1fb35646e894d6ead.1755528662.git.agordeev@linux.ibm.com Link: https://lore.kernel.org/linux-mm/5b0609c9-95ee-4e48-bb6d-98f57c5d2c31@arm.com/ [1] Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") Signed-off-by: Alexander Gordeev Cc: Andrey Ryabinin Cc: Daniel Axtens Cc: Marc Rutland Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/kasan/shadow.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index d2c70cd2afb1..4d846d146d02 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -305,9 +305,6 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, pte_t pte; int index; - if (likely(!pte_none(ptep_get(ptep)))) - return 0; - index = PFN_DOWN(addr - data->start); page = data->pages[index]; __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); @@ -461,18 +458,19 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { - unsigned long page; - - page = (unsigned long)__va(pte_pfn(ptep_get(ptep)) << PAGE_SHIFT); + pte_t pte; + int none; spin_lock(&init_mm.page_table_lock); - - if (likely(!pte_none(ptep_get(ptep)))) { + pte = ptep_get(ptep); + none = pte_none(pte); + if (likely(!none)) pte_clear(&init_mm, addr, ptep); - free_page(page); - } spin_unlock(&init_mm.page_table_lock); + if (likely(!none)) + __free_page(pfn_to_page(pte_pfn(pte))); + return 0; } From c519c3c0a1133c408e83a383aa4dd30010aa5d71 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 18 Aug 2025 18:39:13 +0200 Subject: [PATCH 0915/1307] mm/kasan: avoid lazy MMU mode hazards Functions __kasan_populate_vmalloc() and __kasan_depopulate_vmalloc() use apply_to_pte_range(), which enters lazy MMU mode. In that mode updating PTEs may not be observed until the mode is left. That may lead to a situation in which otherwise correct reads and writes to a PTE using ptep_get(), set_pte(), pte_clear() and other access primitives bring wrong results when the vmalloc shadow memory is being (de-)populated. To avoid these hazards leave the lazy MMU mode before and re-enter it after each PTE manipulation. Link: https://lkml.kernel.org/r/0d2efb7ddddbff6b288fbffeeb10166e90771718.1755528662.git.agordeev@linux.ibm.com Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") Signed-off-by: Alexander Gordeev Cc: Andrey Ryabinin Cc: Daniel Axtens Cc: Marc Rutland Cc: Ryan Roberts Signed-off-by: Andrew Morton --- mm/kasan/shadow.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 4d846d146d02..e2ceebf737ef 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -305,6 +305,8 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, pte_t pte; int index; + arch_leave_lazy_mmu_mode(); + index = PFN_DOWN(addr - data->start); page = data->pages[index]; __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); @@ -317,6 +319,8 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, } spin_unlock(&init_mm.page_table_lock); + arch_enter_lazy_mmu_mode(); + return 0; } @@ -461,6 +465,8 @@ static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, pte_t pte; int none; + arch_leave_lazy_mmu_mode(); + spin_lock(&init_mm.page_table_lock); pte = ptep_get(ptep); none = pte_none(pte); @@ -471,6 +477,8 @@ static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, if (likely(!none)) __free_page(pfn_to_page(pte_pfn(pte))); + arch_enter_lazy_mmu_mode(); + return 0; } From 51337a9a3a404fde0f5337662ffc7699793dfeb5 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Thu, 21 Aug 2025 13:07:35 +0100 Subject: [PATCH 0916/1307] kasan: fix GCC mem-intrinsic prefix with sw tags GCC doesn't support "hwasan-kernel-mem-intrinsic-prefix", only "asan-kernel-mem-intrinsic-prefix"[0], while LLVM supports both. This is already taken into account when checking "CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX", but not in the KASAN Makefile adding those parameters when "CONFIG_KASAN_SW_TAGS" is enabled. Replace the version check with "CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX", which already validates that mem-intrinsic prefix parameter can be used, and choose the correct name depending on compiler. GCC 13 and above trigger "CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX" which prevents `mem{cpy,move,set}()` being redefined in "mm/kasan/shadow.c" since commit 36be5cba99f6 ("kasan: treat meminstrinsic as builtins in uninstrumented files"), as we expect the compiler to prefix those calls with `__(hw)asan_` instead. But as the option passed to GCC has been incorrect, the compiler has not been emitting those prefixes, effectively never calling the instrumented versions of `mem{cpy,move,set}()` with "CONFIG_KASAN_SW_TAGS" enabled. If "CONFIG_FORTIFY_SOURCES" is enabled, this issue would be mitigated as it redefines `mem{cpy,move,set}()` and properly aliases the `__underlying_mem*()` that will be called to the instrumented versions. Link: https://lkml.kernel.org/r/20250821120735.156244-1-ada.coupriediaz@arm.com Link: https://gcc.gnu.org/onlinedocs/gcc-13.4.0/gcc/Optimize-Options.html [0] Signed-off-by: Ada Couprie Diaz Fixes: 36be5cba99f6 ("kasan: treat meminstrinsic as builtins in uninstrumented files") Reviewed-by: Yeoreum Yun Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Marco Elver Cc: Marc Rutland Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- scripts/Makefile.kasan | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 693dbbebebba..0ba2aac3b8dc 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -86,10 +86,14 @@ kasan_params += hwasan-instrument-stack=$(stack_enable) \ hwasan-use-short-granules=0 \ hwasan-inline-all-checks=0 -# Instrument memcpy/memset/memmove calls by using instrumented __hwasan_mem*(). -ifeq ($(call clang-min-version, 150000)$(call gcc-min-version, 130000),y) - kasan_params += hwasan-kernel-mem-intrinsic-prefix=1 -endif +# Instrument memcpy/memset/memmove calls by using instrumented __(hw)asan_mem*(). +ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX + ifdef CONFIG_CC_IS_GCC + kasan_params += asan-kernel-mem-intrinsic-prefix=1 + else + kasan_params += hwasan-kernel-mem-intrinsic-prefix=1 + endif +endif # CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX endif # CONFIG_KASAN_SW_TAGS From 6310c149e5dede74bb47110e0d7a38c78772c152 Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Tue, 5 Aug 2025 14:15:26 -0700 Subject: [PATCH 0917/1307] kexec: add KEXEC_FILE_NO_CMA as a legal flag Commit 07d24902977e ("kexec: enable CMA based contiguous allocation") introduces logic to use CMA-based allocation in kexec by default. As part of the changes, it introduces a kexec_file_load flag to disable the use of CMA allocations from userspace. However, this flag is broken since it is missing from the list of legal flags for kexec_file_load. kexec_file_load returns EINVAL when attempting to use the flag. Fix this by adding the KEXEC_FILE_NO_CMA flag to the list of legal flags for kexec_file_load. Without this fix, kexec_file_load syscall will failed and return '-EINVAL' when KEXEC_FILE_NO_CMA is specified. Link: https://lkml.kernel.org/r/20250805211527.122367-2-makb@juniper.net Fixes: 07d24902977e ("kexec: enable CMA based contiguous allocation") Signed-off-by: Brian Mak Acked-by: Baoquan He Cc: Alexander Graf Cc: Borislav Betkov Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1b10a5d84b68..39fe3e6cd282 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ + KEXEC_FILE_NO_CMA) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; From 9f68eabab9d9aaa764a8d234c4170119e6518102 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Thu, 21 Aug 2025 20:55:55 +0800 Subject: [PATCH 0918/1307] mm/damon/core: prevent unnecessary overflow in damos_set_effective_quota() On 32-bit systems, the throughput calculation in damos_set_effective_quota() is prone to unnecessary multiplication overflow. Using mult_frac() to fix it. Andrew Paniakin also recently found and privately reported this issue, on 64 bit systems. This can also happen on 64-bit systems, once the charged size exceeds ~17 TiB. On systems running for long time in production, this issue can actually happen. More specifically, when a DAMOS scheme having the time quota run for longtime, throughput calculation can overflow and set esz too small. As a result, speed of the scheme get unexpectedly slow. Link: https://lkml.kernel.org/r/20250821125555.3020951-1-yanquanmin1@huawei.com Fixes: 1cd243030059 ("mm/damon/schemes: implement time quota") Signed-off-by: Quanmin Yan Reported-by: Andrew Paniakin Reviewed-by: SeongJae Park Cc: Kefeng Wang Cc: ze zuo Cc: [5.16+] Signed-off-by: Andrew Morton --- mm/damon/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 70eff5cbe6ee..106ee8b0f2d5 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2073,8 +2073,8 @@ static void damos_set_effective_quota(struct damos_quota *quota) if (quota->ms) { if (quota->total_charged_ns) - throughput = quota->total_charged_sz * 1000000 / - quota->total_charged_ns; + throughput = mult_frac(quota->total_charged_sz, 1000000, + quota->total_charged_ns); else throughput = PAGE_SIZE * 1024; esz = min(throughput * quota->ms, esz); From c3576889d87b603cb66b417e08844a53c1077a37 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 7 Aug 2025 20:35:45 +0200 Subject: [PATCH 0919/1307] mm: fix accounting of memmap pages For !CONFIG_SPARSEMEM_VMEMMAP, memmap page accounting is currently done upfront in sparse_buffer_init(). However, sparse_buffer_alloc() may return NULL in failure scenario. Also, memmap pages may be allocated either from the memblock allocator during early boot or from the buddy allocator. When removed via arch_remove_memory(), accounting of memmap pages must reflect the original allocation source. To ensure correctness: * Account memmap pages after successful allocation in sparse_init_nid() and section_activate(). * Account memmap pages in section_deactivate() based on allocation source. Link: https://lkml.kernel.org/r/20250807183545.1424509-1-sumanthk@linux.ibm.com Fixes: 15995a352474 ("mm: report per-page metadata information") Signed-off-by: Sumanth Korikkar Suggested-by: David Hildenbrand Reviewed-by: Wei Yang Cc: Alexander Gordeev Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Signed-off-by: Andrew Morton --- mm/sparse-vmemmap.c | 5 ----- mm/sparse.c | 15 +++++++++------ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index fd2ab5118e13..41aa0493eb03 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -578,11 +578,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) - memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - else - memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); - return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index 3c012cf83cc2..e6075b622407 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -454,9 +454,6 @@ static void __init sparse_buffer_init(unsigned long size, int nid) */ sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; -#ifndef CONFIG_SPARSEMEM_VMEMMAP - memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); -#endif } static void __init sparse_buffer_fini(void) @@ -567,6 +564,8 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, sparse_buffer_fini(); goto failed; } + memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page), + PAGE_SIZE)); sparse_init_early_section(nid, map, pnum, 0); } } @@ -680,7 +679,6 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) @@ -856,10 +854,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * The memmap of early sections is always fully populated. See * section_activate() and pfn_valid() . */ - if (!section_is_early) + if (!section_is_early) { + memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE))); depopulate_section_memmap(pfn, nr_pages, altmap); - else if (memmap) + } else if (memmap) { + memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), + PAGE_SIZE))); free_map_bootmem(memmap); + } if (empty) ms->section_mem_map = (unsigned long)NULL; @@ -904,6 +906,7 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn, section_deactivate(pfn, nr_pages, altmap); return ERR_PTR(-ENOMEM); } + memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)); return memmap; } From 2ce3d282bd5050fca8577defeff08ada0d55d062 Mon Sep 17 00:00:00 2001 From: wangzijie Date: Mon, 18 Aug 2025 20:31:02 +0800 Subject: [PATCH 0920/1307] proc: fix missing pde_set_flags() for net proc files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid potential UAF issues during module removal races, we use pde_set_flags() to save proc_ops flags in PDE itself before proc_register(), and then use pde_has_proc_*() helpers instead of directly dereferencing pde->proc_ops->*. However, the pde_set_flags() call was missing when creating net related proc files. This omission caused incorrect behavior which FMODE_LSEEK was being cleared inappropriately in proc_reg_open() for net proc files. Lars reported it in this link[1]. Fix this by ensuring pde_set_flags() is called when register proc entry, and add NULL check for proc_ops in pde_set_flags(). [wangzijie1@honor.com: stash pde->proc_ops in a local const variable, per Christian] Link: https://lkml.kernel.org/r/20250821105806.1453833-1-wangzijie1@honor.com Link: https://lkml.kernel.org/r/20250818123102.959595-1-wangzijie1@honor.com Link: https://lore.kernel.org/all/20250815195616.64497967@chagall.paradoxon.rec/ [1] Fixes: ff7ec8dc1b64 ("proc: use the same treatment to check proc_lseek as ones for proc_read_iter et.al") Signed-off-by: wangzijie Reported-by: Lars Wendler Tested-by: Stefano Brivio Tested-by: Petr Vaněk Tested by: Lars Wendler Cc: Alexei Starovoitov Cc: Alexey Dobriyan Cc: Al Viro Cc: "Edgecombe, Rick P" Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Kirill A. Shutemov Cc: wangzijie Cc: Signed-off-by: Andrew Morton --- fs/proc/generic.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 76e800e38c8f..bd0c099cfdd2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -367,6 +367,25 @@ static const struct inode_operations proc_dir_inode_operations = { .setattr = proc_notify_change, }; +static void pde_set_flags(struct proc_dir_entry *pde) +{ + const struct proc_ops *proc_ops = pde->proc_ops; + + if (!proc_ops) + return; + + if (proc_ops->proc_flags & PROC_ENTRY_PERMANENT) + pde->flags |= PROC_ENTRY_PERMANENT; + if (proc_ops->proc_read_iter) + pde->flags |= PROC_ENTRY_proc_read_iter; +#ifdef CONFIG_COMPAT + if (proc_ops->proc_compat_ioctl) + pde->flags |= PROC_ENTRY_proc_compat_ioctl; +#endif + if (proc_ops->proc_lseek) + pde->flags |= PROC_ENTRY_proc_lseek; +} + /* returns the registered entry, or frees dp and returns NULL on failure */ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp) @@ -374,6 +393,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, if (proc_alloc_inum(&dp->low_ino)) goto out_free_entry; + pde_set_flags(dp); + write_lock(&proc_subdir_lock); dp->parent = dir; if (pde_subdir_insert(dir, dp) == false) { @@ -561,20 +582,6 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, return p; } -static void pde_set_flags(struct proc_dir_entry *pde) -{ - if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) - pde->flags |= PROC_ENTRY_PERMANENT; - if (pde->proc_ops->proc_read_iter) - pde->flags |= PROC_ENTRY_proc_read_iter; -#ifdef CONFIG_COMPAT - if (pde->proc_ops->proc_compat_ioctl) - pde->flags |= PROC_ENTRY_proc_compat_ioctl; -#endif - if (pde->proc_ops->proc_lseek) - pde->flags |= PROC_ENTRY_proc_lseek; -} - struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops, void *data) @@ -585,7 +592,6 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, if (!p) return NULL; p->proc_ops = proc_ops; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -636,7 +642,6 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_seq_private); @@ -667,7 +672,6 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, return NULL; p->proc_ops = &proc_single_ops; p->single_show = show; - pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_single_data); From 7cc183f2e67d19b03ee5c13a6664b8c6cc37ff9d Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:04 +0900 Subject: [PATCH 0921/1307] mm: move page table sync declarations to linux/pgtable.h During our internal testing, we started observing intermittent boot failures when the machine uses 4-level paging and has a large amount of persistent memory: BUG: unable to handle page fault for address: ffffe70000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP NOPTI RIP: 0010:__init_single_page+0x9/0x6d Call Trace: __init_zone_device_page+0x17/0x5d memmap_init_zone_device+0x154/0x1bb pagemap_range+0x2e0/0x40f memremap_pages+0x10b/0x2f0 devm_memremap_pages+0x1e/0x60 dev_dax_probe+0xce/0x2ec [device_dax] dax_bus_probe+0x6d/0xc9 [... snip ...] It turns out that the kernel panics while initializing vmemmap (struct page array) when the vmemmap region spans two PGD entries, because the new PGD entry is only installed in init_mm.pgd, but not in the page tables of other tasks. And looking at __populate_section_memmap(): if (vmemmap_can_optimize(altmap, pgmap)) // does not sync top level page tables r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap); else // sync top level page tables in x86 r = vmemmap_populate(start, end, nid, altmap); In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c synchronizes the top level page table (See commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so that all tasks in the system can see the new vmemmap area. However, when vmemmap_can_optimize() returns true, the optimized path skips synchronization of top-level page tables. This is because vmemmap_populate_compound_pages() is implemented in core MM code, which does not handle synchronization of the top-level page tables. Instead, the core MM has historically relied on each architecture to perform this synchronization manually. We're not the first party to encounter a crash caused by not-sync'd top level page tables: earlier this year, Gwan-gyeong Mun attempted to address the issue [1] [2] after hitting a kernel panic when x86 code accessed the vmemmap area before the corresponding top-level entries were synced. At that time, the issue was believed to be triggered only when struct page was enlarged for debugging purposes, and the patch did not get further updates. It turns out that current approach of relying on each arch to handle the page table sync manually is fragile because 1) it's easy to forget to sync the top level page table, and 2) it's also easy to overlook that the kernel should not access the vmemmap and direct mapping areas before the sync. # The solution: Make page table sync more code robust and harder to miss To address this, Dave Hansen suggested [3] [4] introducing {pgd,p4d}_populate_kernel() for updating kernel portion of the page tables and allow each architecture to explicitly perform synchronization when installing top-level entries. With this approach, we no longer need to worry about missing the sync step, reducing the risk of future regressions. The new interface reuses existing ARCH_PAGE_TABLE_SYNC_MASK, PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facility used by vmalloc and ioremap to synchronize page tables. pgd_populate_kernel() looks like this: static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd, p4d_t *p4d) { pgd_populate(&init_mm, pgd, p4d); if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) arch_sync_kernel_mappings(addr, addr); } It is worth noting that vmalloc() and apply_to_range() carefully synchronizes page tables by calling p*d_alloc_track() and arch_sync_kernel_mappings(), and thus they are not affected by this patch series. This series was hugely inspired by Dave Hansen's suggestion and hence added Suggested-by: Dave Hansen. Cc stable because lack of this series opens the door to intermittent boot failures. This patch (of 3): Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to linux/pgtable.h so that they can be used outside of vmalloc and ioremap. Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@intel.com [1] Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [2] Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel.com [3] Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel.com [4] Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: "Uladzislau Rezki (Sony)" Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Betkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Gwan-gyeong Mun Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 16 ++++++++++++++++ include/linux/vmalloc.h | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4c035637eeb7..ba699df6ef69 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned } #endif +/* + * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values + * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() + * needs to be called. + */ +#ifndef ARCH_PAGE_TABLE_SYNC_MASK +#define ARCH_PAGE_TABLE_SYNC_MASK 0 +#endif + +/* + * There is no default implementation for arch_sync_kernel_mappings(). It is + * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK + * is 0. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end); + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index fdc9aeb74a44..2759dac6be44 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -219,22 +219,6 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); -/* - * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values - * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() - * needs to be called. - */ -#ifndef ARCH_PAGE_TABLE_SYNC_MASK -#define ARCH_PAGE_TABLE_SYNC_MASK 0 -#endif - -/* - * There is no default implementation for arch_sync_kernel_mappings(). It is - * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK - * is 0. - */ -void arch_sync_kernel_mappings(unsigned long start, unsigned long end); - /* * Lowlevel-APIs (not for driver use!) */ From f2d2f9598ebb0158a3fe17cda0106d7752e654a2 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:05 +0900 Subject: [PATCH 0922/1307] mm: introduce and use {pgd,p4d}_populate_kernel() Introduce and use {pgd,p4d}_populate_kernel() in core MM code when populating PGD and P4D entries for the kernel address space. These helpers ensure proper synchronization of page tables when updating the kernel portion of top-level page tables. Until now, the kernel has relied on each architecture to handle synchronization of top-level page tables in an ad-hoc manner. For example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes"). However, this approach has proven fragile for following reasons: 1) It is easy to forget to perform the necessary page table synchronization when introducing new changes. For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory savings for compound devmaps") overlooked the need to synchronize page tables for the vmemmap area. 2) It is also easy to overlook that the vmemmap and direct mapping areas must not be accessed before explicit page table synchronization. For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")) caused crashes by accessing the vmemmap area before calling sync_global_pgds(). To address this, as suggested by Dave Hansen, introduce _kernel() variants of the page table population helpers, which invoke architecture-specific hooks to properly synchronize page tables. These are introduced in a new header file, include/linux/pgalloc.h, so they can be called from common code. They reuse existing infrastructure for vmalloc and ioremap. Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK, and the actual synchronization is performed by arch_sync_kernel_mappings(). This change currently targets only x86_64, so only PGD and P4D level helpers are introduced. Currently, these helpers are no-ops since no architecture sets PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK. In theory, PUD and PMD level helpers can be added later if needed by other architectures. For now, 32-bit architectures (x86-32 and arm) only handle PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them unless we introduce a PMD level helper. [harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()] Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Suggested-by: Dave Hansen Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Betkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Gwan-gyeong Mun Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Huth Cc: "Uladzislau Rezki (Sony)" Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++ include/linux/pgtable.h | 13 +++++++------ mm/kasan/init.c | 12 ++++++------ mm/percpu.c | 6 +++--- mm/sparse-vmemmap.c | 6 +++--- 5 files changed, 48 insertions(+), 18 deletions(-) create mode 100644 include/linux/pgalloc.h diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h new file mode 100644 index 000000000000..9174fa59bbc5 --- /dev/null +++ b/include/linux/pgalloc.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PGALLOC_H +#define _LINUX_PGALLOC_H + +#include +#include + +/* + * {pgd,p4d}_populate_kernel() are defined as macros to allow + * compile-time optimization based on the configured page table levels. + * Without this, linking may fail because callers (e.g., KASAN) may rely + * on calls to these functions being optimized away when passing symbols + * that exist only for certain page table levels. + */ +#define pgd_populate_kernel(addr, pgd, p4d) \ + do { \ + pgd_populate(&init_mm, pgd, p4d); \ + if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \ + arch_sync_kernel_mappings(addr, addr); \ + } while (0) + +#define p4d_populate_kernel(addr, p4d, pud) \ + do { \ + p4d_populate(&init_mm, p4d, pud); \ + if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \ + arch_sync_kernel_mappings(addr, addr); \ + } while (0) + +#endif /* _LINUX_PGALLOC_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ba699df6ef69..2b80fd456c8b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values - * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() - * needs to be called. + * and let generic vmalloc, ioremap and page table update code know when + * arch_sync_kernel_mappings() needs to be called. */ #ifndef ARCH_PAGE_TABLE_SYNC_MASK #define ARCH_PAGE_TABLE_SYNC_MASK 0 @@ -1954,10 +1954,11 @@ static inline bool arch_has_pfn_modify_check(void) /* * Page Table Modification bits for pgtbl_mod_mask. * - * These are used by the p?d_alloc_track*() set of functions an in the generic - * vmalloc/ioremap code to track at which page-table levels entries have been - * modified. Based on that the code can better decide when vmalloc and ioremap - * mapping changes need to be synchronized to other page-tables in the system. + * These are used by the p?d_alloc_track*() and p*d_populate_kernel() + * functions in the generic vmalloc, ioremap and page table update code + * to track at which page-table levels entries have been modified. + * Based on that the code can better decide when page table changes need + * to be synchronized to other page-tables in the system. */ #define __PGTBL_PGD_MODIFIED 0 #define __PGTBL_P4D_MODIFIED 1 diff --git a/mm/kasan/init.c b/mm/kasan/init.c index ced6b29fcf76..8fce3370c84e 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include #include "kasan.h" @@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, pud_t *pud; pmd_t *pmd; - p4d_populate(&init_mm, p4d, + p4d_populate_kernel(addr, p4d, lm_alias(kasan_early_shadow_pud)); pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, @@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, } else { p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); pud_init(p); - p4d_populate(&init_mm, p4d, p); + p4d_populate_kernel(addr, p4d, p); } } zero_pud_populate(p4d, addr, next); @@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, * puds,pmds, so pgd_populate(), pud_populate() * is noops. */ - pgd_populate(&init_mm, pgd, + pgd_populate_kernel(addr, pgd, lm_alias(kasan_early_shadow_p4d)); p4d = p4d_offset(pgd, addr); - p4d_populate(&init_mm, p4d, + p4d_populate_kernel(addr, p4d, lm_alias(kasan_early_shadow_pud)); pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, @@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, if (!p) return -ENOMEM; } else { - pgd_populate(&init_mm, pgd, + pgd_populate_kernel(addr, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } } diff --git a/mm/percpu.c b/mm/percpu.c index d9cbaee92b60..a56f35dcc417 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3108,7 +3108,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, #endif /* BUILD_EMBED_FIRST_CHUNK */ #ifdef BUILD_PAGE_FIRST_CHUNK -#include +#include #ifndef P4D_TABLE_SIZE #define P4D_TABLE_SIZE PAGE_SIZE @@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(unsigned long addr) if (pgd_none(*pgd)) { p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE); - pgd_populate(&init_mm, pgd, p4d); + pgd_populate_kernel(addr, pgd, p4d); } p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - p4d_populate(&init_mm, p4d, pud); + p4d_populate_kernel(addr, p4d, pud); } pud = pud_offset(p4d, addr); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 41aa0493eb03..dbd8daccade2 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include "hugetlb_vmemmap.h" @@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) if (!p) return NULL; pud_init(p); - p4d_populate(&init_mm, p4d, p); + p4d_populate_kernel(addr, p4d, p); } return p4d; } @@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; - pgd_populate(&init_mm, pgd, p); + pgd_populate_kernel(addr, pgd, p); } return pgd; } From 6659d027998083fbb6d42a165b0c90dc2e8ba989 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:06 +0900 Subject: [PATCH 0923/1307] x86/mm/64: define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() Define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to ensure page tables are properly synchronized when calling p*d_populate_kernel(). For 5-level paging, synchronization is performed via pgd_populate_kernel(). In 4-level paging, pgd_populate() is a no-op, so synchronization is instead performed at the P4D level via p4d_populate_kernel(). This fixes intermittent boot failures on systems using 4-level paging and a large amount of persistent memory: BUG: unable to handle page fault for address: ffffe70000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP NOPTI RIP: 0010:__init_single_page+0x9/0x6d Call Trace: __init_zone_device_page+0x17/0x5d memmap_init_zone_device+0x154/0x1bb pagemap_range+0x2e0/0x40f memremap_pages+0x10b/0x2f0 devm_memremap_pages+0x1e/0x60 dev_dax_probe+0xce/0x2ec [device_dax] dax_bus_probe+0x6d/0xc9 [... snip ...] It also fixes a crash in vmemmap_set_pmd() caused by accessing vmemmap before sync_global_pgds() [1]: BUG: unable to handle page fault for address: ffffeb3ff1200000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI Tainted: [W]=WARN RIP: 0010:vmemmap_set_pmd+0xff/0x230 vmemmap_populate_hugepages+0x176/0x180 vmemmap_populate+0x34/0x80 __populate_section_memmap+0x41/0x90 sparse_add_section+0x121/0x3e0 __add_pages+0xba/0x150 add_pages+0x1d/0x70 memremap_pages+0x3dc/0x810 devm_memremap_pages+0x1c/0x60 xe_devm_add+0x8b/0x100 [xe] xe_tile_init_noalloc+0x6a/0x70 [xe] xe_device_probe+0x48c/0x740 [xe] [... snip ...] Link: https://lkml.kernel.org/r/20250818020206.4517-4-harry.yoo@oracle.com Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Closes: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [1] Suggested-by: Dave Hansen Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Betkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Huth Cc: "Uladzislau Rezki (Sony)" Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable_64_types.h | 3 +++ arch/x86/mm/init_64.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 4604f924d8b8..7eb61ef6a185 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(void) #define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) #endif /* USE_EARLY_PGTABLE_L5 */ +#define ARCH_PAGE_TABLE_SYNC_MASK \ + (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) + extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 76e33bd7c556..b9426fce5f3e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -223,6 +223,24 @@ static void sync_global_pgds(unsigned long start, unsigned long end) sync_global_pgds_l4(start, end); } +/* + * Make kernel mappings visible in all page tables in the system. + * This is necessary except when the init task populates kernel mappings + * during the boot process. In that case, all processes originating from + * the init task copies the kernel mappings, so there is no issue. + * Otherwise, missing synchronization could lead to kernel crashes due + * to missing page table entries for certain kernel mappings. + * + * Synchronization is performed at the top level, which is the PGD in + * 5-level paging systems. But in 4-level paging systems, however, + * pgd_populate() is a no-op, so synchronization is done at the P4D level. + * sync_global_pgds() handles this difference between paging levels. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + sync_global_pgds(start, end); +} + /* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. From a6358f8cf64850f3f27857b8ed8c1b08cfc4685c Mon Sep 17 00:00:00 2001 From: Li Nan Date: Wed, 27 Aug 2025 15:39:54 +0800 Subject: [PATCH 0924/1307] efivarfs: Fix slab-out-of-bounds in efivarfs_d_compare Observed on kernel 6.6 (present on master as well): BUG: KASAN: slab-out-of-bounds in memcmp+0x98/0xd0 Call trace: kasan_check_range+0xe8/0x190 __asan_loadN+0x1c/0x28 memcmp+0x98/0xd0 efivarfs_d_compare+0x68/0xd8 __d_lookup_rcu_op_compare+0x178/0x218 __d_lookup_rcu+0x1f8/0x228 d_alloc_parallel+0x150/0x648 lookup_open.isra.0+0x5f0/0x8d0 open_last_lookups+0x264/0x828 path_openat+0x130/0x3f8 do_filp_open+0x114/0x248 do_sys_openat2+0x340/0x3c0 __arm64_sys_openat+0x120/0x1a0 If dentry->d_name.len < EFI_VARIABLE_GUID_LEN , 'guid' can become negative, leadings to oob. The issue can be triggered by parallel lookups using invalid filename: T1 T2 lookup_open ->lookup simple_lookup d_add // invalid dentry is added to hash list lookup_open d_alloc_parallel __d_lookup_rcu __d_lookup_rcu_op_compare hlist_bl_for_each_entry_rcu // invalid dentry can be retrieved ->d_compare efivarfs_d_compare // oob Fix it by checking 'guid' before cmp. Fixes: da27a24383b2 ("efivarfs: guid part of filenames are case-insensitive") Signed-off-by: Li Nan Signed-off-by: Wu Guanghao Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index c4a139911356..4bb4002e3cdf 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -152,6 +152,10 @@ static int efivarfs_d_compare(const struct dentry *dentry, { int guid = len - EFI_VARIABLE_GUID_LEN; + /* Parallel lookups may produce a temporary invalid filename */ + if (guid <= 0) + return 1; + if (name->len != len) return 1; From dac978e51cce0c1f00a14c4a82f81d387f79b2d4 Mon Sep 17 00:00:00 2001 From: Neil Mandir Date: Tue, 26 Aug 2025 10:30:22 -0400 Subject: [PATCH 0925/1307] net: macb: Disable clocks once When the driver is removed the clocks are disabled twice: once in macb_remove and a second time by runtime pm. Disable wakeup in remove so all the clocks are disabled and skip the second call to macb_clks_disable. Always suspend the device as we always set it active in probe. Fixes: d54f89af6cc4 ("net: macb: Add pm runtime support") Signed-off-by: Neil Mandir Co-developed-by: Sean Anderson Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250826143022.935521-1-sean.anderson@linux.dev Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 106885451147..16d28a8b3b56 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5404,14 +5404,11 @@ static void macb_remove(struct platform_device *pdev) mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); + device_set_wakeup_enable(&bp->pdev->dev, 0); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - if (!pm_runtime_suspended(&pdev->dev)) { - macb_clks_disable(bp->pclk, bp->hclk, bp->tx_clk, - bp->rx_clk, bp->tsu_clk); - pm_runtime_set_suspended(&pdev->dev); - } + pm_runtime_set_suspended(&pdev->dev); phylink_destroy(bp->phylink); free_netdev(dev); } From 5189446ba995556eaa3755a6e875bc06675b88bd Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Wed, 27 Aug 2025 08:23:21 +0200 Subject: [PATCH 0926/1307] net: ipv4: fix regression in local-broadcast routes Commit 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes") introduced a regression where local-broadcast packets would have their gateway set in __mkroute_output, which was caused by fi = NULL being removed. Fix this by resetting the fib_info for local-broadcast packets. This preserves the intended changes for directed-broadcast packets. Cc: stable@vger.kernel.org Fixes: 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes") Reported-by: Brett A C Sheffield Closes: https://lore.kernel.org/regressions/20250822165231.4353-4-bacs@librecast.net Signed-off-by: Oscar Maes Reviewed-by: David Ahern Link: https://patch.msgid.link/20250827062322.4807-1-oscmaes92@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/route.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f639a2ae881a..baa43e5966b1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2575,12 +2575,16 @@ static struct rtable *__mkroute_output(const struct fib_result *res, !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); - if (ipv4_is_lbcast(fl4->daddr)) + if (ipv4_is_lbcast(fl4->daddr)) { type = RTN_BROADCAST; - else if (ipv4_is_multicast(fl4->daddr)) + + /* reset fi to prevent gateway resolution */ + fi = NULL; + } else if (ipv4_is_multicast(fl4->daddr)) { type = RTN_MULTICAST; - else if (ipv4_is_zeronet(fl4->daddr)) + } else if (ipv4_is_zeronet(fl4->daddr)) { return ERR_PTR(-EINVAL); + } if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; From 8022629548949eb4d2e2207b893bfb6d486700cb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Aug 2025 06:30:01 +0000 Subject: [PATCH 0927/1307] ASoC: rsnd: tidyup direction name on rsnd_dai_connect() commit 2c6b6a3e8b93 ("ASoC: rsnd: use snd_pcm_direction_name()") uses snd_pcm_direction_name() instead of original method to get string "Playback" or "Capture". But io->substream might be NULL in this timing. Let's re-use original method. Fixes: 2c6b6a3e8b93 ("ASoC: rsnd: use snd_pcm_direction_name()") Reported-by: Thuan Nguyen Tested-by: Thuan Nguyen Signed-off-by: Kuninori Morimoto Message-ID: <87zfbmwq6v.wl-kuninori.morimoto.gx@renesas.com> Signed-off-by: Mark Brown --- sound/soc/renesas/rcar/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/renesas/rcar/core.c b/sound/soc/renesas/rcar/core.c index 37d954495ea5..9f086906a2e5 100644 --- a/sound/soc/renesas/rcar/core.c +++ b/sound/soc/renesas/rcar/core.c @@ -597,7 +597,7 @@ int rsnd_dai_connect(struct rsnd_mod *mod, dev_dbg(dev, "%s is connected to io (%s)\n", rsnd_mod_name(mod), - snd_pcm_direction_name(io->substream->stream)); + rsnd_io_is_play(io) ? "Playback" : "Capture"); return 0; } From 224476613c8499f00ce4de975dd65749c5ca498c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 28 Aug 2025 09:55:26 +0300 Subject: [PATCH 0928/1307] wifi: iwlwifi: if scratch is ~0U, consider it a failure We want to see bits being set in the scratch register upon resume, but if all the bits are set, it means that we were kicked out of the PCI bus and that clearly doesn't mean we can assume the firmware is still alive after the suspend / resume cycle. Fixes: cb347bd29d0d ("wifi: iwlwifi: mvm: fix hibernation") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.0f203e559242.I59eff718cb5fda575db41081a1a389f7af488717@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b7add05f7a85..46d8c1922292 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1222,11 +1222,15 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, * but not bits [15:8]. So if we have bits set in lower word, assume * the device is alive. + * Alternatively, if the scratch value is 0xFFFFFFFF, then we no longer + * have access to the device and consider it powered off. * For older devices, just try silently to grab the NIC. */ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & - CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + u32 scratch = iwl_read32(trans, CSR_FUNC_SCRATCH); + + if (!(scratch & CSR_FUNC_SCRATCH_POWER_OFF_MASK) || + scratch == ~0U) device_was_powered_off = true; } else { /* From 7bf2dfccc2dd70821104d15cbab7b6fca21872be Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:27 +0300 Subject: [PATCH 0929/1307] wifi: iwlwifi: acpi: check DSM func validity The DSM func 0 (DSM_FUNC_QUERY) returns a bitmap of which other functions contain valid data, query and check it before returning other functions data. Fixes: 9db93491f29e ("iwlwifi: acpi: support device specific method (DSM)") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220085 Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.881e17ff8f6a.Ic6d92997d9d5fad127919d6e1b830cd3fe944468@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 25 ++++++++++++++++++- .../net/wireless/intel/iwlwifi/fw/runtime.h | 8 ++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index bee7d92293b8..7ec22738b5d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -169,7 +169,7 @@ int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt, BUILD_BUG_ON(ARRAY_SIZE(acpi_dsm_size) != DSM_FUNC_NUM_FUNCS); - if (WARN_ON(func >= ARRAY_SIZE(acpi_dsm_size))) + if (WARN_ON(func >= ARRAY_SIZE(acpi_dsm_size) || !func)) return -EINVAL; expected_size = acpi_dsm_size[func]; @@ -178,6 +178,29 @@ int iwl_acpi_get_dsm(struct iwl_fw_runtime *fwrt, if (expected_size != sizeof(u8) && expected_size != sizeof(u32)) return -EOPNOTSUPP; + if (!fwrt->acpi_dsm_funcs_valid) { + ret = iwl_acpi_get_dsm_integer(fwrt->dev, ACPI_DSM_REV, + DSM_FUNC_QUERY, + &iwl_guid, &tmp, + acpi_dsm_size[DSM_FUNC_QUERY]); + if (ret) { + /* always indicate BIT(0) to avoid re-reading */ + fwrt->acpi_dsm_funcs_valid = BIT(0); + return ret; + } + + IWL_DEBUG_RADIO(fwrt, "ACPI DSM validity bitmap 0x%x\n", + (u32)tmp); + /* always indicate BIT(0) to avoid re-reading */ + fwrt->acpi_dsm_funcs_valid = tmp | BIT(0); + } + + if (!(fwrt->acpi_dsm_funcs_valid & BIT(func))) { + IWL_DEBUG_RADIO(fwrt, "ACPI DSM %d not indicated as valid\n", + func); + return -ENODATA; + } + ret = iwl_acpi_get_dsm_integer(fwrt->dev, ACPI_DSM_REV, func, &iwl_guid, &tmp, expected_size); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index 0444a736c2b2..bd3bc2846cfa 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -113,6 +113,10 @@ struct iwl_txf_iter_data { * @phy_filters: specific phy filters as read from WPFC BIOS table * @ppag_bios_rev: PPAG BIOS revision * @ppag_bios_source: see &enum bios_source + * @acpi_dsm_funcs_valid: bitmap indicating which DSM values are valid, + * zero (default initialization) means it hasn't been read yet, + * and BIT(0) is set when it has since function 0 also has this + * bitmap and is always supported */ struct iwl_fw_runtime { struct iwl_trans *trans; @@ -189,6 +193,10 @@ struct iwl_fw_runtime { bool uats_valid; u8 uefi_tables_lock_status; struct iwl_phy_specific_cfg phy_filters; + +#ifdef CONFIG_ACPI + u32 acpi_dsm_funcs_valid; +#endif }; void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans, From 1d33694462fa7da451846c39d653585b61375992 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:28 +0300 Subject: [PATCH 0930/1307] wifi: iwlwifi: uefi: check DSM item validity The first array index is a bitmap indicating which of the other values are valid. Check that bitmap before returning a value. Fixes: fc7214c3c986 ("wifi: iwlwifi: read DSM functions from UEFI") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220085 Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.59ec52ff865e.I9e11f497a029eb38f481b2c90c43c0935285216d@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index 48126ec6b94b..99a17b9323e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -747,6 +747,12 @@ int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func, goto out; } + if (!(data->functions[DSM_FUNC_QUERY] & BIT(func))) { + IWL_DEBUG_RADIO(fwrt, "DSM func %d not in 0x%x\n", + func, data->functions[DSM_FUNC_QUERY]); + goto out; + } + *value = data->functions[func]; IWL_DEBUG_RADIO(fwrt, From 75575e2d252afb29fdbcbeec4d67e042007add52 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 26 Aug 2025 20:26:01 +0300 Subject: [PATCH 0931/1307] wifi: mac80211: do not permit 40 MHz EHT operation on 5/6 GHz The EHT PHY requirements state that 80 MHz must be supported on the 5 and 6 GHz bands unless the STA is 20 MHz only. So if the channel width is limited to 40 MHz on a band other than 2.4 GHz, then disable EHT and downgrade to HE. The primary case where this can happen is if the hardware disables puncturing using IEEE80211_HW_DISALLOW_PUNCTURING. Signed-off-by: Benjamin Berg Cc: stable@vger.kernel.org Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250826202553.a6582f3abf57.Ic670429dc7127f68c818b4290d950ebfb5a0b9e1@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 ++++++++ net/mac80211/tests/chan-mode.c | 30 +++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1008eb8e9b13..dd650a127a31 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1189,6 +1189,14 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, "required MCSes not supported, disabling EHT\n"); } + if (conn->mode >= IEEE80211_CONN_MODE_EHT && + channel->band != NL80211_BAND_2GHZ && + conn->bw_limit == IEEE80211_CONN_BW_LIMIT_40) { + conn->mode = IEEE80211_CONN_MODE_HE; + link_id_info(sdata, link_id, + "required bandwidth not supported, disabling EHT\n"); + } + /* the mode can only decrease, so this must terminate */ if (ap_mode != conn->mode) { kfree(elems); diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index 96c7b3ab2744..adc069065e73 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c @@ -2,7 +2,7 @@ /* * KUnit tests for channel mode functions * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include #include @@ -28,6 +28,10 @@ static const struct determine_chan_mode_case { u8 vht_basic_mcs_1_4, vht_basic_mcs_5_8; u8 he_basic_mcs_1_4, he_basic_mcs_5_8; u8 eht_mcs7_min_nss; + u16 eht_disabled_subchannels; + u8 eht_bw; + enum ieee80211_conn_bw_limit conn_bw_limit; + enum ieee80211_conn_bw_limit expected_bw_limit; int error; } determine_chan_mode_cases[] = { { @@ -128,6 +132,14 @@ static const struct determine_chan_mode_case { .conn_mode = IEEE80211_CONN_MODE_EHT, .eht_mcs7_min_nss = 0x15, .error = EINVAL, + }, { + .desc = "80 MHz EHT is downgraded to 40 MHz HE due to puncturing", + .conn_mode = IEEE80211_CONN_MODE_EHT, + .expected_mode = IEEE80211_CONN_MODE_HE, + .conn_bw_limit = IEEE80211_CONN_BW_LIMIT_80, + .expected_bw_limit = IEEE80211_CONN_BW_LIMIT_40, + .eht_disabled_subchannels = 0x08, + .eht_bw = IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ, } }; KUNIT_ARRAY_PARAM_DESC(determine_chan_mode, determine_chan_mode_cases, desc) @@ -138,7 +150,7 @@ static void test_determine_chan_mode(struct kunit *test) struct t_sdata *t_sdata = T_SDATA(test); struct ieee80211_conn_settings conn = { .mode = params->conn_mode, - .bw_limit = IEEE80211_CONN_BW_LIMIT_20, + .bw_limit = params->conn_bw_limit, }; struct cfg80211_bss cbss = { .channel = &t_sdata->band_5ghz.channels[0], @@ -191,14 +203,21 @@ static void test_determine_chan_mode(struct kunit *test) 0x7f, 0x01, 0x00, 0x88, 0x88, 0x88, 0x00, 0x00, 0x00, /* EHT Operation */ - WLAN_EID_EXTENSION, 0x09, WLAN_EID_EXT_EHT_OPERATION, - 0x01, params->eht_mcs7_min_nss ? params->eht_mcs7_min_nss : 0x11, - 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, + WLAN_EID_EXTENSION, 0x0b, WLAN_EID_EXT_EHT_OPERATION, + 0x03, params->eht_mcs7_min_nss ? params->eht_mcs7_min_nss : 0x11, + 0x00, 0x00, 0x00, params->eht_bw, + params->eht_bw == IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ ? 42 : 36, + 0x00, + u16_get_bits(params->eht_disabled_subchannels, 0xff), + u16_get_bits(params->eht_disabled_subchannels, 0xff00), }; struct ieee80211_chan_req chanreq = {}; struct cfg80211_chan_def ap_chandef = {}; struct ieee802_11_elems *elems; + /* To force EHT downgrade to HE on punctured 80 MHz downgraded to 40 MHz */ + set_bit(IEEE80211_HW_DISALLOW_PUNCTURING, t_sdata->local.hw.flags); + if (params->strict) set_bit(IEEE80211_HW_STRICT, t_sdata->local.hw.flags); else @@ -237,6 +256,7 @@ static void test_determine_chan_mode(struct kunit *test) } else { KUNIT_ASSERT_NOT_ERR_OR_NULL(test, elems); KUNIT_ASSERT_EQ(test, conn.mode, params->expected_mode); + KUNIT_ASSERT_EQ(test, conn.bw_limit, params->expected_bw_limit); } } From 0e20450829ca3c1dbc2db536391537c57a40fe0b Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Fri, 15 Aug 2025 10:30:50 +0800 Subject: [PATCH 0932/1307] wifi: mwifiex: Initialize the chan_stats array to zero The adapter->chan_stats[] array is initialized in mwifiex_init_channel_scan_gap() with vmalloc(), which doesn't zero out memory. The array is filled in mwifiex_update_chan_statistics() and then the user can query the data in mwifiex_cfg80211_dump_survey(). There are two potential issues here. What if the user calls mwifiex_cfg80211_dump_survey() before the data has been filled in. Also the mwifiex_update_chan_statistics() function doesn't necessarily initialize the whole array. Since the array was not initialized at the start that could result in an information leak. Also this array is pretty small. It's a maximum of 900 bytes so it's more appropriate to use kcalloc() instead vmalloc(). Cc: stable@vger.kernel.org Fixes: bf35443314ac ("mwifiex: channel statistics support for mwifiex") Suggested-by: Dan Carpenter Signed-off-by: Qianfeng Rong Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20250815023055.477719-1-rongqianfeng@vivo.com Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 5 +++-- drivers/net/wireless/marvell/mwifiex/main.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 3498743d5ec0..4c8c7a5fdf23 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4673,8 +4673,9 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter) * additional active scan request for hidden SSIDs on passive channels. */ adapter->num_in_chan_stats = 2 * (n_channels_bg + n_channels_a); - adapter->chan_stats = vmalloc(array_size(sizeof(*adapter->chan_stats), - adapter->num_in_chan_stats)); + adapter->chan_stats = kcalloc(adapter->num_in_chan_stats, + sizeof(*adapter->chan_stats), + GFP_KERNEL); if (!adapter->chan_stats) return -ENOMEM; diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 7b50a88a18e5..1ec069bc8ea1 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -642,7 +642,7 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - vfree(adapter->chan_stats); + kfree(adapter->chan_stats); err_init_chan_scan: wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); @@ -1485,7 +1485,7 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter) wiphy_free(adapter->wiphy); adapter->wiphy = NULL; - vfree(adapter->chan_stats); + kfree(adapter->chan_stats); mwifiex_free_cmd_buffers(adapter); } From 98b6fa62c84f2e129161e976a5b9b3cb4ccd117b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Aug 2025 15:27:30 -0600 Subject: [PATCH 0933/1307] io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths Since the buffers are mapped from userspace, it is prudent to use READ_ONCE() to read the value into a local variable, and use that for any other actions taken. Having a stable read of the buffer length avoids worrying about it changing after checking, or being read multiple times. Similarly, the buffer may well change in between it being picked and being committed. Ensure the looping for incremental ring buffer commit stops if it hits a zero sized buffer, as no further progress can be made at that point. Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/ Reported-by: Qingyue Zhang Reported-by: Suoxing Zhang Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 81a13338dfab..19a8bde5e1e1 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -36,15 +36,19 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { while (len) { struct io_uring_buf *buf; - u32 this_len; + u32 buf_len, this_len; buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - this_len = min_t(u32, len, buf->len); - buf->len -= this_len; - if (buf->len) { + buf_len = READ_ONCE(buf->len); + this_len = min_t(u32, len, buf_len); + buf_len -= this_len; + /* Stop looping for invalid buffer length of 0 */ + if (buf_len || !this_len) { buf->addr += this_len; + buf->len = buf_len; return false; } + buf->len = 0; bl->head++; len -= this_len; } @@ -159,6 +163,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, __u16 tail, head = bl->head; struct io_uring_buf *buf; void __user *ret; + u32 buf_len; tail = smp_load_acquire(&br->tail); if (unlikely(tail == head)) @@ -168,8 +173,9 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BL_EMPTY; buf = io_ring_head_to_buf(br, head, bl->mask); - if (*len == 0 || *len > buf->len) - *len = buf->len; + buf_len = READ_ONCE(buf->len); + if (*len == 0 || *len > buf_len) + *len = buf_len; req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; req->buf_list = bl; req->buf_index = buf->bid; @@ -265,7 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, req->buf_index = buf->bid; do { - u32 len = buf->len; + u32 len = READ_ONCE(buf->len); /* truncate end piece, if needed, for non partial buffers */ if (len > arg->max_len) { From 22e6bdb129ec64e640f5cccef9686f7c1a7d559b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:29 +0300 Subject: [PATCH 0934/1307] wifi: iwlwifi: cfg: restore some 1000 series configs In the fixed commit, I inadvertently removed two configurations while combining the 0x0083/0x0084 device IDs. Replace the fixed matches for the BG versions by a masked match and add the BGN version back with a similar masked match. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220477 Fixes: 1fb053d9876f ("wifi: iwlwifi: cfg: remove unnecessary configs") Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250828095500.fabb99c2df9e.If0ad87bf9ab360da5f613e879fd416c17c544733@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 46d8c1922292..f5e72c90dd57 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -729,10 +729,10 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { DEVICE(0x0083), SUBDEV_MASKED(0x5, 0xF)), IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, DEVICE(0x0083), SUBDEV_MASKED(0x6, 0xF)), + IWL_DEV_INFO(iwl1000_bgn_cfg, iwl1000_bgn_name, + DEVICE(0x0084), SUBDEV_MASKED(0x5, 0xF)), IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, - DEVICE(0x0084), SUBDEV(0x1216)), - IWL_DEV_INFO(iwl1000_bg_cfg, iwl1000_bg_name, - DEVICE(0x0084), SUBDEV(0x1316)), + DEVICE(0x0084), SUBDEV_MASKED(0x6, 0xF)), /* 100 Series WiFi */ IWL_DEV_INFO(iwl100_bgn_cfg, iwl100_bgn_name, From 586e3cb33ba6890054b95aa0ade0a165890efabd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:30 +0300 Subject: [PATCH 0935/1307] wifi: iwlwifi: fix byte count table for old devices For devices handled by iwldvm, bc_table_dword was never set, but I missed that during the removal thereof. Change the logic to not treat the byte count table as dwords for devices older than 9000 series to fix that. Fixes: 6570ea227826 ("wifi: iwlwifi: remove bc_table_dword transport config") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.eccd7d3939f1.Ibaffa06d0b3aa5f35a9451d94af34de208b8a2bc@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c index 84a05cc1c27a..d912e709a92c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c @@ -2092,7 +2092,8 @@ static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans, break; } - if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_9000 && + trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) len = DIV_ROUND_UP(len, 4); if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX)) From 019f71a6760a6f89d388c3cd45622d1aae7d3641 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Aug 2025 09:55:31 +0300 Subject: [PATCH 0936/1307] wifi: iwlwifi: cfg: add back more lost PCI IDs Add back a few more PCI IDs to the config match table that evidently I lost during the cleanups. Fixes: 1fb053d9876f ("wifi: iwlwifi: cfg: remove unnecessary configs") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250828095500.46fee422651e.I8f6c3e9eea9523bb1658f5690b715eb443740e07@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f5e72c90dd57..f9e2095d6490 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -673,6 +673,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_sff_name, DEVICE(0x0082), SUBDEV_MASKED(0xC000, 0xF000)), + IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_sff_name, + DEVICE(0x0085), SUBDEV_MASKED(0xC000, 0xF000)), IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_d_name, DEVICE(0x0082), SUBDEV(0x4820)), IWL_DEV_INFO(iwl6005_n_cfg, iwl6005_2agn_mow1_name, @@ -964,6 +966,12 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { DEVICE(0x24F3), SUBDEV(0x0004)), IWL_DEV_INFO(iwl8260_cfg, iwl8260_2n_name, DEVICE(0x24F3), SUBDEV(0x0044)), + IWL_DEV_INFO(iwl8260_cfg, iwl8260_2ac_name, + DEVICE(0x24F4)), + IWL_DEV_INFO(iwl8260_cfg, iwl4165_2ac_name, + DEVICE(0x24F5)), + IWL_DEV_INFO(iwl8260_cfg, iwl4165_2ac_name, + DEVICE(0x24F6)), IWL_DEV_INFO(iwl8265_cfg, iwl8265_2ac_name, DEVICE(0x24FD)), IWL_DEV_INFO(iwl8265_cfg, iwl8275_2ac_name, From 497aa80ec7ee145b3606e7434d57091974d78598 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 27 Aug 2025 15:34:58 +0100 Subject: [PATCH 0937/1307] arm64: dts: rockchip: Add vcc-supply to SPI flash on Pinephone Pro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As documented in the PinephonePro-Schematic-V1.0-20211127.pdf, page 11, the SPI Flash's VCC pin is connected to VCC_1V8 power source. This fixes the following warning: spi-nor spi1.0: supply vcc not found, using dummy regulator Signed-off-by: Peter Robinson Reviewed-by: Ondřej Jirman Link: https://lore.kernel.org/r/20250827143501.1646163-1-pbrobinson@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts index 585ef0fd88ef..6f97e57f36f5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts @@ -754,6 +754,7 @@ flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <10000000>; + vcc-supply = <&vcc_1v8>; }; }; From f544bf03a771ee746b908e9a08ecb97c65a35055 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 31 Jul 2025 10:35:14 +0200 Subject: [PATCH 0938/1307] mtd: MTD_INTEL_DG should depend on DRM_I915 or DRM_XE Intel Discrete Graphics non-volatile memory is only present on Intel discrete graphics devices, and its auxiliary device is instantiated by the Intel i915 and Xe2 DRM drivers. Hence add dependencies on DRM_I915 and DRM_XE, to prevent asking the user about this driver when configuring a kernel without Intel graphics support. Fixes: ceb5ab3cb6463795 ("mtd: add driver for intel graphics non-volatile memory device") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal --- drivers/mtd/devices/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 46cebde79f34..e518dfeee654 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -185,8 +185,8 @@ config MTD_POWERNV_FLASH config MTD_INTEL_DG tristate "Intel Discrete Graphics non-volatile memory driver" - depends on AUXILIARY_BUS - depends on MTD + depends on AUXILIARY_BUS && MTD + depends on DRM_I915!=n || DRM_XE!=n || COMPILE_TEST help This provides an MTD device to access Intel Discrete Graphics non-volatile memory. From 1eae113dd5ff5192cfd3e11b6ab7b96193b42c01 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 30 Jul 2025 21:47:46 +0200 Subject: [PATCH 0939/1307] mtd: rawnand: nuvoton: Fix an error handling path in ma35_nand_chips_init() If a ma35_nand_chip_init() call fails, then a reference to 'nand_np' still needs to be released. Use for_each_child_of_node_scoped() to fix the issue. Fixes: 5abb5d414d55 ("mtd: rawnand: nuvoton: add new driver for the Nuvoton MA35 SoC") Signed-off-by: Christophe JAILLET Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c b/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c index c23b537948d5..1a285cd8fad6 100644 --- a/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c +++ b/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c @@ -935,10 +935,10 @@ static void ma35_chips_cleanup(struct ma35_nand_info *nand) static int ma35_nand_chips_init(struct device *dev, struct ma35_nand_info *nand) { - struct device_node *np = dev->of_node, *nand_np; + struct device_node *np = dev->of_node; int ret; - for_each_child_of_node(np, nand_np) { + for_each_child_of_node_scoped(np, nand_np) { ret = ma35_nand_chip_init(dev, nand, nand_np); if (ret) { ma35_chips_cleanup(nand); From 513c40e59d5a414ab763a9c84797534b5e8c208d Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Tue, 12 Aug 2025 09:26:58 +0200 Subject: [PATCH 0940/1307] mtd: rawnand: stm32_fmc2: avoid overlapping mappings on ECC buffer Avoid below overlapping mappings by using a contiguous non-cacheable buffer. [ 4.077708] DMA-API: stm32_fmc2_nfc 48810000.nand-controller: cacheline tracking EEXIST, overlapping mappings aren't supported [ 4.089103] WARNING: CPU: 1 PID: 44 at kernel/dma/debug.c:568 add_dma_entry+0x23c/0x300 [ 4.097071] Modules linked in: [ 4.100101] CPU: 1 PID: 44 Comm: kworker/u4:2 Not tainted 6.1.82 #1 [ 4.106346] Hardware name: STMicroelectronics STM32MP257F VALID1 SNOR / MB1704 (LPDDR4 Power discrete) + MB1703 + MB1708 (SNOR MB1730) (DT) [ 4.118824] Workqueue: events_unbound deferred_probe_work_func [ 4.124674] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 4.131624] pc : add_dma_entry+0x23c/0x300 [ 4.135658] lr : add_dma_entry+0x23c/0x300 [ 4.139792] sp : ffff800009dbb490 [ 4.143016] x29: ffff800009dbb4a0 x28: 0000000004008022 x27: ffff8000098a6000 [ 4.150174] x26: 0000000000000000 x25: ffff8000099e7000 x24: ffff8000099e7de8 [ 4.157231] x23: 00000000ffffffff x22: 0000000000000000 x21: ffff8000098a6a20 [ 4.164388] x20: ffff000080964180 x19: ffff800009819ba0 x18: 0000000000000006 [ 4.171545] x17: 6361727420656e69 x16: 6c6568636163203a x15: 72656c6c6f72746e [ 4.178602] x14: 6f632d646e616e2e x13: ffff800009832f58 x12: 00000000000004ec [ 4.185759] x11: 00000000000001a4 x10: ffff80000988af58 x9 : ffff800009832f58 [ 4.192916] x8 : 00000000ffffefff x7 : ffff80000988af58 x6 : 80000000fffff000 [ 4.199972] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000 [ 4.207128] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000812d2c40 [ 4.214185] Call trace: [ 4.216605] add_dma_entry+0x23c/0x300 [ 4.220338] debug_dma_map_sg+0x198/0x350 [ 4.224373] __dma_map_sg_attrs+0xa0/0x110 [ 4.228411] dma_map_sg_attrs+0x10/0x2c [ 4.232247] stm32_fmc2_nfc_xfer.isra.0+0x1c8/0x3fc [ 4.237088] stm32_fmc2_nfc_seq_read_page+0xc8/0x174 [ 4.242127] nand_read_oob+0x1d4/0x8e0 [ 4.245861] mtd_read_oob_std+0x58/0x84 [ 4.249596] mtd_read_oob+0x90/0x150 [ 4.253231] mtd_read+0x68/0xac Signed-off-by: Christophe Kerello Cc: stable@vger.kernel.org Fixes: 2cd457f328c1 ("mtd: rawnand: stm32_fmc2: add STM32 FMC2 NAND flash controller driver") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/stm32_fmc2_nand.c | 28 +++++++++----------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index a960403081f1..222c3c3b6848 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -272,6 +272,7 @@ struct stm32_fmc2_nfc { struct sg_table dma_data_sg; struct sg_table dma_ecc_sg; u8 *ecc_buf; + dma_addr_t dma_ecc_addr; int dma_ecc_len; u32 tx_dma_max_burst; u32 rx_dma_max_burst; @@ -902,17 +903,10 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf, if (!write_data && !raw) { /* Configure DMA ECC status */ - p = nfc->ecc_buf; for_each_sg(nfc->dma_ecc_sg.sgl, sg, eccsteps, s) { - sg_set_buf(sg, p, nfc->dma_ecc_len); - p += nfc->dma_ecc_len; - } - - ret = dma_map_sg(nfc->dev, nfc->dma_ecc_sg.sgl, - eccsteps, dma_data_dir); - if (!ret) { - ret = -EIO; - goto err_unmap_data; + sg_dma_address(sg) = nfc->dma_ecc_addr + + s * nfc->dma_ecc_len; + sg_dma_len(sg) = nfc->dma_ecc_len; } desc_ecc = dmaengine_prep_slave_sg(nfc->dma_ecc_ch, @@ -921,7 +915,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf, DMA_PREP_INTERRUPT); if (!desc_ecc) { ret = -ENOMEM; - goto err_unmap_ecc; + goto err_unmap_data; } reinit_completion(&nfc->dma_ecc_complete); @@ -929,7 +923,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf, desc_ecc->callback_param = &nfc->dma_ecc_complete; ret = dma_submit_error(dmaengine_submit(desc_ecc)); if (ret) - goto err_unmap_ecc; + goto err_unmap_data; dma_async_issue_pending(nfc->dma_ecc_ch); } @@ -949,7 +943,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf, if (!write_data && !raw) dmaengine_terminate_all(nfc->dma_ecc_ch); ret = -ETIMEDOUT; - goto err_unmap_ecc; + goto err_unmap_data; } /* Wait DMA data transfer completion */ @@ -969,11 +963,6 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf, } } -err_unmap_ecc: - if (!write_data && !raw) - dma_unmap_sg(nfc->dev, nfc->dma_ecc_sg.sgl, - eccsteps, dma_data_dir); - err_unmap_data: dma_unmap_sg(nfc->dev, nfc->dma_data_sg.sgl, eccsteps, dma_data_dir); @@ -1610,7 +1599,8 @@ static int stm32_fmc2_nfc_dma_setup(struct stm32_fmc2_nfc *nfc) return ret; /* Allocate a buffer to store ECC status registers */ - nfc->ecc_buf = devm_kzalloc(nfc->dev, FMC2_MAX_ECC_BUF_LEN, GFP_KERNEL); + nfc->ecc_buf = dmam_alloc_coherent(nfc->dev, FMC2_MAX_ECC_BUF_LEN, + &nfc->dma_ecc_addr, GFP_KERNEL); if (!nfc->ecc_buf) return -ENOMEM; From 811c0da4542df3c065f6cb843ced68780e27bb44 Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Tue, 12 Aug 2025 09:30:08 +0200 Subject: [PATCH 0941/1307] mtd: rawnand: stm32_fmc2: fix ECC overwrite In case OOB write is requested during a data write, ECC is currently lost. Avoid this issue by only writing in the free spare area. This issue has been seen with a YAFFS2 file system. Signed-off-by: Christophe Kerello Cc: stable@vger.kernel.org Fixes: 2cd457f328c1 ("mtd: rawnand: stm32_fmc2: add STM32 FMC2 NAND flash controller driver") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/stm32_fmc2_nand.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index 222c3c3b6848..d957327fb4fa 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -985,9 +985,21 @@ static int stm32_fmc2_nfc_seq_write(struct nand_chip *chip, const u8 *buf, /* Write oob */ if (oob_required) { - ret = nand_change_write_column_op(chip, mtd->writesize, - chip->oob_poi, mtd->oobsize, - false); + unsigned int offset_in_page = mtd->writesize; + const void *buf = chip->oob_poi; + unsigned int len = mtd->oobsize; + + if (!raw) { + struct mtd_oob_region oob_free; + + mtd_ooblayout_free(mtd, 0, &oob_free); + offset_in_page += oob_free.offset; + buf += oob_free.offset; + len = oob_free.length; + } + + ret = nand_change_write_column_op(chip, offset_in_page, + buf, len, false); if (ret) return ret; } From fd779eac2d659668be4d3dbdac0710afd5d6db12 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 21 Aug 2025 14:00:57 +0200 Subject: [PATCH 0942/1307] mtd: nand: raw: atmel: Respect tAR, tCLR in read setup timing Having setup time 0 violates tAR, tCLR of some chips, for instance TOSHIBA TC58NVG2S3ETAI0 cannot be detected successfully (first ID byte being read duplicated, i.e. 98 98 dc 90 15 76 14 03 instead of 98 dc 90 15 76 ...). Atmel Application Notes postulated 1 cycle NRD_SETUP without explanation [1], but it looks more appropriate to just calculate setup time properly. [1] Link: https://ww1.microchip.com/downloads/aemDocuments/documents/MPU32/ApplicationNotes/ApplicationNotes/doc6255.pdf Cc: stable@vger.kernel.org Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks") Signed-off-by: Alexander Sverdlin Tested-by: Alexander Dahl Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/atmel/nand-controller.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index 84ab4a83cbd6..db94d14a3807 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -1377,14 +1377,24 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand, if (ret) return ret; + /* + * Read setup timing depends on the operation done on the NAND: + * + * NRD_SETUP = max(tAR, tCLR) + */ + timeps = max(conf->timings.sdr.tAR_min, conf->timings.sdr.tCLR_min); + ncycles = DIV_ROUND_UP(timeps, mckperiodps); + totalcycles += ncycles; + ret = atmel_smc_cs_conf_set_setup(smcconf, ATMEL_SMC_NRD_SHIFT, ncycles); + if (ret) + return ret; + /* * The read cycle timing is directly matching tRC, but is also * dependent on the setup and hold timings we calculated earlier, * which gives: * - * NRD_CYCLE = max(tRC, NRD_PULSE + NRD_HOLD) - * - * NRD_SETUP is always 0. + * NRD_CYCLE = max(tRC, NRD_SETUP + NRD_PULSE + NRD_HOLD) */ ncycles = DIV_ROUND_UP(conf->timings.sdr.tRC_min, mckperiodps); ncycles = max(totalcycles, ncycles); From c5c5eb24ed6177fc0ef4bb75fc18d07a99c1d3f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 20:15:59 +0800 Subject: [PATCH 0943/1307] ublk: avoid ublk_io_release() called after ublk char dev is closed When running test_stress_04.sh, the following warning is triggered: WARNING: CPU: 1 PID: 135 at drivers/block/ublk_drv.c:1933 ublk_ch_release+0x423/0x4b0 [ublk_drv] This happens when the daemon is abruptly killed: - some references may still be held, because registering IO buffer doesn't grab ublk char device reference OR - io->task_registered_buffers won't be cleared because io buffer is released from non-daemon context For zero-copy and auto buffer register modes, I/O reference crosses syscalls, so IO reference may not be dropped naturally when ublk server is killed abruptly. However, when releasing io_uring context, it is guaranteed that the reference is dropped finally, see io_sqe_buffers_unregister() from io_ring_ctx_free(). Fix this by adding ublk_drain_io_references() that: - Waits for active I/O references dropped in async way by scheduling work function, for avoiding ublk dev and io_uring file's release dependency - Reinitializes io->ref and io->task_registered_buffers to clean state This ensures the reference count state is clean when ublk_queue_reinit() is called, preventing the warning and potential use-after-free. Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Fixes: 1ceeedb59749 ("ublk: optimize UBLK_IO_UNREGISTER_IO_BUF on daemon task") Fixes: 8a8fe42d765b ("ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827121602.2619736-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 72 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 99abd67b708b..67d4a867aec4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -239,6 +239,7 @@ struct ublk_device { struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; + struct delayed_work exit_work; }; /* header of ublk_params */ @@ -1595,12 +1596,62 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling) ublk_get_queue(ub, i)->canceling = canceling; } -static int ublk_ch_release(struct inode *inode, struct file *filp) +static bool ublk_check_and_reset_active_ref(struct ublk_device *ub) { - struct ublk_device *ub = filp->private_data; + int i, j; + + if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | + UBLK_F_AUTO_BUF_REG))) + return false; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + for (j = 0; j < ubq->q_depth; j++) { + struct ublk_io *io = &ubq->ios[j]; + unsigned int refs = refcount_read(&io->ref) + + io->task_registered_buffers; + + /* + * UBLK_REFCOUNT_INIT or zero means no active + * reference + */ + if (refs != UBLK_REFCOUNT_INIT && refs != 0) + return true; + + /* reset to zero if the io hasn't active references */ + refcount_set(&io->ref, 0); + io->task_registered_buffers = 0; + } + } + return false; +} + +static void ublk_ch_release_work_fn(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, exit_work.work); struct gendisk *disk; int i; + /* + * For zero-copy and auto buffer register modes, I/O references + * might not be dropped naturally when the daemon is killed, but + * io_uring guarantees that registered bvec kernel buffers are + * unregistered finally when freeing io_uring context, then the + * active references are dropped. + * + * Wait until active references are dropped for avoiding use-after-free + * + * registered buffer may be unregistered in io_ring's release hander, + * so have to wait by scheduling work function for avoiding the two + * file release dependency. + */ + if (ublk_check_and_reset_active_ref(ub)) { + schedule_delayed_work(&ub->exit_work, 1); + return; + } + /* * disk isn't attached yet, either device isn't live, or it has * been removed already, so we needn't to do anything @@ -1673,6 +1724,23 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) ublk_reset_ch_dev(ub); out: clear_bit(UB_STATE_OPEN, &ub->state); + + /* put the reference grabbed in ublk_ch_release() */ + ublk_put_device(ub); +} + +static int ublk_ch_release(struct inode *inode, struct file *filp) +{ + struct ublk_device *ub = filp->private_data; + + /* + * Grab ublk device reference, so it won't be gone until we are + * really released from work function. + */ + ublk_get_device(ub); + + INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn); + schedule_delayed_work(&ub->exit_work, 0); return 0; } From 9b2785ea8592f239836405de023c75c4f3f5ce00 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 20:16:00 +0800 Subject: [PATCH 0944/1307] ublk selftests: add --no_ublk_fixed_fd for not using registered ublk char device Add a new command line option --no_ublk_fixed_fd that excludes the ublk control device (/dev/ublkcN) from io_uring's registered files array. When this option is used, only backing files are registered starting from index 1, while the ublk control device is accessed using its raw file descriptor. Add ublk_get_registered_fd() helper function that returns the appropriate file descriptor for use with io_uring operations. Key optimizations implemented: - Cache UBLKS_Q_NO_UBLK_FIXED_FD flag in ublk_queue.flags to avoid reading dev->no_ublk_fixed_fd in fast path - Cache ublk char device fd in ublk_queue.ublk_fd for fast access - Update ublk_get_registered_fd() to use ublk_queue * parameter - Update io_uring_prep_buf_register/unregister() to use ublk_queue * - Replace ublk_device * access with ublk_queue * access in fast paths Also pass --no_ublk_fixed_fd to test_stress_04.sh for covering plain ublk char device mode. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827121602.2619736-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/file_backed.c | 10 ++--- tools/testing/selftests/ublk/kublk.c | 38 +++++++++++++--- tools/testing/selftests/ublk/kublk.h | 45 +++++++++++++------ tools/testing/selftests/ublk/null.c | 4 +- tools/testing/selftests/ublk/stripe.c | 4 +- .../testing/selftests/ublk/test_stress_04.sh | 6 +-- 6 files changed, 74 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 2d93ac860bd5..cd9fe69ecce2 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, struct io_uring_sqe *sqe[1]; ublk_io_alloc_sqes(t, sqe, 1); - io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); @@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (!sqe[0]) return -ENOMEM; - io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, addr, iod->nr_sectors << 9, iod->start_sector << 9); @@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); - io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0, + io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, iod->nr_sectors << 9, iod->start_sector << 9); sqe[1]->buf_index = tag; sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); return 2; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 6512dfbdbce3..b71faba86c3b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t) } } -static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) +static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) q->flags = dev->dev_info.flags; q->flags |= extra_flags; + /* Cache fd in queue for fast path access */ + q->ublk_fd = dev->fds[0]; + cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, @@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) return -ENOMEM; } -static int ublk_thread_init(struct ublk_thread *t) +static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) { struct ublk_dev *dev = t->dev; + unsigned long long flags = dev->dev_info.flags | extra_flags; int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; int ret; @@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t) io_uring_register_ring_fd(&t->ring); - ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + /* Register only backing files starting from index 1, exclude ublk control device */ + if (dev->nr_fds > 1) { + ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); + } else { + /* No backing files to register, skip file registration */ + ret = 0; + } + } else { + ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + } if (ret) { ublk_err("ublk dev %d thread %d register files failed %d\n", t->dev->dev_info.dev_id, t->idx, ret); @@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) /* These fields should be written once, never change */ ublk_set_sqe_cmd_op(sqe[0], cmd_op); - sqe[0]->fd = 0; /* dev->fds[0] */ + sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ sqe[0]->opcode = IORING_OP_URING_CMD; - sqe[0]->flags = IOSQE_FIXED_FILE; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe[0]->flags = 0; /* Use raw FD, not fixed file */ + else + sqe[0]->flags = IOSQE_FIXED_FILE; sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; @@ -832,6 +849,7 @@ struct ublk_thread_info { unsigned idx; sem_t *ready; cpu_set_t *affinity; + unsigned long long extra_flags; }; static void *ublk_io_handler_fn(void *data) @@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data) t->dev = info->dev; t->idx = info->idx; - ret = ublk_thread_init(t); + ret = ublk_thread_init(t, info->extra_flags); if (ret) { ublk_err("ublk dev %d thread %u init failed\n", dev_id, t->idx); @@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->auto_zc_fallback) extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; + if (ctx->no_ublk_fixed_fd) + extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; @@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) tinfo[i].dev = dev; tinfo[i].idx = i; tinfo[i].ready = &ready; + tinfo[i].extra_flags = extra_flags; /* * If threads are not tied 1:1 to queues, setting thread @@ -1471,7 +1492,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); - printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); printf("\t[--nthreads threads] [--per_io_tasks]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1534,6 +1555,7 @@ int main(int argc, char *argv[]) { "size", 1, NULL, 's'}, { "nthreads", 1, NULL, 0 }, { "per_io_tasks", 0, NULL, 0 }, + { "no_ublk_fixed_fd", 0, NULL, 0 }, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1613,6 +1635,8 @@ int main(int argc, char *argv[]) ctx.nthreads = strtol(optarg, NULL, 10); if (!strcmp(longopts[option_idx].name, "per_io_tasks")) ctx.per_io_tasks = 1; + if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) + ctx.no_ublk_fixed_fd = 1; break; case '?': /* diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 219233f8a053..5e55484fb0aa 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -77,6 +77,7 @@ struct dev_ctx { unsigned int recovery:1; unsigned int auto_zc_fallback:1; unsigned int per_io_tasks:1; + unsigned int no_ublk_fixed_fd:1; int _evtfd; int _shmid; @@ -166,7 +167,9 @@ struct ublk_queue { /* borrow one bit of ublk uapi flags, which may never be used */ #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) __u64 flags; + int ublk_fd; /* cached ublk char device fd */ struct ublk_io ios[UBLK_QUEUE_DEPTH]; }; @@ -273,34 +276,48 @@ static inline int ublk_io_alloc_sqes(struct ublk_thread *t, return nr_sqes; } -static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) +{ + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + if (fd_index == 0) + /* Return the raw ublk FD for index 0 */ + return q->ublk_fd; + /* Adjust index for backing files (index 1 becomes 0, etc.) */ + return fd_index - 1; + } + return fd_index; +} + +static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + int dev_fd = ublk_get_registered_fd(q, 0); io_uring_prep_read(sqe, dev_fd, 0, 0, 0); sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; - sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe->flags &= ~IOSQE_FIXED_FILE; + else + sqe->flags |= IOSQE_FIXED_FILE; cmd->tag = tag; cmd->addr = index; cmd->q_id = q_id; } -static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { - struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; +} - io_uring_prep_read(sqe, dev_fd, 0, 0, 0); - sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; - - cmd->tag = tag; - cmd->addr = index; - cmd->q_id = q_id; } static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index f0e0003a4860..280043f6b689 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -63,7 +63,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -71,7 +71,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, __setup_nop_io(tag, iod, sqe[1], q->q_id); sqe[1]->flags |= IOSQE_IO_HARDLINK; - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); // buf register is marked as IOSQE_CQE_SKIP_SUCCESS diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 1fb9b7cc281b..791fa8dc1651 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -142,7 +142,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); @@ -168,7 +168,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (zc) { struct io_uring_sqe *unreg = sqe[s->nr + 1]; - io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index); unreg->user_data = build_user_data( tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); } diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 40d1437ca298..3f901db4d09d 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -28,14 +28,14 @@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 -z & -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & if _have_feature "AUTO_BUF_REG"; then ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & - ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & fi From 85941afd2c404247e583c827fae0a45da1c1d92c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 25 Aug 2025 09:53:27 +0200 Subject: [PATCH 0945/1307] s390/pai: Deny all events not handled by this PMU Each PAI PMU device driver returns -EINVAL when an event is out of its accepted range. This return value aborts the search for an alternative PMU device driver to handle this event. Change the return value to -ENOENT. This return value is used to try other PMUs instead. This makes the PMUs more robust when the sequence of PMU device driver initialization changes (at boot time) or by using modules. Fixes: 39d62336f5c12 ("s390/pai: add support for cryptography counters") Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 4 ++-- arch/s390/kernel/perf_pai_ext.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index f373a1009c45..9455f213dc20 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -285,10 +285,10 @@ static int paicrypt_event_init(struct perf_event *event) /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) return -ENOENT; - /* PAI crypto event must be in valid range */ + /* PAI crypto event must be in valid range, try others if not */ if (a->config < PAI_CRYPTO_BASE || a->config > PAI_CRYPTO_BASE + paicrypt_cnt) - return -EINVAL; + return -ENOENT; /* Allow only CRYPTO_ALL for sampling */ if (a->sample_period && a->config != PAI_CRYPTO_BASE) return -EINVAL; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index d827473e7f87..7b32935273ce 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -265,7 +265,7 @@ static int paiext_event_valid(struct perf_event *event) event->hw.config_base = offsetof(struct paiext_cb, acc); return 0; } - return -EINVAL; + return -ENOENT; } /* Might be called on different CPU than the one the event is intended for. */ From ce971233242b5391d99442271f3ca096fb49818d Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Aug 2025 14:05:57 +0200 Subject: [PATCH 0946/1307] s390/cpum_cf: Deny all sampling events by counter PMU Deny all sampling event by the CPUMF counter facility device driver and return -ENOENT. This return value is used to try other PMUs. Up to now events for type PERF_TYPE_HARDWARE were not tested for sampling and returned later on -EOPNOTSUPP. This ends the search for alternative PMUs. Change that behavior and try other PMUs instead. Fixes: 613a41b0d16e ("s390/cpum_cf: Reject request for sampling in event initialization") Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_cpum_cf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 4d09954ebf49..04457d88e589 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -760,8 +760,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) break; case PERF_TYPE_HARDWARE: - if (is_sampling_event(event)) /* No sampling support */ - return -ENOENT; ev = attr->config; if (!attr->exclude_user && attr->exclude_kernel) { /* @@ -859,6 +857,8 @@ static int cpumf_pmu_event_init(struct perf_event *event) unsigned int type = event->attr.type; int err = -ENOENT; + if (is_sampling_event(event)) /* No sampling support */ + return err; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) From 3c75dc44ad11c347596aa81af3c8d4a7547dd517 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 27 Aug 2025 17:29:54 +0200 Subject: [PATCH 0947/1307] platform/x86: asus-wmi: map more keys on ExpertBook B9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * there is a dedicated "noise cancel" key in top row, between mic mute and PrintScreen; it sends 0xCA when pressed by itself (mapped to F13), 0xCB with Fn (mapped to F14) * Fn+f sends 0x9D; it is not documented in the manual, but some web search results mention "asus intelligent performance"; mapped to FN_F Signed-off-by: Anton Khirnov Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250827152954.4844-1-anton@khirnov.net Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 6928bb6ae0f3..3a488cf9ca06 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -632,6 +632,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */ { KE_KEY, 0x95, { KEY_MEDIA } }, { KE_KEY, 0x99, { KEY_PHONE } }, /* Conflicts with fan mode switch */ + { KE_KEY, 0X9D, { KEY_FN_F } }, { KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ { KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ { KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ @@ -646,6 +647,8 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_IGNORE, 0xC0, }, /* External display connect/disconnect notification */ { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, + { KE_KEY, 0xCA, { KEY_F13 } }, /* Noise cancelling on Expertbook B9 */ + { KE_KEY, 0xCB, { KEY_F14 } }, /* Fn+noise-cancel */ { KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */ { KE_IGNORE, 0xCF, }, /* AC mode */ { KE_KEY, 0xFA, { KEY_PROG2 } }, /* Lid flip action */ From c96f86217bb28e019403bb8f59eacd8ad5a7ad1a Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Wed, 27 Aug 2025 15:13:51 +0200 Subject: [PATCH 0948/1307] platform/x86/amd/pmc: Add TUXEDO IB Pro Gen10 AMD to spurious 8042 quirks list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents instant wakeup ~1s after suspend. It seems to be kernel/system dependent if the IRQ actually manages to wake the system every time or if it gets ignored (and everything works as expected). Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250827131424.16436-1-wse@tuxedocomputers.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 7ffc659b2794..18fb44139de2 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -248,6 +248,20 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Lafite Pro V 14M"), } }, + { + .ident = "TUXEDO InfinityBook Pro 14/15 AMD Gen10", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "XxHP4NAx"), + } + }, + { + .ident = "TUXEDO InfinityBook Pro 14/15 AMD Gen10", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "XxKK4NAx_XxSP4NAx"), + } + }, {} }; From 5549202b9c02c2ecbc8634768a3da8d9e82d548d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Aug 2025 07:24:33 +0200 Subject: [PATCH 0949/1307] platform/x86: asus-wmi: Fix racy registrations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asus_wmi_register_driver() may be called from multiple drivers concurrently, which can lead to the racy list operations, eventually corrupting the memory and hitting Oops on some ASUS machines. Also, the error handling is missing, and it forgot to unregister ACPI lps0 dev ops in the error case. This patch covers those issues by introducing a simple mutex at acpi_wmi_register_driver() & *_unregister_driver, and adding the proper call of asus_s2idle_check_unregister() in the error path. Fixes: feea7bd6b02d ("platform/x86: asus-wmi: Refactor Ally suspend/resume") Link: https://bugzilla.suse.com/show_bug.cgi?id=1246924 Link: https://lore.kernel.org/07815053-0e31-4e8e-8049-b652c929323b@kernel.org Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20250827052441.23382-1-tiwai@suse.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index f7191fdded14..e72a2b5d158e 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -5088,16 +5088,22 @@ static int asus_wmi_probe(struct platform_device *pdev) asus_s2idle_check_register(); - return asus_wmi_add(pdev); + ret = asus_wmi_add(pdev); + if (ret) + asus_s2idle_check_unregister(); + + return ret; } static bool used; +static DEFINE_MUTEX(register_mutex); int __init_or_module asus_wmi_register_driver(struct asus_wmi_driver *driver) { struct platform_driver *platform_driver; struct platform_device *platform_device; + guard(mutex)(®ister_mutex); if (used) return -EBUSY; @@ -5120,6 +5126,7 @@ EXPORT_SYMBOL_GPL(asus_wmi_register_driver); void asus_wmi_unregister_driver(struct asus_wmi_driver *driver) { + guard(mutex)(®ister_mutex); asus_s2idle_check_unregister(); platform_device_unregister(driver->platform_device); From 3010da6ecf2225e41a79b06bc5f4c9750a4d35cb Mon Sep 17 00:00:00 2001 From: Li Yifan Date: Mon, 25 Aug 2025 23:45:50 -0400 Subject: [PATCH 0950/1307] platform/x86/intel/pmc: Add Bartlett Lake support to intel_pmc_core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Bartlett Lake P-core only product support to intel_pmc_core driver. Bartlett Lake hybrid product reuses Raptor Lake model name so it is already enabled. Acked-by: Xi Pardee Signed-off-by: Li Yifan Link: https://lore.kernel.org/r/20250826034550.2284738-1-yifan2.li@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/pmc/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 540cd2fb0673..d040290e80ff 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1625,6 +1625,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &tgl_l_pmc_dev), X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_pmc_dev), X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_pmc_dev), + X86_MATCH_VFM(INTEL_BARTLETTLAKE, &adl_pmc_dev), X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_pmc_dev), X86_MATCH_VFM(INTEL_ARROWLAKE, &arl_pmc_dev), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &arl_h_pmc_dev), From 23408874e90ee299ab731bc0e0a9b3339dfc3c6e Mon Sep 17 00:00:00 2001 From: Edip Hazuri Date: Thu, 14 Aug 2025 23:45:32 +0300 Subject: [PATCH 0951/1307] platform/x86: hp-wmi: Add support for Fn+P hotkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the Fn+P hotkey found on newer HP Victus (and probably newer Omen) laptops. This hotkey is intended for use with Omen Gaming Hub to change the performance profile (see [1]). Pressing Fn+P under linux produced the following warning in dmesg: > hp_wmi: Unknown event_id - 27 - 0x7 Implemented a handling for this event so that the hotkey cycles between the platform profiles when triggered. Tested on Victus 16-s1011nt (9Z791EA, MB 8C9C). Changes in v2: - Make the key just switches between platform profiles instead of assigning a key event code. - v1: https://lore.kernel.org/all/20250802213541.18791-2-edip@medip.dev/ [1]: https://jpcdn.it/img/adadf6c927ffeb75afd8038f95db400a.png Signed-off-by: Edip Hazuri Link: https://lore.kernel.org/r/20250814204529.18467-4-edip@medip.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 60c8ac8d902c..8b3533d6ba09 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -122,6 +122,7 @@ enum hp_wmi_event_ids { HPWMI_BATTERY_CHARGE_PERIOD = 0x10, HPWMI_SANITIZATION_MODE = 0x17, HPWMI_CAMERA_TOGGLE = 0x1A, + HPWMI_FN_P_HOTKEY = 0x1B, HPWMI_OMEN_KEY = 0x1D, HPWMI_SMART_EXPERIENCE_APP = 0x21, }; @@ -981,6 +982,9 @@ static void hp_wmi_notify(union acpi_object *obj, void *context) key_code, 1, true)) pr_info("Unknown key code - 0x%x\n", key_code); break; + case HPWMI_FN_P_HOTKEY: + platform_profile_cycle(); + break; case HPWMI_OMEN_KEY: if (event_data) /* Only should be true for HP Omen */ key_code = event_data; From b0908e03fdd488a5ffd5b80d86dcfc77207464e7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 26 Aug 2025 22:40:07 +0200 Subject: [PATCH 0952/1307] platform/x86: acer-wmi: Stop using ACPI bitmap for platform profile choices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that the platform firmware on some models does not return valid data when reading the bitmap of supported platform profiles. This prevents the driver from loading on said models, even when the platform profile interface itself works. Fix this by stop using said bitmap until we have figured out how the OEM software itself detects available platform profiles. Tested-by: Lynne Megido Reported-by: Lynne Megido Closes: https://lore.kernel.org/platform-driver-x86/3f56e68f-85df-4c0a-982c-43f9d635be38@bune.city/ Fixes: 191e21f1a4c3 ("platform/x86: acer-wmi: use an ACPI bitmap to set the platform profile choices") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250826204007.5088-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/acer-wmi.c | 71 ++++++--------------------------- 1 file changed, 12 insertions(+), 59 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 69336bd778ee..13eb22b35aa8 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -129,6 +129,7 @@ enum acer_wmi_predator_v4_oc { enum acer_wmi_gaming_misc_setting { ACER_WMID_MISC_SETTING_OC_1 = 0x0005, ACER_WMID_MISC_SETTING_OC_2 = 0x0007, + /* Unreliable on some models */ ACER_WMID_MISC_SETTING_SUPPORTED_PROFILES = 0x000A, ACER_WMID_MISC_SETTING_PLATFORM_PROFILE = 0x000B, }; @@ -794,9 +795,6 @@ static bool platform_profile_support; */ static int last_non_turbo_profile = INT_MIN; -/* The most performant supported profile */ -static int acer_predator_v4_max_perf; - enum acer_predator_v4_thermal_profile { ACER_PREDATOR_V4_THERMAL_PROFILE_QUIET = 0x00, ACER_PREDATOR_V4_THERMAL_PROFILE_BALANCED = 0x01, @@ -2014,7 +2012,7 @@ acer_predator_v4_platform_profile_set(struct device *dev, if (err) return err; - if (tp != acer_predator_v4_max_perf) + if (tp != ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO) last_non_turbo_profile = tp; return 0; @@ -2023,55 +2021,14 @@ acer_predator_v4_platform_profile_set(struct device *dev, static int acer_predator_v4_platform_profile_probe(void *drvdata, unsigned long *choices) { - unsigned long supported_profiles; - int err; + set_bit(PLATFORM_PROFILE_PERFORMANCE, choices); + set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, choices); + set_bit(PLATFORM_PROFILE_BALANCED, choices); + set_bit(PLATFORM_PROFILE_QUIET, choices); + set_bit(PLATFORM_PROFILE_LOW_POWER, choices); - err = WMID_gaming_get_misc_setting(ACER_WMID_MISC_SETTING_SUPPORTED_PROFILES, - (u8 *)&supported_profiles); - if (err) - return err; - - /* Iterate through supported profiles in order of increasing performance */ - if (test_bit(ACER_PREDATOR_V4_THERMAL_PROFILE_ECO, &supported_profiles)) { - set_bit(PLATFORM_PROFILE_LOW_POWER, choices); - acer_predator_v4_max_perf = ACER_PREDATOR_V4_THERMAL_PROFILE_ECO; - last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_ECO; - } - - if (test_bit(ACER_PREDATOR_V4_THERMAL_PROFILE_QUIET, &supported_profiles)) { - set_bit(PLATFORM_PROFILE_QUIET, choices); - acer_predator_v4_max_perf = ACER_PREDATOR_V4_THERMAL_PROFILE_QUIET; - last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_QUIET; - } - - if (test_bit(ACER_PREDATOR_V4_THERMAL_PROFILE_BALANCED, &supported_profiles)) { - set_bit(PLATFORM_PROFILE_BALANCED, choices); - acer_predator_v4_max_perf = ACER_PREDATOR_V4_THERMAL_PROFILE_BALANCED; - last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_BALANCED; - } - - if (test_bit(ACER_PREDATOR_V4_THERMAL_PROFILE_PERFORMANCE, &supported_profiles)) { - set_bit(PLATFORM_PROFILE_BALANCED_PERFORMANCE, choices); - acer_predator_v4_max_perf = ACER_PREDATOR_V4_THERMAL_PROFILE_PERFORMANCE; - - /* We only use this profile as a fallback option in case no prior - * profile is supported. - */ - if (last_non_turbo_profile < 0) - last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_PERFORMANCE; - } - - if (test_bit(ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO, &supported_profiles)) { - set_bit(PLATFORM_PROFILE_PERFORMANCE, choices); - acer_predator_v4_max_perf = ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO; - - /* We need to handle the hypothetical case where only the turbo profile - * is supported. In this case the turbo toggle will essentially be a - * no-op. - */ - if (last_non_turbo_profile < 0) - last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO; - } + /* Set default non-turbo profile */ + last_non_turbo_profile = ACER_PREDATOR_V4_THERMAL_PROFILE_BALANCED; return 0; } @@ -2108,19 +2065,15 @@ static int acer_thermal_profile_change(void) if (cycle_gaming_thermal_profile) { platform_profile_cycle(); } else { - /* Do nothing if no suitable platform profiles where found */ - if (last_non_turbo_profile < 0) - return 0; - err = WMID_gaming_get_misc_setting( ACER_WMID_MISC_SETTING_PLATFORM_PROFILE, ¤t_tp); if (err) return err; - if (current_tp == acer_predator_v4_max_perf) + if (current_tp == ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO) tp = last_non_turbo_profile; else - tp = acer_predator_v4_max_perf; + tp = ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO; err = WMID_gaming_set_misc_setting( ACER_WMID_MISC_SETTING_PLATFORM_PROFILE, tp); @@ -2128,7 +2081,7 @@ static int acer_thermal_profile_change(void) return err; /* Store last profile for toggle */ - if (current_tp != acer_predator_v4_max_perf) + if (current_tp != ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO) last_non_turbo_profile = current_tp; platform_profile_notify(platform_profile_device); From 95a7c5000956f939b86d8b00b8e6b8345f4a9b65 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 28 Aug 2025 23:48:35 +0800 Subject: [PATCH 0953/1307] bcache: change maintainer's email address Change to my new email address on fnnas.com. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20250828154835.32926-1-colyli@kernel.org Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..b47daf498a97 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4205,7 +4205,7 @@ W: http://www.baycom.org/~tom/ham/ham.html F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Coly Li +M: Coly Li M: Kent Overstreet L: linux-bcache@vger.kernel.org S: Maintained From 112f7d3cff02e357c2f7a116fd7ab6a366ed27f4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Aug 2025 16:11:00 +0200 Subject: [PATCH 0954/1307] ALSA: hda: Avoid binding with SOF for SKL/KBL platforms For Intel SKL and KBL platforms, it may be bound with one of three HD-audio drivers (AVS, SOF and legacy). AVS is the preferred one when DMIC is detected, and that's how it's defined in the snd-intel-dspcfg config table. But, when AVS driver is disabled (CONFIG_SND_SOC_INTEL_AVS=n), the device may be bound freely with either SOF or legacy driver. Before 6.17, the legacy driver took it primarily, but on 6.17, likely due to the recent code shuffling, SOF driver seems taking it at first, and fails to probe. For avoiding the regression, we should enforce to bind those with the legacy HD-audio drvier when AVS is disabled. This patch adds the extra two entries in intel-dspcfg table that are applied only when CONFIG_SND_SOC_INTEL_AVS=n, for binding with the legacy driver. Note that there are entries for APL in that config table block, but APL may be supported by SOF for certain setups, so the choice can't be exclusive. Hence this patch includes only SKL and KBL. Link: https://bugzilla.suse.com/show_bug.cgi?id=1248121 Reviewed-by: Cezary Rojewski Link: https://patch.msgid.link/20250828141101.16294-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/core/intel-dsp-config.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/hda/core/intel-dsp-config.c b/sound/hda/core/intel-dsp-config.c index 00f184917623..c15284742899 100644 --- a/sound/hda/core/intel-dsp-config.c +++ b/sound/hda/core/intel-dsp-config.c @@ -116,6 +116,13 @@ static const struct config_entry config_table[] = { .flags = FLAG_SST, .device = PCI_DEVICE_ID_INTEL_HDA_FCL, }, +#else /* AVS disabled; force to legacy as SOF doesn't work for SKL or KBL */ + { + .device = PCI_DEVICE_ID_INTEL_HDA_SKL_LP, + }, + { + .device = PCI_DEVICE_ID_INTEL_HDA_KBL_LP, + }, #endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE) { From b720269334f55f2e683fbb8f1f065a12f2c72eb6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 13:19:23 +0100 Subject: [PATCH 0955/1307] KVM: arm64: Check for SYSREGS_ON_CPU before accessing the 32bit state Just like c6e35dff58d3 ("KVM: arm64: Check for SYSREGS_ON_CPU before accessing the CPU state") fixed the 64bit state access, add a check for the 32bit state actually being on the CPU before writing it. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250817121926.217900-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/exception.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 95d186e0bf54..3e67333197ab 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -59,7 +59,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_abt); else vcpu->arch.ctxt.spsr_abt = val; @@ -67,7 +67,7 @@ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_und); else vcpu->arch.ctxt.spsr_und = val; From e3f6836a632e6d68201b6bc6e02bda92b00f3f57 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 13:19:24 +0100 Subject: [PATCH 0956/1307] KVM: arm64: Simplify sysreg access on exception delivery Distinguishing between NV and VHE is slightly pointless, and only serves as an extra complication, or a way to introduce bugs, such as the way SPSR_EL1 gets written without checking for the state being resident. Get rid if this silly distinction, and fix the bug in one go. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250817121926.217900-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/exception.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 3e67333197ab..bef40ddb16db 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -22,36 +22,28 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { - u64 val; - - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) return vcpu_read_sys_reg(vcpu, reg); - else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && - __vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; return __vcpu_sys_reg(vcpu, reg); } static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) vcpu_write_sys_reg(vcpu, val, reg); - else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) || - !__vcpu_write_sys_reg_to_cpu(val, reg)) + else __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, u64 val) { - if (unlikely(vcpu_has_nv(vcpu))) { + if (has_vhe()) { if (target_mode == PSR_MODE_EL1h) vcpu_write_sys_reg(vcpu, val, SPSR_EL1); else vcpu_write_sys_reg(vcpu, val, SPSR_EL2); - } else if (has_vhe()) { - write_sysreg_el1(val, SYS_SPSR); } else { __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } From ec0ab059d4359b2dad69f4ef90fa58a0bcdef525 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 13:19:25 +0100 Subject: [PATCH 0957/1307] KVM: arm64: Fix vcpu_{read,write}_sys_reg() accessors Volodymyr reports (again!) that under some circumstances (E2H==0, walking S1 PTs), PAR_EL1 doesn't report the value of the latest walk in the CPU register, but that instead the value is written to the backing store. Further investigation indicates that the root cause of this is that a group of registers (PAR_EL1, TPIDR*_EL{0,1}, the *32_EL2 dregs) should always be considered as "on CPU", as they are not remapped between EL1 and EL2. We fail to treat them accordingly, and end-up considering that the register (PAR_EL1 in this example) should be written to memory instead of in the register. While it would be possible to quickly work around it, it is obvious that the way we track these things at the moment is pretty horrible, and could do with some improvement. Revamp the whole thing by: - defining a location for a register (memory, cpu), potentially depending on the state of the vcpu - define a transformation for this register (mapped register, potential translation, special register needing some particular attention) - convey this information in a structure that can be easily passed around As a result, the accessors themselves become much simpler, as the state is explicit instead of being driven by hard-to-understand conventions. We get rid of the "pure EL2 register" notion, which wasn't very useful, and add sanitisation of the values by applying the RESx masks as required, something that was missing so far. And of course, we add the missing registers to the list, with the indication that they are always loaded. Reported-by: Volodymyr Babchuk Signed-off-by: Marc Zyngier Fixes: fedc612314acf ("KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()") Link: https://lore.kernel.org/r/20250806141707.3479194-3-volodymyr_babchuk@epam.com Link: https://lore.kernel.org/r/20250817121926.217900-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 4 +- arch/arm64/kvm/sys_regs.c | 282 ++++++++++++++++++------------ 2 files changed, 168 insertions(+), 118 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d373d555a69b..2de44dc630b5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1158,8 +1158,8 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); __v; \ }) -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); +u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg); static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e387d1dfed1e..fe7260eebcbc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -82,43 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, "sys_reg write to read-only register"); } -#define PURE_EL2_SYSREG(el2) \ - case el2: { \ - *el1r = el2; \ - return true; \ - } +enum sr_loc_attr { + SR_LOC_MEMORY = 0, /* Register definitely in memory */ + SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */ + SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */ + SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */ + SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */ +}; -#define MAPPED_EL2_SYSREG(el2, el1, fn) \ - case el2: { \ - *xlate = fn; \ - *el1r = el1; \ - return true; \ - } +struct sr_loc { + enum sr_loc_attr loc; + enum vcpu_sysreg map_reg; + u64 (*xlate)(u64); +}; -static bool get_el2_to_el1_mapping(unsigned int reg, - unsigned int *el1r, u64 (**xlate)(u64)) +static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg) { switch (reg) { - PURE_EL2_SYSREG( VPIDR_EL2 ); - PURE_EL2_SYSREG( VMPIDR_EL2 ); - PURE_EL2_SYSREG( ACTLR_EL2 ); - PURE_EL2_SYSREG( HCR_EL2 ); - PURE_EL2_SYSREG( MDCR_EL2 ); - PURE_EL2_SYSREG( HSTR_EL2 ); - PURE_EL2_SYSREG( HACR_EL2 ); - PURE_EL2_SYSREG( VTTBR_EL2 ); - PURE_EL2_SYSREG( VTCR_EL2 ); - PURE_EL2_SYSREG( TPIDR_EL2 ); - PURE_EL2_SYSREG( HPFAR_EL2 ); - PURE_EL2_SYSREG( HCRX_EL2 ); - PURE_EL2_SYSREG( HFGRTR_EL2 ); - PURE_EL2_SYSREG( HFGWTR_EL2 ); - PURE_EL2_SYSREG( HFGITR_EL2 ); - PURE_EL2_SYSREG( HDFGRTR_EL2 ); - PURE_EL2_SYSREG( HDFGWTR_EL2 ); - PURE_EL2_SYSREG( HAFGRTR_EL2 ); - PURE_EL2_SYSREG( CNTVOFF_EL2 ); - PURE_EL2_SYSREG( CNTHCTL_EL2 ); + case SCTLR_EL1: + case CPACR_EL1: + case TTBR0_EL1: + case TTBR1_EL1: + case TCR_EL1: + case TCR2_EL1: + case PIR_EL1: + case PIRE0_EL1: + case POR_EL1: + case ESR_EL1: + case AFSR0_EL1: + case AFSR1_EL1: + case FAR_EL1: + case MAIR_EL1: + case VBAR_EL1: + case CONTEXTIDR_EL1: + case AMAIR_EL1: + case CNTKCTL_EL1: + case ELR_EL1: + case SPSR_EL1: + case ZCR_EL1: + case SCTLR2_EL1: + /* + * EL1 registers which have an ELx2 mapping are loaded if + * we're not in hypervisor context. + */ + return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED; + + case TPIDR_EL0: + case TPIDRRO_EL0: + case TPIDR_EL1: + case PAR_EL1: + case DACR32_EL2: + case IFSR32_EL2: + case DBGVCR32_EL2: + /* These registers are always loaded, no matter what */ + return SR_LOC_LOADED; + + default: + /* Non-mapped EL2 registers are by definition in memory. */ + return SR_LOC_MEMORY; + } +} + +static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg, + enum vcpu_sysreg map_reg, + u64 (*xlate)(u64), + struct sr_loc *loc) +{ + if (!is_hyp_ctxt(vcpu)) { + loc->loc = SR_LOC_MEMORY; + return; + } + + loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED; + loc->map_reg = map_reg; + + WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY); + + if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) { + loc->loc |= SR_LOC_XLATED; + loc->xlate = xlate; + } +} + +#define MAPPED_EL2_SYSREG(r, m, t) \ + case r: { \ + locate_mapped_el2_register(vcpu, r, m, t, loc); \ + break; \ + } + +static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, + struct sr_loc *loc) +{ + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) { + loc->loc = SR_LOC_MEMORY; + return; + } + + switch (reg) { MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, translate_sctlr_el2_to_sctlr_el1 ); MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1, @@ -144,125 +206,113 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); + case CNTHCTL_EL2: + /* CNTHCTL_EL2 is super special, until we support NV2.1 */ + loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ? + SR_LOC_SPECIAL : SR_LOC_MEMORY); + break; default: - return false; + loc->loc = locate_direct_register(vcpu, reg); } } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) { - u64 val = 0x8badf00d8badf00d; - u64 (*xlate)(u64) = NULL; - unsigned int el1r; + struct sr_loc loc = {}; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_read; + locate_register(vcpu, reg, &loc); - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_read; + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); + + if (loc.loc & SR_LOC_SPECIAL) { + u64 val; + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); /* - * CNTHCTL_EL2 requires some special treatment to - * account for the bits that can be set via CNTKCTL_EL1. + * CNTHCTL_EL2 requires some special treatment to account + * for the bits that can be set via CNTKCTL_EL1 when E2H==1. */ switch (reg) { case CNTHCTL_EL2: - if (vcpu_el2_e2h_is_set(vcpu)) { - val = read_sysreg_el1(SYS_CNTKCTL); - val &= CNTKCTL_VALID_BITS; - val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; - return val; - } - break; + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; + return val; + default: + WARN_ON_ONCE(1); } - - /* - * If this register does not have an EL1 counterpart, - * then read the stored EL2 version. - */ - if (reg == el1r) - goto memory_read; - - /* - * If we have a non-VHE guest and that the sysreg - * requires translation to be used at EL1, use the - * in-memory copy instead. - */ - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - goto memory_read; - - /* Get the current version of the EL1 counterpart. */ - WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); - if (reg >= __SANITISED_REG_START__) - val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - - return val; } - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_read; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; + u64 val = 0x8badf00d8badf00d; - if (__vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; + + if (!(loc.loc & SR_LOC_XLATED) && + __vcpu_read_sys_reg_from_cpu(map_reg, &val)) { + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); + + return val; + } + } -memory_read: return __vcpu_sys_reg(vcpu, reg); } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg) { - u64 (*xlate)(u64) = NULL; - unsigned int el1r; + struct sr_loc loc = {}; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_write; + locate_register(vcpu, reg, &loc); - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_write; + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); - /* - * Always store a copy of the write to memory to avoid having - * to reverse-translate virtual EL2 system registers for a - * non-VHE guest hypervisor. - */ - __vcpu_assign_sys_reg(vcpu, reg, val); + if (loc.loc & SR_LOC_SPECIAL) { + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); switch (reg) { case CNTHCTL_EL2: /* - * If E2H=0, CNHTCTL_EL2 is a pure shadow register. - * Otherwise, some of the bits are backed by + * If E2H=1, some of the bits are backed by * CNTKCTL_EL1, while the rest is kept in memory. * Yes, this is fun stuff. */ - if (vcpu_el2_e2h_is_set(vcpu)) - write_sysreg_el1(val, SYS_CNTKCTL); - return; + write_sysreg_el1(val, SYS_CNTKCTL); + break; + default: + WARN_ON_ONCE(1); } - - /* No EL1 counterpart? We're done here.? */ - if (reg == el1r) - return; - - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - val = xlate(val); - - /* Redirect this to the EL1 version of the register. */ - WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r)); - return; } - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_write; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; + u64 xlated_val; - if (__vcpu_write_sys_reg_to_cpu(val, reg)) - return; + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); + + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; + + if (loc.loc & SR_LOC_XLATED) + xlated_val = loc.xlate(val); + else + xlated_val = val; + + __vcpu_write_sys_reg_to_cpu(xlated_val, map_reg); + + /* + * Fall through to write the backing store anyway, which + * allows translated registers to be directly read without a + * reverse translation. + */ + } -memory_write: __vcpu_assign_sys_reg(vcpu, reg, val); } From 3328d17e70545f83515b07851302d48c85c466b0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Aug 2025 13:19:26 +0100 Subject: [PATCH 0958/1307] KVM: arm64: Remove __vcpu_{read,write}_sys_reg_{from,to}_cpu() There is no point having __vcpu_{read,write}_sys_reg_{from,to}_cpu() exposed to the rest of the kernel, as the only callers are in sys_regs.c. Move them where they below, which is another opportunity to simplify things a bit. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250817121926.217900-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 107 ------------------------------ arch/arm64/kvm/sys_regs.c | 84 +++++++++++++++++++++-- 2 files changed, 80 insertions(+), 111 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2de44dc630b5..46900f1084ff 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1161,113 +1161,6 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg); void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg); -static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; - case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; - case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; - case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; - case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} - -static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; - case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; - case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; - case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; - case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} - struct kvm_vm_stat { struct kvm_vm_stat_generic generic; }; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fe7260eebcbc..cc0dbc45f144 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -216,6 +216,82 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, } } +static u64 read_sr_from_cpu(enum vcpu_sysreg reg) +{ + u64 val = 0x8badf00d8badf00d; + + switch (reg) { + case SCTLR_EL1: val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: val = read_sysreg_s(SYS_POR_EL12); break; + case ESR_EL1: val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: val = read_sysreg_s(SYS_SPSR_EL12); break; + case ZCR_EL1: val = read_sysreg_s(SYS_ZCR_EL12); break; + case SCTLR2_EL1: val = read_sysreg_s(SYS_SCTLR2_EL12); break; + case TPIDR_EL0: val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: val = read_sysreg_s(SYS_TPIDR_EL1); break; + case PAR_EL1: val = read_sysreg_par(); break; + case DACR32_EL2: val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } + + return val; +} + +static void write_sr_to_cpu(enum vcpu_sysreg reg, u64 val) +{ + switch (reg) { + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; + case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } +} + u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) { struct sr_loc loc = {}; @@ -246,13 +322,13 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) if (loc.loc & SR_LOC_LOADED) { enum vcpu_sysreg map_reg = reg; - u64 val = 0x8badf00d8badf00d; if (loc.loc & SR_LOC_MAPPED) map_reg = loc.map_reg; - if (!(loc.loc & SR_LOC_XLATED) && - __vcpu_read_sys_reg_from_cpu(map_reg, &val)) { + if (!(loc.loc & SR_LOC_XLATED)) { + u64 val = read_sr_from_cpu(map_reg); + if (reg >= __SANITISED_REG_START__) val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); @@ -304,7 +380,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg) else xlated_val = val; - __vcpu_write_sys_reg_to_cpu(xlated_val, map_reg); + write_sr_to_cpu(map_reg, xlated_val); /* * Fall through to write the backing store anyway, which From 3e7fd1febc3156d3d98fba229399a13b12d69707 Mon Sep 17 00:00:00 2001 From: Ajye Huang Date: Tue, 26 Aug 2025 23:40:40 +0800 Subject: [PATCH 0959/1307] ASoC: SOF: Intel: WCL: Add the sdw_process_wakeen op Add the missing op in the device description to avoid issues with jack detection. Fixes: 6b04629ae97a ("ASoC: SOF: Intel: add initial support for WCL") Acked-by: Peter Ujfalusi Signed-off-by: Ajye Huang Message-ID: <20250826154040.2723998-1-ajye_huang@compal.corp-partner.google.com> Signed-off-by: Mark Brown --- sound/soc/sof/intel/ptl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/ptl.c b/sound/soc/sof/intel/ptl.c index 1bc1f54c470d..4633cd01e7dd 100644 --- a/sound/soc/sof/intel/ptl.c +++ b/sound/soc/sof/intel/ptl.c @@ -143,6 +143,7 @@ const struct sof_intel_dsp_desc wcl_chip_info = { .read_sdw_lcount = hda_sdw_check_lcount_ext, .check_sdw_irq = lnl_dsp_check_sdw_irq, .check_sdw_wakeen_irq = lnl_sdw_check_wakeen_irq, + .sdw_process_wakeen = hda_sdw_process_wakeen_common, .check_ipc_irq = mtl_dsp_check_ipc_irq, .cl_init = mtl_dsp_cl_init, .power_down_dsp = mtl_power_down_dsp, From ee372e645178802be7cb35263de941db7b2c5354 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 9 Aug 2025 15:48:10 +0100 Subject: [PATCH 0960/1307] KVM: arm64: nv: Fix ATS12 handling of single-stage translation Volodymyr reports that using a Xen DomU as a nested guest (where HCR_EL2.E2H == 0), ATS12 results in a translation that stops at the L2's S1, which isn't something you'd normally expects. Comparing the code against the spec proves to be illuminating, and suggests that the author of such code must have been tired, cross-eyed, drunk, or maybe all of the above. The gist of it is that, apart from HCR_EL2.VM or HCR_EL2.DC being 0, only the use of the EL2&0 translation regime limits the walk to S1 only, and that we must finish the S2 walk in any other case. Which solves the above issue, as E2H==0 indicates that ATS12 walks the EL1&0 translation regime. Explicitly checking for EL2&0 fixes this. Reported-by: Volodymyr Babchuk Suggested-by: Oliver Upton Signed-off-by: Marc Zyngier Fixes: be04cebf3e788 ("KVM: arm64: nv: Add emulation of AT S12E{0,1}{R,W}") Link: https://lore.kernel.org/r/20250806141707.3479194-2-volodymyr_babchuk@epam.com Link: https://lore.kernel.org/r/20250809144811.2314038-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 0e5610533949..d71ca4ddc9d1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1420,10 +1420,10 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) return; /* - * If we only have a single stage of translation (E2H=0 or - * TGE=1), exit early. Same thing if {VM,DC}=={0,0}. + * If we only have a single stage of translation (EL2&0), exit + * early. Same thing if {VM,DC}=={0,0}. */ - if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) || + if (compute_translation_regime(vcpu, op) == TR_EL20 || !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) return; From ebf2bfec412ad293a0b118fb1a20a551088ebc9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Aug 2025 15:16:16 -0700 Subject: [PATCH 0961/1307] MAINTAINERS: mark bcachefs externally maintained As per many long discussion threads, public and private. Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..adcbb094ebd5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4216,7 +4216,7 @@ F: drivers/md/bcache/ BCACHEFS M: Kent Overstreet L: linux-bcachefs@vger.kernel.org -S: Supported +S: Externally maintained C: irc://irc.oftc.net/bcache P: Documentation/filesystems/bcachefs/SubmittingPatches.rst T: git https://evilpiepirate.org/git/bcachefs.git From c34414883f773412964404d77cd2fea04c6f7d60 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Mon, 18 Aug 2025 16:17:52 +0200 Subject: [PATCH 0962/1307] drm/mediatek: mtk_hdmi: Fix inverted parameters in some regmap_update_bits calls In mtk_hdmi driver, a recent change replaced custom register access function calls by regmap ones, but two replacements by regmap_update_bits were done incorrectly, because original offset and mask parameters were inverted, so fix them. Fixes: d6e25b3590a0 ("drm/mediatek: hdmi: Use regmap instead of iomem for main registers") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250818-mt8173-fix-hdmi-issue-v1-1-55aff9b0295d@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 845fd8aa43c3..b766dd5e6c8d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -182,8 +182,8 @@ static inline struct mtk_hdmi *hdmi_ctx_from_bridge(struct drm_bridge *b) static void mtk_hdmi_hw_vid_black(struct mtk_hdmi *hdmi, bool black) { - regmap_update_bits(hdmi->regs, VIDEO_SOURCE_SEL, - VIDEO_CFG_4, black ? GEN_RGB : NORMAL_PATH); + regmap_update_bits(hdmi->regs, VIDEO_CFG_4, + VIDEO_SOURCE_SEL, black ? GEN_RGB : NORMAL_PATH); } static void mtk_hdmi_hw_make_reg_writable(struct mtk_hdmi *hdmi, bool enable) @@ -310,8 +310,8 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, static void mtk_hdmi_hw_send_aud_packet(struct mtk_hdmi *hdmi, bool enable) { - regmap_update_bits(hdmi->regs, AUDIO_PACKET_OFF, - GRL_SHIFT_R2, enable ? 0 : AUDIO_PACKET_OFF); + regmap_update_bits(hdmi->regs, GRL_SHIFT_R2, + AUDIO_PACKET_OFF, enable ? 0 : AUDIO_PACKET_OFF); } static void mtk_hdmi_hw_config_sys(struct mtk_hdmi *hdmi) From bb585591ebf00fb1f6a1fdd1ea96b5848bd9112d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 27 Aug 2025 21:43:09 +0200 Subject: [PATCH 0963/1307] fhandle: use more consistent rules for decoding file handle from userns Commit 620c266f39493 ("fhandle: relax open_by_handle_at() permission checks") relaxed the coditions for decoding a file handle from non init userns. The conditions are that that decoded dentry is accessible from the user provided mountfd (or to fs root) and that all the ancestors along the path have a valid id mapping in the userns. These conditions are intentionally more strict than the condition that the decoded dentry should be "lookable" by path from the mountfd. For example, the path /home/amir/dir/subdir is lookable by path from unpriv userns of user amir, because /home perms is 755, but the owner of /home does not have a valid id mapping in unpriv userns of user amir. The current code did not check that the decoded dentry itself has a valid id mapping in the userns. There is no security risk in that, because that final open still performs the needed permission checks, but this is inconsistent with the checks performed on the ancestors, so the behavior can be a bit confusing. Add the check for the decoded dentry itself, so that the entire path, including the last component has a valid id mapping in the userns. Fixes: 620c266f39493 ("fhandle: relax open_by_handle_at() permission checks") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250827194309.1259650-1-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/fhandle.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/fhandle.c b/fs/fhandle.c index 68a7d2861c58..a907ddfac4d5 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -207,6 +207,14 @@ static int vfs_dentry_acceptable(void *context, struct dentry *dentry) if (!ctx->flags) return 1; + /* + * Verify that the decoded dentry itself has a valid id mapping. + * In case the decoded dentry is the mountfd root itself, this + * verifies that the mountfd inode itself has a valid id mapping. + */ + if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(dentry))) + return 0; + /* * It's racy as we're not taking rename_lock but we're able to ignore * permissions and we just need an approximation whether we were able From 2cbe4ac193ed7172cfd825c0cc46ce4a41be4ba1 Mon Sep 17 00:00:00 2001 From: qaqland Date: Fri, 29 Aug 2025 14:40:48 +0800 Subject: [PATCH 0964/1307] ALSA: usb-audio: Add mute TLV for playback volumes on more devices Applying the quirk of that, the lowest Playback mixer volume setting mutes the audio output, on more devices. Suggested-by: Cryolitia PukNgae Signed-off-by: qaqland Link: https://patch.msgid.link/20250829-sound_quirk-v1-1-745529b44440@uniontech.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8bc1e247cdf1..766db7d00cbc 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2199,6 +2199,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0572, 0x1b08, /* Conexant Systems (Rockwell), Inc. */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x0572, 0x1b09, /* Conexant Systems (Rockwell), Inc. */ QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x05a3, 0x9420, /* ELP HD USB Camera */ @@ -2243,6 +2245,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0bda, 0x498a, /* Realtek Semiconductor Corp. */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0c45, 0x636b, /* Microdia JP001 USB Camera */ @@ -2259,6 +2263,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x12d1, 0x3a07, /* Huawei Technologies Co., Ltd. */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_MIC_RES_16), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ @@ -2349,6 +2355,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2a70, 0x1881, /* OnePlus Technology (Shenzhen) Co., Ltd. BE02T */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ @@ -2365,6 +2373,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x339b, 0x3a07, /* Synaptics HONOR USB-C HEADSET */ + QUIRK_FLAG_MIXER_MIN_MUTE), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x534d, 0x0021, /* MacroSilicon MS2100/MS2106 */ From 78811dd56def1479777c9823f3c2139739ef5bf5 Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Fri, 29 Aug 2025 16:46:06 +0800 Subject: [PATCH 0965/1307] ALSA: docs: Add documents for recently changes in snd-usb-audio Changed: - ignore_ctl_error - lowlatency - skip_validation - quirk_flags[19:24] [ corrected a typo -- tiwai ] Signed-off-by: Cryolitia PukNgae Link: https://patch.msgid.link/20250829-sound-doc-v1-1-e0110452b03d@uniontech.com Signed-off-by: Takashi Iwai --- Documentation/sound/alsa-configuration.rst | 29 +++++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index a45174d165eb..062b86522e4d 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -2253,8 +2253,15 @@ device_setup Default: 0x0000 ignore_ctl_error Ignore any USB-controller regarding mixer interface (default: no) + ``ignore_ctl_error=1`` may help when you get an error at accessing + the mixer element such as URB error -22. This happens on some + buggy USB device or the controller. This workaround corresponds to + the ``quirk_flags`` bit 14, too. autoclock Enable auto-clock selection for UAC2 devices (default: yes) +lowlatency + Enable low latency playback mode (default: yes). + Could disable it to switch back to the old mode if face a regression. quirk_alias Quirk alias list, pass strings like ``0123abcd:5678beef``, which applies the existing quirk for the device 5678:beef to a new @@ -2284,6 +2291,11 @@ delayed_register The driver prints a message like "Found post-registration device assignment: 1234abcd:04" for such a device, so that user can notice the need. +skip_validation + Skip unit descriptor validation (default: no). + The option is used to ignores the validation errors with the hexdump + of the unit descriptor instead of a driver probe error, so that we + can check its details. quirk_flags Contains the bit flags for various device specific workarounds. Applied to the corresponding card index. @@ -2307,6 +2319,16 @@ quirk_flags * bit 16: Set up the interface at first like UAC1 * bit 17: Apply the generic implicit feedback sync mode * bit 18: Don't apply implicit feedback sync mode + * bit 19: Don't closed interface during setting sample rate + * bit 20: Force an interface reset whenever stopping & restarting + a stream + * bit 21: Do not set PCM rate (frequency) when only one rate is + available for the given endpoint. + * bit 22: Set the fixed resolution 16 for Mic Capture Volume + * bit 23: Set the fixed resolution 384 for Mic Capture Volume + * bit 24: Set minimum volume control value as mute for devices + where the lowest playback value represents muted state instead + of minimum audible volume This module supports multiple devices, autoprobe and hotplugging. @@ -2314,10 +2336,9 @@ NB: ``nrpacks`` parameter can be modified dynamically via sysfs. Don't put the value over 20. Changing via sysfs has no sanity check. -NB: ``ignore_ctl_error=1`` may help when you get an error at accessing -the mixer element such as URB error -22. This happens on some -buggy USB device or the controller. This workaround corresponds to -the ``quirk_flags`` bit 14, too. +NB: ``ignore_ctl_error=1`` just provides a quick way to work around the +issues. If you have a buggy device that requires these quirks, please +report it to the upstream. NB: ``quirk_alias`` option is provided only for testing / development. If you want to have a proper support, contact to upstream for From 89e7353f522f5cf70cb48c01ce2dcdcb275b8022 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 25 Aug 2025 12:53:28 +0100 Subject: [PATCH 0966/1307] spi: microchip-core-qspi: stop checking viability of op->max_freq in supports_op callback In commit 13529647743d9 ("spi: microchip-core-qspi: Support per spi-mem operation frequency switches") the logic for checking the viability of op->max_freq in mchp_coreqspi_setup_clock() was copied into mchp_coreqspi_supports_op(). Unfortunately, op->max_freq is not valid when this function is called during probe but is instead zero. Accordingly, baud_rate_val is calculated to be INT_MAX due to division by zero, causing probe of the attached memory device to fail. Seemingly spi-microchip-core-qspi was the only driver that had such a modification made to its supports_op callback when the per_op_freq capability was added, so just remove it to restore prior functionality. CC: stable@vger.kernel.org Reported-by: Valentina Fernandez Fixes: 13529647743d9 ("spi: microchip-core-qspi: Support per spi-mem operation frequency switches") Signed-off-by: Conor Dooley Message-ID: <20250825-during-ploy-939bdd068593@spud> Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core-qspi.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index d13a9b755c7f..8dc98b17f77b 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -531,10 +531,6 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o static bool mchp_coreqspi_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { - struct mchp_coreqspi *qspi = spi_controller_get_devdata(mem->spi->controller); - unsigned long clk_hz; - u32 baud_rate_val; - if (!spi_mem_default_supports_op(mem, op)) return false; @@ -557,14 +553,6 @@ static bool mchp_coreqspi_supports_op(struct spi_mem *mem, const struct spi_mem_ return false; } - clk_hz = clk_get_rate(qspi->clk); - if (!clk_hz) - return false; - - baud_rate_val = DIV_ROUND_UP(clk_hz, 2 * op->max_freq); - if (baud_rate_val > MAX_DIVIDER || baud_rate_val < MIN_DIVIDER) - return false; - return true; } From aa28991fd5dc4c01a40caab2bd9af8c5e06f9899 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Fri, 29 Aug 2025 07:38:59 -0400 Subject: [PATCH 0967/1307] platform/x86/intel: power-domains: Use topology_logical_package_id() for package ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, tpmi_get_logical_id() calls topology_physical_package_id() to set the pkg_id of the info structure. Since some VM hosts assign non contiguous package IDs, topology_physical_package_id() can return a larger value than topology_max_packages(). This will result in an invalid reference into tpmi_power_domain_mask[] as that is allocatead based on topology_max_packages() as the maximum package ID. Fixes: 17ca2780458c ("platform/x86/intel: TPMI domain id and CPU mapping") Signed-off-by: David Arcari Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20250829113859.1772827-1-darcari@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/tpmi_power_domains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c index 9d8247bb9cfa..8641353b2e06 100644 --- a/drivers/platform/x86/intel/tpmi_power_domains.c +++ b/drivers/platform/x86/intel/tpmi_power_domains.c @@ -178,7 +178,7 @@ static int tpmi_get_logical_id(unsigned int cpu, struct tpmi_cpu_info *info) info->punit_thread_id = FIELD_GET(LP_ID_MASK, data); info->punit_core_id = FIELD_GET(MODULE_ID_MASK, data); - info->pkg_id = topology_physical_package_id(cpu); + info->pkg_id = topology_logical_package_id(cpu); info->linux_cpu = cpu; return 0; From 1180c79fbf36e4c02e76ae4658509523437e52a4 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 30 Jul 2025 23:17:15 +0300 Subject: [PATCH 0968/1307] hwmon: mlxreg-fan: Prevent fans from getting stuck at 0 RPM The fans controlled by the driver can get stuck at 0 RPM if they are configured below a 20% duty cycle. The driver tries to avoid this by enforcing a minimum duty cycle of 20%, but this is done after the fans are registered with the thermal subsystem. This is too late as the thermal subsystem can set their current state before the driver is able to enforce the minimum duty cycle. Fix by setting the minimum duty cycle before registering the fans with the thermal subsystem. Fixes: d7efb2ebc7b3 ("hwmon: (mlxreg-fan) Extend driver to support multiply cooling devices") Reported-by: Nikolay Aleksandrov Tested-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20250730201715.1111133-1-vadimp@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/mlxreg-fan.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c index a5f89aab3fb4..c25a54d5b39a 100644 --- a/drivers/hwmon/mlxreg-fan.c +++ b/drivers/hwmon/mlxreg-fan.c @@ -561,15 +561,14 @@ static int mlxreg_fan_cooling_config(struct device *dev, struct mlxreg_fan *fan) if (!pwm->connected) continue; pwm->fan = fan; + /* Set minimal PWM speed. */ + pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(MLXREG_FAN_MIN_DUTY); pwm->cdev = devm_thermal_of_cooling_device_register(dev, NULL, mlxreg_fan_name[i], pwm, &mlxreg_fan_cooling_ops); if (IS_ERR(pwm->cdev)) { dev_err(dev, "Failed to register cooling device\n"); return PTR_ERR(pwm->cdev); } - - /* Set minimal PWM speed. */ - pwm->last_hwmon_state = MLXREG_FAN_PWM_DUTY2STATE(MLXREG_FAN_MIN_DUTY); } return 0; From 98fd069dd87386d87eaf439e3c7b5767618926d2 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 29 Aug 2025 15:05:10 +1200 Subject: [PATCH 0969/1307] hwmon: (ina238) Correctly clamp temperature ina238_write_temp() was attempting to clamp the user input but was throwing away the result. Ensure that we clamp the value to the appropriate range before it is converted into a register value. Fixes: 0d9f596b1fe3 ("hwmon: (ina238) Modify the calculation formula to adapt to different chips") Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20250829030512.1179998-3-chris.packham@alliedtelesis.co.nz Signed-off-by: Guenter Roeck --- drivers/hwmon/ina238.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina238.c b/drivers/hwmon/ina238.c index 5a394eeff676..4d3dc018ead9 100644 --- a/drivers/hwmon/ina238.c +++ b/drivers/hwmon/ina238.c @@ -572,7 +572,7 @@ static int ina238_write_temp(struct device *dev, u32 attr, long val) return -EOPNOTSUPP; /* Signed */ - regval = clamp_val(val, -40000, 125000); + val = clamp_val(val, -40000, 125000); regval = div_s64(val * 10000, data->config->temp_lsb) << data->config->temp_shift; regval = clamp_val(regval, S16_MIN, S16_MAX) & (0xffff << data->config->temp_shift); From 2d41a4bfee6e9941ff19728c691ab00d19cf882a Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Wed, 27 Aug 2025 13:29:17 +0800 Subject: [PATCH 0970/1307] drm/amdgpu/sdma: bump firmware version checks for user queue support Using the previous firmware could lead to problems with PROTECTED_FENCE_SIGNAL commands, specifically causing register conflicts between MCU_DBG0 and MCU_DBG1. The updated firmware versions ensure proper alignment and unification of the SDMA_SUBOP_PROTECTED_FENCE_SIGNAL value with SDMA 7.x, resolving these hardware coordination issues Fixes: e8cca30d8b34 ("drm/amdgpu/sdma6: add ucode version checks for userq support") Acked-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit aab8b689aded255425db3d80c0030d1ba02fe2ef) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index e6d8eddda2bf..db6e41967f12 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1377,7 +1377,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(6, 0, 0): - if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 1): @@ -1385,11 +1385,11 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 2): - if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 0, 3): - if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; break; case IP_VERSION(6, 1, 0): From 5171848bdfb8bf87f38331d3f8c0fd5e2b676d3e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Aug 2025 14:24:31 -0400 Subject: [PATCH 0971/1307] drm/amdgpu/mes11: make MES_MISC_OP_CHANGE_CONFIG failure non-fatal If the firmware is too old, just warn and return success. Fixes: 27b791514789 ("drm/amdgpu/mes: keep enforce isolation up to date") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4414 Cc: shaoyun.Liu@amd.com Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher (cherry picked from commit 9f28af76fab0948b59673f69c10aeec47de11c60) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 28eb846280dd..3f6a828cad8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -641,8 +641,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); - return -EINVAL; + dev_warn_once(mes->adev->dev, + "MES FW version must be larger than 0x63 to support limit single process feature.\n"); + return 0; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; misc_pkt.change_config.opcode = From a8b79b09185de868e478eb1b6f1fd8deddb0604d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 28 Aug 2025 09:37:05 -0500 Subject: [PATCH 0972/1307] drm/amd: Re-enable common modes for eDP and LVDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Although compositors will add their own modes, Xorg won't use it's own modes and will only stick to modes advertised by the driver. This mean a user that used to pick 1024x768 could no longer access it unless the panel's native resolution was 1024x768. [How] Revert commit 6d396e7ac1ce3 ("drm/amd/display: Disable common modes for LVDS") and commit 7948afb46af92 ("drm/amd/display: Disable common modes for eDP"). The panel will still use scaling for any non-native modes due to commit 978fa2f6d0b12 ("drm/amd/display: Use scaling for non-native resolutions on eDP") Reported-by: Marek Marczykowski-Górecki Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4538 Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250828140856.2887993-1-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit c2fbf72fe3c2d08856e834ca43328a8829a261d8) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a0ca3b2c6bd8..7808a647a306 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8381,8 +8381,7 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, drm_edid); - if (encoder && (connector->connector_type != DRM_MODE_CONNECTOR_eDP) && - (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) + if (encoder) amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, drm_edid); } From 71403f58b4bb6c13b71c05505593a355f697fd94 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Aug 2025 10:47:50 -0400 Subject: [PATCH 0973/1307] drm/amdgpu: drop hw access in non-DC audio fini We already disable the audio pins in hw_fini so there is no need to do it again in sw_fini. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4481 Cc: oushixiong Signed-off-by: Alex Deucher (cherry picked from commit 5eeb16ca727f11278b2917fd4311a7d7efb0bbd6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 5 ----- 4 files changed, 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bf7c22f81cda..ba73518f5cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1462,17 +1462,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev) static void dce_v10_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 47e05783c4a0..b01d88d078fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1511,17 +1511,12 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) static void dce_v11_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 276c025c4c03..81760a26f2ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1451,17 +1451,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev) static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index e62ccf9eb73d..19a265bd4d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1443,17 +1443,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev) static void dce_v8_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } From 3ebf766c35464ebdeefb6068246267147503dc04 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Wed, 20 Aug 2025 15:46:52 -0400 Subject: [PATCH 0974/1307] drm/amd/display: Clear the CUR_ENABLE register on DCN314 w/out DPP PG [Why&How] ON DCN314, clearing DPP SW structure without power gating it can cause a double cursor in full screen with non-native scaling. A W/A that clears CURSOR0_CONTROL cursor_enable flag if dcn10_plane_atomic_power_down is called and DPP power gating is disabled. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4168 Reviewed-by: Sun peng (Leo) Li Signed-off-by: Ivan Lipski Signed-off-by: Alex Hung Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 645f74f1dc119dad5a2c7bbc05cc315e76883011) Cc: stable@vger.kernel.org --- .../drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c | 9 +++ .../drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h | 2 + .../drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c | 1 + .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 72 +++++++++++++++++++ .../amd/display/dc/hwss/dcn314/dcn314_hwseq.h | 2 + .../amd/display/dc/hwss/dcn314/dcn314_init.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 3 + 7 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index 75fb77bca83b..01480a04f85e 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -520,6 +520,15 @@ void dpp1_dppclk_control( REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 0); } +void dpp_force_disable_cursor(struct dpp *dpp_base) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + /* Force disable cursor */ + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, 0); + dpp_base->pos.cur0_ctl.bits.cur0_enable = 0; +} + static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h index c48139bed11f..f466182963f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h @@ -1525,4 +1525,6 @@ void dpp1_construct(struct dcn10_dpp *dpp1, void dpp1_cm_get_gamut_remap(struct dpp *dpp_base, struct dpp_grph_csc_adjustment *adjust); +void dpp_force_disable_cursor(struct dpp *dpp_base); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 2d70586cef40..09be2a90cc79 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -1494,6 +1494,7 @@ static struct dpp_funcs dcn30_dpp_funcs = { .dpp_dppclk_control = dpp1_dppclk_control, .dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier, .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap, + .dpp_force_disable_cursor = dpp_force_disable_cursor, }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index e68f21fd5f0f..560984533950 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -528,3 +528,75 @@ void dcn314_disable_link_output(struct dc_link *link, apply_symclk_on_tx_off_wa(link); } + +/** + * dcn314_dpp_pg_control - DPP power gate control. + * + * @hws: dce_hwseq reference. + * @dpp_inst: DPP instance reference. + * @power_on: true if we want to enable power gate, false otherwise. + * + * Enable or disable power gate in the specific DPP instance. + * If power gating is disabled, will force disable cursor in the DPP instance. + */ +void dcn314_dpp_pg_control( + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool power_on) +{ + uint32_t power_gate = power_on ? 0 : 1; + uint32_t pwr_status = power_on ? 0 : 2; + + + if (hws->ctx->dc->debug.disable_dpp_power_gate) { + /* Workaround for DCN314 with disabled power gating */ + if (!power_on) { + + /* Force disable cursor if power gating is disabled */ + struct dpp *dpp = hws->ctx->dc->res_pool->dpps[dpp_inst]; + if (dpp && dpp->funcs->dpp_force_disable_cursor) + dpp->funcs->dpp_force_disable_cursor(dpp); + } + return; + } + if (REG(DOMAIN1_PG_CONFIG) == 0) + return; + + switch (dpp_inst) { + case 0: /* DPP0 */ + REG_UPDATE(DOMAIN1_PG_CONFIG, + DOMAIN1_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN1_PG_STATUS, + DOMAIN1_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 1: /* DPP1 */ + REG_UPDATE(DOMAIN3_PG_CONFIG, + DOMAIN3_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN3_PG_STATUS, + DOMAIN3_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 2: /* DPP2 */ + REG_UPDATE(DOMAIN5_PG_CONFIG, + DOMAIN5_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN5_PG_STATUS, + DOMAIN5_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 3: /* DPP3 */ + REG_UPDATE(DOMAIN7_PG_CONFIG, + DOMAIN7_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN7_PG_STATUS, + DOMAIN7_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index 2305ad282f21..6c072d0274ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -47,4 +47,6 @@ void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); +void dcn314_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index f5112742edf9..9f454fa90e65 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -141,6 +141,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .enable_power_gating_plane = dcn314_enable_power_gating_plane, .dpp_root_clock_control = dcn314_dpp_root_clock_control, .hubp_pg_control = dcn31_hubp_pg_control, + .dpp_pg_control = dcn314_dpp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, .update_odm = dcn314_update_odm, .dsc_pg_control = dcn314_dsc_pg_control, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 0c5675d1c593..1b7c085dc2cc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -349,6 +349,9 @@ struct dpp_funcs { struct dpp *dpp_base, enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix); + + void (*dpp_force_disable_cursor)(struct dpp *dpp_base); + }; From 0ef5c4e4dbbfcebaa9b2eca18097b43016727dfe Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 29 Aug 2025 12:16:32 +1000 Subject: [PATCH 0975/1307] nouveau: fix disabling the nonstall irq due to storm code Nouveau has code that when it gets an IRQ with no allowed handler it disables it to avoid storms. However with nonstall interrupts, we often disable them from the drm driver, but still request their emission via the push submission. Just don't disable nonstall irqs ever in normal operation, the event handling code will filter them out, and the driver will just enable/disable them at load time. This fixes timeouts we've been seeing on/off for a long time, but they became a lot more noticeable on Blackwell. This doesn't fix all of them, there is a subsequent fence emission fix to fix the last few. Fixes: 3ebd64aa3c4f ("drm/nouveau/intr: support multiple trees, and explicit interfaces") Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://lore.kernel.org/r/20250829021633.1674524-1-airlied@gmail.com [ Fix a typo and a minor checkpatch.pl warning; remove "v2" from commit subject. - Danilo ] Signed-off-by: Danilo Krummrich --- .../gpu/drm/nouveau/nvkm/engine/fifo/base.c | 2 ++ .../gpu/drm/nouveau/nvkm/engine/fifo/ga100.c | 23 ++++++++++++------- .../gpu/drm/nouveau/nvkm/engine/fifo/ga102.c | 1 + .../gpu/drm/nouveau/nvkm/engine/fifo/priv.h | 2 ++ .../nouveau/nvkm/subdev/gsp/rm/r535/fifo.c | 1 + 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index fdffa0391b31..6fd4e60634fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -350,6 +350,8 @@ nvkm_fifo_dtor(struct nvkm_engine *engine) nvkm_chid_unref(&fifo->chid); nvkm_event_fini(&fifo->nonstall.event); + if (fifo->func->nonstall_dtor) + fifo->func->nonstall_dtor(fifo); mutex_destroy(&fifo->mutex); if (fifo->func->dtor) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c index e74493a4569e..6848a56f20c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -517,19 +517,11 @@ ga100_fifo_nonstall_intr(struct nvkm_inth *inth) static void ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_block(&runl->nonstall.inth); } static void ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_allow(&runl->nonstall.inth); } const struct nvkm_event_func @@ -564,12 +556,26 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) if (ret) return ret; + nvkm_inth_allow(&runl->nonstall.inth); + nr = max(nr, runl->id + 1); } return nr; } +void +ga100_fifo_nonstall_dtor(struct nvkm_fifo *fifo) +{ + struct nvkm_runl *runl; + + nvkm_runl_foreach(runl, fifo) { + if (runl->nonstall.vector < 0) + continue; + nvkm_inth_block(&runl->nonstall.inth); + } +} + int ga100_fifo_runl_ctor(struct nvkm_fifo *fifo) { @@ -599,6 +605,7 @@ ga100_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c index 755235f55b3a..18a0b1f4eab7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -30,6 +30,7 @@ ga102_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index 5e81ae195329..fff1428ef267 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -41,6 +41,7 @@ struct nvkm_fifo_func { void (*start)(struct nvkm_fifo *, unsigned long *); int (*nonstall_ctor)(struct nvkm_fifo *); + void (*nonstall_dtor)(struct nvkm_fifo *); const struct nvkm_event_func *nonstall; const struct nvkm_runl_func *runl; @@ -200,6 +201,7 @@ u32 tu102_chan_doorbell_handle(struct nvkm_chan *); int ga100_fifo_runl_ctor(struct nvkm_fifo *); int ga100_fifo_nonstall_ctor(struct nvkm_fifo *); +void ga100_fifo_nonstall_dtor(struct nvkm_fifo *); extern const struct nvkm_event_func ga100_fifo_nonstall; extern const struct nvkm_runl_func ga100_runl; extern const struct nvkm_runq_func ga100_runq; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c index 1ac5628c5140..4ed54b386a60 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c @@ -601,6 +601,7 @@ r535_fifo_new(const struct nvkm_fifo_func *hw, struct nvkm_device *device, rm->chan.func = &r535_chan; rm->nonstall = &ga100_fifo_nonstall; rm->nonstall_ctor = ga100_fifo_nonstall_ctor; + rm->nonstall_dtor = ga100_fifo_nonstall_dtor; return nvkm_fifo_new_(rm, device, type, inst, pfifo); } From 2cb66ae6040fd3cb058c3391b180f378fc0e3e2f Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 29 Aug 2025 12:16:33 +1000 Subject: [PATCH 0976/1307] nouveau: Membar before between semaphore writes and the interrupt This ensures that the memory write and the interrupt are properly ordered and we won't wake up the kernel before the semaphore write has hit memory. Fixes: b1ca384772b6 ("drm/nouveau/gv100-: switch to volta semaphore methods") Cc: stable@vger.kernel.org Signed-off-by: Faith Ekstrand Signed-off-by: Dave Airlie Link: https://lore.kernel.org/r/20250829021633.1674524-2-airlied@gmail.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/gv100_fence.c | 7 +- .../drm/nouveau/include/nvhw/class/clc36f.h | 85 +++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/gv100_fence.c b/drivers/gpu/drm/nouveau/gv100_fence.c index cccdeca72002..317e516c4ec7 100644 --- a/drivers/gpu/drm/nouveau/gv100_fence.c +++ b/drivers/gpu/drm/nouveau/gv100_fence.c @@ -18,7 +18,7 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) struct nvif_push *push = &chan->chan.push; int ret; - ret = PUSH_WAIT(push, 8); + ret = PUSH_WAIT(push, 13); if (ret) return ret; @@ -32,6 +32,11 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) NVDEF(NVC36F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) | NVDEF(NVC36F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS)); + PUSH_MTHD(push, NVC36F, MEM_OP_A, 0, + MEM_OP_B, 0, + MEM_OP_C, NVDEF(NVC36F, MEM_OP_C, MEMBAR_TYPE, SYS_MEMBAR), + MEM_OP_D, NVDEF(NVC36F, MEM_OP_D, OPERATION, MEMBAR)); + PUSH_MTHD(push, NVC36F, NON_STALL_INTERRUPT, 0); PUSH_KICK(push); diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h index 8735dda4c8a7..338f74b9f501 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h @@ -7,6 +7,91 @@ #define NVC36F_NON_STALL_INTERRUPT (0x00000020) #define NVC36F_NON_STALL_INTERRUPT_HANDLE 31:0 +// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for +// specifying the page address for a targeted TLB invalidate and the uTLB for +// a targeted REPLAY_CANCEL for UVM. +// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly +// rearranged fields. +#define NVC36F_MEM_OP_A (0x00000028) +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12 +#define NVC36F_MEM_OP_B (0x0000002c) +#define NVC36F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0 +#define NVC36F_MEM_OP_C (0x00000030) +#define NVC36F_MEM_OP_C_MEMBAR_TYPE 2:0 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0 +// MEM_OP_D MUST be preceded by MEM_OPs A-C. +#define NVC36F_MEM_OP_D (0x00000034) +#define NVC36F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_D_OPERATION 31:27 +#define NVC36F_MEM_OP_D_OPERATION_MEMBAR 0x00000005 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a +#define NVC36F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d +#define NVC36F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e +// CLEAN_LINES is an alias for Tegra/GPU IP usage +#define NVC36F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e +#define NVC36F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f +#define NVC36F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010 +#define NVC36F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015 +#define NVC36F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3 #define NVC36F_SEM_ADDR_LO (0x0000005c) #define NVC36F_SEM_ADDR_LO_OFFSET 31:2 #define NVC36F_SEM_ADDR_HI (0x00000060) From 31f1a960ad1a14def94fa0b8c25d62b4c032813f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Aug 2025 09:02:16 -0700 Subject: [PATCH 0977/1307] NFSv4: Don't clear capabilities that won't be reset Don't clear the capabilities that are not going to get reset by the call to _nfs4_server_capabilities(). Reported-by: Scott Haiden Fixes: b01f21cacde9 ("NFS: Fix the setting of capabilities when automounting a new filesystem") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7d2b67e06cc3..5b92fcf45dd7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4092,7 +4092,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) }; int err; - nfs_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), From dd5a8621b886b02f8341c5d4ea68eb2c552ebd3e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Aug 2025 09:07:22 -0700 Subject: [PATCH 0978/1307] NFSv4: Clear the NFS_CAP_FS_LOCATIONS flag if it is not set _nfs4_server_capabilities() is expected to clear any flags that are not supported by the server. Fixes: 8a59bb93b7e3 ("NFSv4 store server support for fs_location attribute") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b92fcf45dd7..d0f91d9430f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4013,8 +4013,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f res.attr_bitmask[2]; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); - server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | - NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL); + server->caps &= + ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | + NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); server->fattr_valid = NFS_ATTR_FATTR_V4; if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) From b3ac33436030bce37ecb3dcae581ecfaad28078c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Aug 2025 09:12:30 -0700 Subject: [PATCH 0979/1307] NFSv4: Clear NFS_CAP_OPEN_XOR and NFS_CAP_DELEGTIME if not supported _nfs4_server_capabilities() should clear capabilities that are not supported by the server. Fixes: d2a00cceb93a ("NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d0f91d9430f6..ce61253efd45 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4015,7 +4015,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | - NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS); + NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS | + NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME); server->fattr_valid = NFS_ATTR_FATTR_V4; if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) From 4fb2b677fc1f70ee642c0beecc3cabf226ef5707 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 29 Aug 2025 09:15:12 -0700 Subject: [PATCH 0980/1307] NFSv4: Clear the NFS_CAP_XATTR flag if not supported by the server nfs_server_set_fsinfo() shouldn't assume that NFS_CAP_XATTR is unset on entry to the function. Fixes: b78ef845c35d ("NFSv4.2: query the server for extended attribute support") Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8fb4a950dd55..4e3dcc157a83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -888,6 +888,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (fsinfo->xattr_support) server->caps |= NFS_CAP_XATTR; + else + server->caps &= ~NFS_CAP_XATTR; #endif } From 28010791193a4503f054e8d69a950ef815deb539 Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Wed, 27 Aug 2025 10:53:25 -0400 Subject: [PATCH 0981/1307] Bluetooth: vhci: Prevent use-after-free by removing debugfs files early Move the creation of debugfs files into a dedicated function, and ensure they are explicitly removed during vhci_release(), before associated data structures are freed. Previously, debugfs files such as "force_suspend", "force_wakeup", and others were created under hdev->debugfs but not removed in vhci_release(). Since vhci_release() frees the backing vhci_data structure, any access to these files after release would result in use-after-free errors. Although hdev->debugfs is later freed in hci_release_dev(), user can access files after vhci_data is freed but before hdev->debugfs is released. Fixes: ab4e4380d4e1 ("Bluetooth: Add vhci devcoredump support") Signed-off-by: Ivan Pravdin Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 57 ++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f7d8c3c00655..2fef08254d78 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -380,6 +380,28 @@ static const struct file_operations force_devcoredump_fops = { .write = force_devcd_write, }; +static void vhci_debugfs_init(struct vhci_data *data) +{ + struct hci_dev *hdev = data->hdev; + + debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, + &force_suspend_fops); + + debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, + &force_wakeup_fops); + + if (IS_ENABLED(CONFIG_BT_MSFTEXT)) + debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, + &msft_opcode_fops); + + if (IS_ENABLED(CONFIG_BT_AOSPEXT)) + debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, + &aosp_capable_fops); + + debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, + &force_devcoredump_fops); +} + static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; @@ -434,22 +456,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) return -EBUSY; } - debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, - &force_suspend_fops); - - debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, - &force_wakeup_fops); - - if (IS_ENABLED(CONFIG_BT_MSFTEXT)) - debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, - &msft_opcode_fops); - - if (IS_ENABLED(CONFIG_BT_AOSPEXT)) - debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, - &aosp_capable_fops); - - debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, - &force_devcoredump_fops); + if (!IS_ERR_OR_NULL(hdev->debugfs)) + vhci_debugfs_init(data); hci_skb_pkt_type(skb) = HCI_VENDOR_PKT; @@ -651,6 +659,21 @@ static int vhci_open(struct inode *inode, struct file *file) return 0; } +static void vhci_debugfs_remove(struct hci_dev *hdev) +{ + debugfs_lookup_and_remove("force_suspend", hdev->debugfs); + + debugfs_lookup_and_remove("force_wakeup", hdev->debugfs); + + if (IS_ENABLED(CONFIG_BT_MSFTEXT)) + debugfs_lookup_and_remove("msft_opcode", hdev->debugfs); + + if (IS_ENABLED(CONFIG_BT_AOSPEXT)) + debugfs_lookup_and_remove("aosp_capable", hdev->debugfs); + + debugfs_lookup_and_remove("force_devcoredump", hdev->debugfs); +} + static int vhci_release(struct inode *inode, struct file *file) { struct vhci_data *data = file->private_data; @@ -662,6 +685,8 @@ static int vhci_release(struct inode *inode, struct file *file) hdev = data->hdev; if (hdev) { + if (!IS_ERR_OR_NULL(hdev->debugfs)) + vhci_debugfs_remove(hdev); hci_unregister_dev(hdev); hci_free_dev(hdev); } From 862c628108562d8c7a516a900034823b381d3cba Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 27 Aug 2025 20:40:14 +0000 Subject: [PATCH 0982/1307] Bluetooth: Fix use-after-free in l2cap_sock_cleanup_listen() syzbot reported the splat below without a repro. In the splat, a single thread calling bt_accept_dequeue() freed sk and touched it after that. The root cause would be the racy l2cap_sock_cleanup_listen() call added by the cited commit. bt_accept_dequeue() is called under lock_sock() except for l2cap_sock_release(). Two threads could see the same socket during the list iteration in bt_accept_dequeue(): CPU1 CPU2 (close()) ---- ---- sock_hold(sk) sock_hold(sk); lock_sock(sk) <-- block close() sock_put(sk) bt_accept_unlink(sk) sock_put(sk) <-- refcnt by bt_accept_enqueue() release_sock(sk) lock_sock(sk) sock_put(sk) bt_accept_unlink(sk) sock_put(sk) <-- last refcnt bt_accept_unlink(sk) <-- UAF Depending on the timing, the other thread could show up in the "Freed by task" part. Let's call l2cap_sock_cleanup_listen() under lock_sock() in l2cap_sock_release(). [0]: BUG: KASAN: slab-use-after-free in debug_spin_lock_before kernel/locking/spinlock_debug.c:86 [inline] BUG: KASAN: slab-use-after-free in do_raw_spin_lock+0x26f/0x2b0 kernel/locking/spinlock_debug.c:115 Read of size 4 at addr ffff88803b7eb1c4 by task syz.5.3276/16995 CPU: 3 UID: 0 PID: 16995 Comm: syz.5.3276 Not tainted syzkaller #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcd/0x630 mm/kasan/report.c:482 kasan_report+0xe0/0x110 mm/kasan/report.c:595 debug_spin_lock_before kernel/locking/spinlock_debug.c:86 [inline] do_raw_spin_lock+0x26f/0x2b0 kernel/locking/spinlock_debug.c:115 spin_lock_bh include/linux/spinlock.h:356 [inline] release_sock+0x21/0x220 net/core/sock.c:3746 bt_accept_dequeue+0x505/0x600 net/bluetooth/af_bluetooth.c:312 l2cap_sock_cleanup_listen+0x5c/0x2a0 net/bluetooth/l2cap_sock.c:1451 l2cap_sock_release+0x5c/0x210 net/bluetooth/l2cap_sock.c:1425 __sock_release+0xb3/0x270 net/socket.c:649 sock_close+0x1c/0x30 net/socket.c:1439 __fput+0x3ff/0xb70 fs/file_table.c:468 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xeb/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x3f6/0x4c0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f2accf8ebe9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffdb6cb1378 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00000000000426fb RCX: 00007f2accf8ebe9 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f2acd1b7da0 R08: 0000000000000001 R09: 00000012b6cb166f R10: 0000001b30e20000 R11: 0000000000000246 R12: 00007f2acd1b609c R13: 00007f2acd1b6090 R14: ffffffffffffffff R15: 00007ffdb6cb1490 Allocated by task 5326: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:388 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:405 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4365 [inline] __kmalloc_noprof+0x223/0x510 mm/slub.c:4377 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0x1a8/0x2a0 net/core/sock.c:2239 sk_alloc+0x36/0xc20 net/core/sock.c:2295 bt_sock_alloc+0x3b/0x3a0 net/bluetooth/af_bluetooth.c:151 l2cap_sock_alloc.constprop.0+0x33/0x1d0 net/bluetooth/l2cap_sock.c:1894 l2cap_sock_new_connection_cb+0x101/0x240 net/bluetooth/l2cap_sock.c:1482 l2cap_connect_cfm+0x4c4/0xf80 net/bluetooth/l2cap_core.c:7287 hci_connect_cfm include/net/bluetooth/hci_core.h:2050 [inline] hci_remote_features_evt+0x4dd/0x970 net/bluetooth/hci_event.c:3712 hci_event_func net/bluetooth/hci_event.c:7519 [inline] hci_event_packet+0xa0d/0x11c0 net/bluetooth/hci_event.c:7573 hci_rx_work+0x2c5/0x16b0 net/bluetooth/hci_core.c:4071 process_one_work+0x9cf/0x1b70 kernel/workqueue.c:3236 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:463 ret_from_fork+0x5d7/0x6f0 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Freed by task 16995: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:243 [inline] __kasan_slab_free+0x60/0x70 mm/kasan/common.c:275 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2417 [inline] slab_free mm/slub.c:4680 [inline] kfree+0x2b4/0x4d0 mm/slub.c:4879 sk_prot_free net/core/sock.c:2278 [inline] __sk_destruct+0x75f/0x9a0 net/core/sock.c:2373 sk_destruct+0xc2/0xf0 net/core/sock.c:2401 __sk_free+0xf4/0x3e0 net/core/sock.c:2412 sk_free+0x6a/0x90 net/core/sock.c:2423 sock_put include/net/sock.h:1960 [inline] bt_accept_unlink+0x245/0x2e0 net/bluetooth/af_bluetooth.c:262 bt_accept_dequeue+0x517/0x600 net/bluetooth/af_bluetooth.c:308 l2cap_sock_cleanup_listen+0x5c/0x2a0 net/bluetooth/l2cap_sock.c:1451 l2cap_sock_release+0x5c/0x210 net/bluetooth/l2cap_sock.c:1425 __sock_release+0xb3/0x270 net/socket.c:649 sock_close+0x1c/0x30 net/socket.c:1439 __fput+0x3ff/0xb70 fs/file_table.c:468 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xeb/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x3f6/0x4c0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 1728137b33c0 ("Bluetooth: L2CAP: Fix use-after-free in l2cap_sock_ready_cb") Reported-by: syzbot+e5e64cdf8e92046dd3e1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-bluetooth/68af6b9d.a70a0220.3cafd4.0032.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f4257c4d3052..814fb8610ac4 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1422,7 +1422,10 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + lock_sock_nested(sk, L2CAP_NESTING_PARENT); l2cap_sock_cleanup_listen(sk); + release_sock(sk); + bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); From 86a9b1250602d877a393dcbab5f42472f77ac0f7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Aug 2025 14:36:28 -0700 Subject: [PATCH 0983/1307] hardening: Require clang 20.1.0 for __counted_by After an innocuous change in -next that modified a structure that contains __counted_by, clang-19 start crashing when building certain files in drivers/gpu/drm/xe. When assertions are enabled, the more descriptive failure is: clang: clang/lib/AST/RecordLayoutBuilder.cpp:3335: const ASTRecordLayout &clang::ASTContext::getASTRecordLayout(const RecordDecl *) const: Assertion `D && "Cannot get layout of forward declarations!"' failed. According to a reverse bisect, a tangential change to the LLVM IR generation phase of clang during the LLVM 20 development cycle [1] resolves this problem. Bump the version of clang that enables CONFIG_CC_HAS_COUNTED_BY to 20.1.0 to ensure that this issue cannot be hit. Link: https://github.com/llvm/llvm-project/commit/160fb1121cdf703c3ef5e61fb26c5659eb581489 [1] Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Link: https://lore.kernel.org/r/20250807-fix-counted_by-clang-19-v1-1-902c86c1d515@kernel.org Signed-off-by: Kees Cook --- init/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 836320251219..d811cad02a75 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -117,10 +117,11 @@ config CC_HAS_NO_PROFILE_FN_ATTR config CC_HAS_COUNTED_BY bool - # clang needs to be at least 19.1.3 to avoid __bdos miscalculations - # https://github.com/llvm/llvm-project/pull/110497 - # https://github.com/llvm/llvm-project/pull/112636 - default y if CC_IS_CLANG && CLANG_VERSION >= 190103 + # clang needs to be at least 20.1.0 to avoid potential crashes + # when building structures that contain __counted_by + # https://github.com/ClangBuiltLinux/linux/issues/2114 + # https://github.com/llvm/llvm-project/commit/160fb1121cdf703c3ef5e61fb26c5659eb581489 + default y if CC_IS_CLANG && CLANG_VERSION >= 200100 # supported since gcc 15.1.0 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 default y if CC_IS_GCC && GCC_VERSION >= 150100 From 8a68d64bb10334426834e8c273319601878e961e Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 29 Aug 2025 15:28:52 -0700 Subject: [PATCH 0984/1307] x86/vmscape: Add old Intel CPUs to affected list These old CPUs are not tested against VMSCAPE, but are likely vulnerable. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen --- arch/x86/kernel/cpu/common.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2b87c93e6609..f98ec9c7fc07 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1240,15 +1240,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define VMSCAPE BIT(11) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), From d6a367ec6c96fc8e61b4d67e69df03565ec69fb7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 28 Aug 2025 23:49:18 +0200 Subject: [PATCH 0985/1307] netfilter: nft_flowtable.sh: re-run with random mtu sizes Jakub says: nft_flowtable.sh is one of the most flake-atious test for netdev CI currently :( The root cause is two-fold: 1. the failing part of the test is supposed to make sure that ip fragments are forwarded for offloaded flows. (flowtable has to pass them to classic forward path). path mtu discovery for these subtests is disabled. 2. nft_flowtable.sh has two passes. One with fixed mtus/file size and one where link mtus and file sizes are random. The CI failures all have same pattern: re-run with random mtus and file size: -o 27663 -l 4117 -r 10089 -s 54384840 [..] PASS: dscp_egress: dscp packet counters match FAIL: file mismatch for ns1 -> ns2 In some cases this error triggers a bit ealier, sometimes in a later subtest: re-run with random mtus and file size: -o 20201 -l 4555 -r 12657 -s 9405856 [..] PASS: dscp_egress: dscp packet counters match PASS: dscp_fwd: dscp packet counters match 2025/08/17 20:37:52 socat[18954] E write(7, 0x560716b96000, 8192): Broken pipe FAIL: file mismatch for ns1 -> ns2 -rw------- 1 root root 9405856 Aug 17 20:36 /tmp/tmp.2n63vlTrQe But all logs I saw show same scenario: 1. Failing tests have pmtu discovery off (i.e., ip fragmentation) 2. The test file is much larger than first-pass default (2M Byte) 3. peers have much larger MTUs compared to the 'network'. These errors are very reproducible when re-running the test with the same commandline arguments. The timeout became much more prominent with 1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks"): reassembled packets typically have a skb->truesize more than double the skb length. As that commit is intentional and pmtud-off with large-tcp-packets-as-fragments is not normal adjust the test to use a smaller file for the pmtu-off subtests. While at it, add more information to pass/fail messages and also run the dscp alteration subtest with pmtu discovery enabled. Link: https://netdev.bots.linux.dev/contest.html?test=nft-flowtable-sh Fixes: f84ab634904c ("selftests: netfilter: nft_flowtable.sh: re-run with random mtu sizes") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250822071330.4168f0db@kernel.org/ Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20250828214918.3385-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- .../selftests/net/netfilter/nft_flowtable.sh | 113 ++++++++++++------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..45832df98295 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +271,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +284,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +328,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +351,39 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local dstip=$4 + local dstport=$5 local lret=0 + local socatc + local socatl + local infile="$nsin" - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi + + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +393,16 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" ip netns exec "$nsr1" nft -f - <&2 @@ -489,8 +519,9 @@ table ip nat { } EOF +check_dscp "dscp_none" "0" if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -512,6 +543,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # are lower than file size and packets were forwarded via flowtable layer. # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then @@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -668,7 +707,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret From 9f74c0ea9b26d1505d55b61e36b1623dd347e1d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 16:23:52 +0000 Subject: [PATCH 0986/1307] net_sched: gen_estimator: fix est_timer() vs CONFIG_PREEMPT_RT=y syzbot reported a WARNING in est_timer() [1] Problem here is that with CONFIG_PREEMPT_RT=y, timer callbacks can be preempted. Adopt preempt_disable_nested()/preempt_enable_nested() to fix this. [1] WARNING: CPU: 0 PID: 16 at ./include/linux/seqlock.h:221 __seqprop_assert include/linux/seqlock.h:221 [inline] WARNING: CPU: 0 PID: 16 at ./include/linux/seqlock.h:221 est_timer+0x6dc/0x9f0 net/core/gen_estimator.c:93 Modules linked in: CPU: 0 UID: 0 PID: 16 Comm: ktimers/0 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:__seqprop_assert include/linux/seqlock.h:221 [inline] RIP: 0010:est_timer+0x6dc/0x9f0 net/core/gen_estimator.c:93 Call Trace: call_timer_fn+0x17e/0x5f0 kernel/time/timer.c:1747 expire_timers kernel/time/timer.c:1798 [inline] __run_timers kernel/time/timer.c:2372 [inline] __run_timer_base+0x648/0x970 kernel/time/timer.c:2384 run_timer_base kernel/time/timer.c:2393 [inline] run_timer_softirq+0xb7/0x180 kernel/time/timer.c:2403 handle_softirqs+0x22c/0x710 kernel/softirq.c:579 __do_softirq kernel/softirq.c:613 [inline] run_ktimerd+0xcf/0x190 kernel/softirq.c:1043 smpboot_thread_fn+0x53f/0xa60 kernel/smpboot.c:160 kthread+0x70e/0x8a0 kernel/kthread.c:463 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+72db9ee39db57c3fecc5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68adf6fa.a70a0220.3cafd4.0000.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250827162352.3960779-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gen_estimator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 7d426a8e29f3..f112156db587 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -90,10 +90,12 @@ static void est_timer(struct timer_list *t) rate = (b_packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); + preempt_disable_nested(); write_seqcount_begin(&est->seq); est->avbps += brate; est->avpps += rate; write_seqcount_end(&est->seq); + preempt_enable_nested(); est->last_bytes = b_bytes; est->last_packets = b_packets; From b79e498080b170fd94fc83bca2471f450811549b Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 27 Aug 2025 12:26:43 -0700 Subject: [PATCH 0987/1307] xirc2ps_cs: fix register access when enabling FullDuplex The current code incorrectly passes (XIRCREG1_ECR | FullDuplex) as the register address to GetByte(), instead of fetching the register value and OR-ing it with FullDuplex. This results in an invalid register access. Fix it by reading XIRCREG1_ECR first, then or-ing with FullDuplex before writing it back. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Alok Tiwari Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827192645.658496-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index a31d5d5e6593..97e88886253f 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1576,7 +1576,7 @@ do_reset(struct net_device *dev, int full) msleep(40); /* wait 40 msec to let it complete */ } if (full_duplex) - PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR | FullDuplex)); + PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR) | FullDuplex); } else { /* No MII */ SelectPage(0); value = GetByte(XIRCREG_ESR); /* read the ESR */ From 8bf935cf789872350b04c1a6468b0a509f67afb2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 28 Aug 2025 16:29:49 +0800 Subject: [PATCH 0988/1307] ptp: ocp: fix use-after-free bugs causing by ptp_ocp_watchdog The ptp_ocp_detach() only shuts down the watchdog timer if it is pending. However, if the timer handler is already running, the timer_delete_sync() is not called. This leads to race conditions where the devlink that contains the ptp_ocp is deallocated while the timer handler is still accessing it, resulting in use-after-free bugs. The following details one of the race scenarios. (thread 1) | (thread 2) ptp_ocp_remove() | ptp_ocp_detach() | ptp_ocp_watchdog() if (timer_pending(&bp->watchdog))| bp = timer_container_of() timer_delete_sync() | | devlink_free(devlink) //free | | bp-> //use Resolve this by unconditionally calling timer_delete_sync() to ensure the timer is reliably deactivated, preventing any access after free. Fixes: 773bda964921 ("ptp: ocp: Expose various resources on the timecard.") Signed-off-by: Duoming Zhou Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250828082949.28189-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index d39073dc4072..4e1286ce05c9 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -4557,8 +4557,7 @@ ptp_ocp_detach(struct ptp_ocp *bp) ptp_ocp_debugfs_remove_device(bp); ptp_ocp_detach_sysfs(bp); ptp_ocp_attr_group_del(bp); - if (timer_pending(&bp->watchdog)) - timer_delete_sync(&bp->watchdog); + timer_delete_sync(&bp->watchdog); if (bp->ts0) ptp_ocp_unregister_ext(bp->ts0); if (bp->ts1) From 0704a3da7ce50f972e898bbda88d2692a22922d9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 28 Aug 2025 16:14:57 +0800 Subject: [PATCH 0989/1307] mISDN: Fix memory leak in dsp_hwec_enable() dsp_hwec_enable() allocates dup pointer by kstrdup(arg), but then it updates dup variable by strsep(&dup, ","). As a result when it calls kfree(dup), the dup variable may be a modified pointer that no longer points to the original allocated memory, causing a memory leak. The issue is the same pattern as fixed in commit c6a502c22999 ("mISDN: Fix memory leak in dsp_pipeline_build()"). Fixes: 9a4381618262 ("mISDN: Remove VLAs") Signed-off-by: Miaoqian Lin Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250828081457.36061-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/dsp_hwec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c index 0b3f29195330..0cd216e28f00 100644 --- a/drivers/isdn/mISDN/dsp_hwec.c +++ b/drivers/isdn/mISDN/dsp_hwec.c @@ -51,14 +51,14 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg) goto _do; { - char *dup, *tok, *name, *val; + char *dup, *next, *tok, *name, *val; int tmp; - dup = kstrdup(arg, GFP_ATOMIC); + dup = next = kstrdup(arg, GFP_ATOMIC); if (!dup) return; - while ((tok = strsep(&dup, ","))) { + while ((tok = strsep(&next, ","))) { if (!strlen(tok)) continue; name = strsep(&tok, "="); From b3852ae3105ec1048535707545d23c1e519c190f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:39 +0200 Subject: [PATCH 0990/1307] net: ethernet: oa_tc6: Handle failure of spi_setup There is no guarantee that spi_setup succeed, so properly handle the error case. Fixes: aa58bec064ab ("net: ethernet: oa_tc6: implement register write operation") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Andrew Lunn Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/oa_tc6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c index db200e4ec284..91a906a7918a 100644 --- a/drivers/net/ethernet/oa_tc6.c +++ b/drivers/net/ethernet/oa_tc6.c @@ -1249,7 +1249,8 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev) /* Set the SPI controller to pump at realtime priority */ tc6->spi->rt = true; - spi_setup(tc6->spi); + if (spi_setup(tc6->spi) < 0) + return NULL; tc6->spi_ctrl_tx_buf = devm_kzalloc(&tc6->spi->dev, OA_TC6_CTRL_SPI_BUF_SIZE, From c7217963eb779be0a7627dd2121152fa6786ecf7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:40 +0200 Subject: [PATCH 0991/1307] microchip: lan865x: Fix module autoloading Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from spi_device_id table. While at this, fix the misleading variable name (spidev is unrelated to this driver). Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Andrew Lunn Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 84c41f193561..9d94c8fb8b91 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -423,10 +423,11 @@ static void lan865x_remove(struct spi_device *spi) free_netdev(priv->netdev); } -static const struct spi_device_id spidev_spi_ids[] = { +static const struct spi_device_id lan865x_ids[] = { { .name = "lan8650" }, {}, }; +MODULE_DEVICE_TABLE(spi, lan865x_ids); static const struct of_device_id lan865x_dt_ids[] = { { .compatible = "microchip,lan8650" }, @@ -441,7 +442,7 @@ static struct spi_driver lan865x_driver = { }, .probe = lan865x_probe, .remove = lan865x_remove, - .id_table = spidev_spi_ids, + .id_table = lan865x_ids, }; module_spi_driver(lan865x_driver); From ca47c44d36a9ad3268d17f89789104a471c07f81 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 27 Aug 2025 13:53:41 +0200 Subject: [PATCH 0992/1307] microchip: lan865x: Fix LAN8651 autoloading Add missing IDs for LAN8651 devices, which are also defined in the DT bindings. Fixes: 5cd2340cb6a3 ("microchip: lan865x: add driver support for Microchip's LAN865X MAC-PHY") Signed-off-by: Stefan Wahren Cc: stable@kernel.org Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250827115341.34608-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan865x/lan865x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 9d94c8fb8b91..79b800d2b72c 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -425,12 +425,14 @@ static void lan865x_remove(struct spi_device *spi) static const struct spi_device_id lan865x_ids[] = { { .name = "lan8650" }, + { .name = "lan8651" }, {}, }; MODULE_DEVICE_TABLE(spi, lan865x_ids); static const struct of_device_id lan865x_dt_ids[] = { { .compatible = "microchip,lan8650" }, + { .compatible = "microchip,lan8651" }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, lan865x_dt_ids); From aea3493246c474bc917d124d6fb627663ab6bef0 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 30 Aug 2025 08:37:49 +0900 Subject: [PATCH 0993/1307] ALSA: firewire-motu: drop EPOLLOUT from poll return values as write is not supported The ALSA HwDep character device of the firewire-motu driver incorrectly returns EPOLLOUT in poll(2), even though the driver implements no operation for write(2). This misleads userspace applications to believe write() is allowed, potentially resulting in unnecessarily wakeups. This issue dates back to the driver's initial code added by a commit 71c3797779d3 ("ALSA: firewire-motu: add hwdep interface"), and persisted when POLLOUT was updated to EPOLLOUT by a commit a9a08845e9ac ('vfs: do bulk POLL* -> EPOLL* replacement("").'). This commit fixes the bug. Signed-off-by: Takashi Sakamoto Link: https://patch.msgid.link/20250829233749.366222-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/motu/motu-hwdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c index 1ed60618220d..fa2685665db3 100644 --- a/sound/firewire/motu/motu-hwdep.c +++ b/sound/firewire/motu/motu-hwdep.c @@ -111,7 +111,7 @@ static __poll_t hwdep_poll(struct snd_hwdep *hwdep, struct file *file, events = 0; spin_unlock_irq(&motu->lock); - return events | EPOLLOUT; + return events; } static int hwdep_get_info(struct snd_motu *motu, void __user *arg) From e5a00dafc7e06ab1b20fd4c1535cfa9b9940061e Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 29 Aug 2025 18:04:49 +0200 Subject: [PATCH 0994/1307] ALSA: hda: tas2781: fix tas2563 EFI data endianness Before conversion to unify the calibration data management, the tas2563_apply_calib() function performed the big endian conversion and wrote the calibration data to the device. The writing is now done by the common tasdev_load_calibrated_data() function, but without conversion. Put the values into the calibration data buffer with the expected endianness. Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib") Cc: Signed-off-by: Gergo Koteles Link: https://patch.msgid.link/20250829160450.66623-1-soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 7143926c2c30..46a43f0352aa 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -310,6 +310,7 @@ static int tas2563_save_calibration(struct tas2781_hda *h) struct cali_reg *r = &cd->cali_reg_array; unsigned int offset = 0; unsigned char *data; + __be32 bedata; efi_status_t status; unsigned int attr; int ret, i, j, k; @@ -351,6 +352,8 @@ static int tas2563_save_calibration(struct tas2781_hda *h) i, j, status); return -EINVAL; } + bedata = cpu_to_be32(*(uint32_t *)&data[offset]); + memcpy(&data[offset], &bedata, sizeof(bedata)); offset += TAS2563_CAL_DATA_SIZE; } } From d5f8458e34a331e5b228de142145e62ac5bfda34 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 29 Aug 2025 18:04:50 +0200 Subject: [PATCH 0995/1307] ALSA: hda: tas2781: reorder tas2563 calibration variables The tasdev_load_calibrated_data() function expects the calibration data values in the cali_data buffer as R0, R0Low, InvR0, Power, TLim which is not the same as what tas2563_save_calibration() writes to the buffer. Reorder the EFI variables in the tas2563_save_calibration() function to put the values in the buffer in the correct order. Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib") Cc: Signed-off-by: Gergo Koteles Link: https://patch.msgid.link/20250829160450.66623-2-soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c index 46a43f0352aa..45a70fbf6205 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_i2c.c @@ -300,7 +300,7 @@ static int tas2563_save_calibration(struct tas2781_hda *h) { efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO]; char *vars[TASDEV_CALIB_N] = { - "R0_%d", "InvR0_%d", "R0_Low_%d", "Power_%d", "TLim_%d" + "R0_%d", "R0_Low_%d", "InvR0_%d", "Power_%d", "TLim_%d" }; efi_char16_t efi_name[TAS2563_CAL_VAR_NAME_MAX]; unsigned long max_size = TAS2563_CAL_DATA_SIZE; From ceca927c86e6f72f72d45487a34368bc9509431d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Aug 2025 12:07:25 -0700 Subject: [PATCH 0996/1307] arm64: mm: Fix CFI failure due to kpti_ng_pgd_alloc function signature Seen during KPTI initialization: CFI failure at create_kpti_ng_temp_pgd+0x124/0xce8 (target: kpti_ng_pgd_alloc+0x0/0x14; expected type: 0xd61b88b6) The call site is alloc_init_pud() at arch/arm64/mm/mmu.c: pud_phys = pgtable_alloc(TABLE_PUD); alloc_init_pud() has the prototype: static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) where the pgtable_alloc() prototype is declared. The target (kpti_ng_pgd_alloc) is used in arch/arm64/kernel/cpufeature.c: create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, kpti_ng_pgd_alloc, 0); which is an alias for __create_pgd_mapping_locked() with prototype: extern __alias(__create_pgd_mapping_locked) void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags); __create_pgd_mapping_locked() passes the function pointer down: __create_pgd_mapping_locked() -> alloc_init_p4d() -> alloc_init_pud() But the target function (kpti_ng_pgd_alloc) has the wrong signature: static phys_addr_t __init kpti_ng_pgd_alloc(int shift); The "int" should be "enum pgtable_type". To make "enum pgtable_type" available to cpufeature.c, move enum pgtable_type definition from arch/arm64/mm/mmu.c to arch/arm64/include/asm/mmu.h. Adjust kpti_ng_pgd_alloc to use "enum pgtable_type" instead of "int". The function behavior remains identical (parameter is unused). Fixes: c64f46ee1377 ("arm64: mm: use enum to identify pgtable level instead of *_SHIFT") Cc: # 6.16.x Signed-off-by: Kees Cook Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829190721.it.373-kees@kernel.org Reviewed-by: Ryan Roberts Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu.h | 7 +++++++ arch/arm64/kernel/cpufeature.c | 5 +++-- arch/arm64/mm/mmu.c | 7 ------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 6e8aa8e72601..49f1a810df16 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,6 +17,13 @@ #include #include +enum pgtable_type { + TABLE_PTE, + TABLE_PMD, + TABLE_PUD, + TABLE_P4D, +}; + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9ad065f15f1d..e49d142a281f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include @@ -1945,11 +1946,11 @@ static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope) extern void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags); + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags); static phys_addr_t __initdata kpti_ng_temp_alloc; -static phys_addr_t __init kpti_ng_pgd_alloc(int shift) +static phys_addr_t __init kpti_ng_pgd_alloc(enum pgtable_type type) { kpti_ng_temp_alloc -= PAGE_SIZE; return kpti_ng_temp_alloc; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 34e5d78af076..183801520740 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -47,13 +47,6 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ -enum pgtable_type { - TABLE_PTE, - TABLE_PMD, - TABLE_PUD, - TABLE_P4D, -}; - u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); From d82aa5d3501b25bfb7bc2a24a68ad0a83b2ad10b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 Aug 2025 15:49:27 +0100 Subject: [PATCH 0997/1307] kselftest/arm64: Don't open code SVE_PT_SIZE() in fp-ptrace In fp-trace when allocating a buffer to write SVE register data we open code the addition of the header size to the VL depeendent register data size, which lead to an underallocation bug when we cut'n'pasted the code for FPSIMD format writes. Use the SVE_PT_SIZE() macro that the kernel UAPI provides for this. Fixes: b84d2b27954f ("kselftest/arm64: Test FPSIMD format data writes via NT_ARM_SVE in fp-ptrace") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250812-arm64-fp-trace-macro-v1-1-317cfff986a5@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 124bc883365e..cdd7a45c045d 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1187,7 +1187,7 @@ static void sve_write_sve(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", @@ -1234,8 +1234,7 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, - SVE_PT_REGS_FPSIMD); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", From 9dba9a45c348e8460da97c450cddf70b2056deb3 Mon Sep 17 00:00:00 2001 From: John Evans Date: Thu, 28 Aug 2025 12:40:08 +0800 Subject: [PATCH 0998/1307] scsi: lpfc: Fix buffer free/clear order in deferred receive path Fix a use-after-free window by correcting the buffer release sequence in the deferred receive path. The code freed the RQ buffer first and only then cleared the context pointer under the lock. Concurrent paths (e.g., ABTS and the repost path) also inspect and release the same pointer under the lock, so the old order could lead to double-free/UAF. Note that the repost path already uses the correct pattern: detach the pointer under the lock, then free it after dropping the lock. The deferred path should do the same. Fixes: 472e146d1cf3 ("scsi: lpfc: Correct upcalling nvmet_fc transport during io done downcall") Cc: stable@vger.kernel.org Signed-off-by: John Evans Link: https://lore.kernel.org/r/20250828044008.743-1-evans1210144@gmail.com Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index fba2e62027b7..4cfc928bcf2d 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1243,7 +1243,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, struct lpfc_nvmet_tgtport *tgtp; struct lpfc_async_xchg_ctx *ctxp = container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); - struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; + struct rqb_dmabuf *nvmebuf; struct lpfc_hba *phba = ctxp->phba; unsigned long iflag; @@ -1251,13 +1251,18 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n", ctxp->oxid, ctxp->size, raw_smp_processor_id()); + spin_lock_irqsave(&ctxp->ctxlock, iflag); + nvmebuf = ctxp->rqb_buffer; if (!nvmebuf) { + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, "6425 Defer rcv: no buffer oxid x%x: " "flg %x ste %x\n", ctxp->oxid, ctxp->flag, ctxp->state); return; } + ctxp->rqb_buffer = NULL; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); tgtp = phba->targetport->private; if (tgtp) @@ -1265,9 +1270,6 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, /* Free the nvmebuf since a new buffer already replaced it */ nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); - spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->rqb_buffer = NULL; - spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } /** From 708e2371f77a9d3f2f1d54d1ec835d71b9d0dafe Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Aug 2025 19:35:50 +0800 Subject: [PATCH 0999/1307] scsi: sr: Reinstate rotational media flag Reinstate the rotational media flag for the CD-ROM driver. The flag has been cleared since commit bd4a633b6f7c ("block: move the nonrot flag to queue_limits") and this breaks some applications. Move queue limit configuration from get_sectorsize() to sr_revalidate_disk() and set the rotational flag. Cc: Christoph Hellwig Fixes: bd4a633b6f7c ("block: move the nonrot flag to queue_limits") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250827113550.2614535-1-ming.lei@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index b17796d5ee66..add13e306898 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -475,13 +475,21 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) static int sr_revalidate_disk(struct scsi_cd *cd) { + struct request_queue *q = cd->device->request_queue; struct scsi_sense_hdr sshdr; + struct queue_limits lim; + int sector_size; /* if the unit is not ready, nothing more to do */ if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) return 0; sr_cd_check(&cd->cdi); - return get_sectorsize(cd); + sector_size = get_sectorsize(cd); + + lim = queue_limits_start_update(q); + lim.logical_block_size = sector_size; + lim.features |= BLK_FEAT_ROTATIONAL; + return queue_limits_commit_update_frozen(q, &lim); } static int sr_block_open(struct gendisk *disk, blk_mode_t mode) @@ -721,10 +729,8 @@ static int sr_probe(struct device *dev) static int get_sectorsize(struct scsi_cd *cd) { - struct request_queue *q = cd->device->request_queue; static const u8 cmd[10] = { READ_CAPACITY }; unsigned char buffer[8] = { }; - struct queue_limits lim; int err; int sector_size; struct scsi_failure failure_defs[] = { @@ -795,9 +801,7 @@ static int get_sectorsize(struct scsi_cd *cd) set_capacity(cd->disk, cd->capacity); } - lim = queue_limits_start_update(q); - lim.logical_block_size = sector_size; - return queue_limits_commit_update_frozen(q, &lim); + return sector_size; } static int get_capabilities(struct scsi_cd *cd) From d9b05321e21e4b218de4ce8a590bf375f58b6346 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 22 Aug 2025 16:12:38 +0200 Subject: [PATCH 1000/1307] futex: Move futex_hash_free() back to __mmput() To avoid a memory leak via mm_alloc() + mmdrop() the futex cleanup code has been moved to __mmdrop(). This resulted in a warnings if the futex hash table has been allocated via vmalloc() the mmdrop() was invoked from atomic context. The free path must stay in __mmput() to ensure it is invoked from preemptible context. In order to avoid the memory leak, delay the allocation of mm_struct::mm->futex_ref to futex_hash_allocate(). This works because neither the per-CPU counter nor the private hash has been allocated and therefore - futex_private_hash() callers (such as exit_pi_state_list()) don't acquire reference if there is no private hash yet. There is also no reference put. - Regular callers (futex_hash()) fallback to global hash. No reference counting here. The futex_ref member can be allocated in futex_hash_allocate() before the private hash itself is allocated. This happens either while the first thread is created or on request. In both cases the process has just a single thread so there can be either futex operation in progress or the request to create a private hash. Move futex_hash_free() back to __mmput(); Move the allocation of mm_struct::futex_ref to futex_hash_allocate(). [ bp: Fold a follow-up fix to prevent a use-after-free: https://lore.kernel.org/r/20250830213806.sEKuuGSm@linutronix.de ] Fixes: e703b7e247503 ("futex: Move futex cleanup to __mmdrop()") Closes: https://lore.kernel.org/all/20250821102721.6deae493@kernel.org/ Reported-by: Jakub Kicinski Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lkml.kernel.org/r/20250822141238.PfnkTjFb@linutronix.de --- kernel/fork.c | 2 +- kernel/futex/core.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index af673856499d..c4ada32598bd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -689,7 +689,6 @@ void __mmdrop(struct mm_struct *mm) mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); - futex_hash_free(mm); free_mm(mm); } @@ -1138,6 +1137,7 @@ static inline void __mmput(struct mm_struct *mm) if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); + futex_hash_free(mm); mmdrop(mm); } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index d9bb5567af0c..125804fbb5cb 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1722,12 +1722,9 @@ int futex_mm_init(struct mm_struct *mm) RCU_INIT_POINTER(mm->futex_phash, NULL); mm->futex_phash_new = NULL; /* futex-ref */ + mm->futex_ref = NULL; atomic_long_set(&mm->futex_atomic, 0); mm->futex_batches = get_state_synchronize_rcu(); - mm->futex_ref = alloc_percpu(unsigned int); - if (!mm->futex_ref) - return -ENOMEM; - this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ return 0; } @@ -1801,6 +1798,17 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) } } + if (!mm->futex_ref) { + /* + * This will always be allocated by the first thread and + * therefore requires no locking. + */ + mm->futex_ref = alloc_percpu(unsigned int); + if (!mm->futex_ref) + return -ENOMEM; + this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ + } + fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!fph) From d77b6ff0ce35a6d0b0b7b9581bc3f76d041d4087 Mon Sep 17 00:00:00 2001 From: Stanislav Fort Date: Sun, 31 Aug 2025 16:56:23 +0200 Subject: [PATCH 1001/1307] batman-adv: fix OOB read/write in network-coding decode batadv_nc_skb_decode_packet() trusts coded_len and checks only against skb->len. XOR starts at sizeof(struct batadv_unicast_packet), reducing payload headroom, and the source skb length is not verified, allowing an out-of-bounds read and a small out-of-bounds write. Validate that coded_len fits within the payload area of both destination and source sk_buffs before XORing. Fixes: 2df5278b0267 ("batman-adv: network coding - receive coded packets and decode them") Cc: stable@vger.kernel.org Reported-by: Stanislav Fort Signed-off-by: Stanislav Fort Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 9f56308779cc..af97d077369f 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1687,7 +1687,12 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, coding_len = ntohs(coded_packet_tmp.coded_len); - if (coding_len > skb->len) + /* ensure dst buffer is large enough (payload only) */ + if (coding_len + h_size > skb->len) + return NULL; + + /* ensure src buffer is large enough (payload only) */ + if (coding_len + h_size > nc_packet->skb->len) return NULL; /* Here the magic is reversed: From c09461a0d24fba8a847a37a381626141da22d8ee Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 27 Aug 2025 13:12:16 +0000 Subject: [PATCH 1002/1307] rust: use the new name Location::file_as_c_str() in Rust >= 1.91.0 As part of the stabilization of Location::file_with_nul(), it was brought up that the with_nul() suffix usually means something else in Rust APIs, so the API is being renamed prior to stabilization [1]. Thus, use the new name on new rustc versions. Link: https://www.github.com/rust-lang/rust/pull/145928 [1] Signed-off-by: Alice Ryhl Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20250827-file_as_c_str-v1-1-d3f5a3916a9c@google.com [ Kept `cfg` separation. Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- init/Kconfig | 3 +++ rust/kernel/lib.rs | 15 ++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 836320251219..e7459cbea6ca 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -145,6 +145,9 @@ config RUSTC_HAS_UNNECESSARY_TRANSMUTES config RUSTC_HAS_FILE_WITH_NUL def_bool RUSTC_VERSION >= 108900 +config RUSTC_HAS_FILE_AS_C_STR + def_bool RUSTC_VERSION >= 109100 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index ed53169e795c..fef97f2a5098 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -296,7 +296,7 @@ macro_rules! asm { /// Gets the C string file name of a [`Location`]. /// -/// If `file_with_nul()` is not available, returns a string that warns about it. +/// If `Location::file_as_c_str()` is not available, returns a string that warns about it. /// /// [`Location`]: core::panic::Location /// @@ -310,8 +310,8 @@ macro_rules! asm { /// let caller = core::panic::Location::caller(); /// /// // Output: -/// // - A path like "rust/kernel/example.rs" if file_with_nul() is available. -/// // - "" otherwise. +/// // - A path like "rust/kernel/example.rs" if `file_as_c_str()` is available. +/// // - "" otherwise. /// let caller_file = file_from_location(caller); /// /// // Prints out the message with caller's file name. @@ -326,7 +326,12 @@ macro_rules! asm { /// ``` #[inline] pub fn file_from_location<'a>(loc: &'a core::panic::Location<'a>) -> &'a core::ffi::CStr { - #[cfg(CONFIG_RUSTC_HAS_FILE_WITH_NUL)] + #[cfg(CONFIG_RUSTC_HAS_FILE_AS_C_STR)] + { + loc.file_as_c_str() + } + + #[cfg(all(CONFIG_RUSTC_HAS_FILE_WITH_NUL, not(CONFIG_RUSTC_HAS_FILE_AS_C_STR)))] { loc.file_with_nul() } @@ -334,6 +339,6 @@ pub fn file_from_location<'a>(loc: &'a core::panic::Location<'a>) -> &'a core::f #[cfg(not(CONFIG_RUSTC_HAS_FILE_WITH_NUL))] { let _ = loc; - c"" + c"" } } From 8851e27d2cb947ea8bbbe8e812068f7bf5cbd00b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 29 Aug 2025 21:55:25 +0200 Subject: [PATCH 1003/1307] rust: support Rust >= 1.91.0 target spec Starting with Rust 1.91.0 (expected 2025-10-30), the target spec format has changed the type of the `target-pointer-width` key from string to integer [1]. Thus conditionally use one or the other depending on the version. Cc: Waffle Maybe Link: https://github.com/rust-lang/rust/pull/144443 [1] Link: https://lore.kernel.org/r/20250829195525.721664-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_target.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 39c82908ff3a..38b3416bb979 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -225,7 +225,11 @@ fn main() { ts.push("features", features); ts.push("llvm-target", "x86_64-linux-gnu"); ts.push("supported-sanitizers", ["kcfi", "kernel-address"]); - ts.push("target-pointer-width", "64"); + if cfg.rustc_version_atleast(1, 91, 0) { + ts.push("target-pointer-width", 64); + } else { + ts.push("target-pointer-width", "64"); + } } else if cfg.has("X86_32") { // This only works on UML, as i386 otherwise needs regparm support in rustc if !cfg.has("UML") { @@ -245,7 +249,11 @@ fn main() { } ts.push("features", features); ts.push("llvm-target", "i386-unknown-linux-gnu"); - ts.push("target-pointer-width", "32"); + if cfg.rustc_version_atleast(1, 91, 0) { + ts.push("target-pointer-width", 32); + } else { + ts.push("target-pointer-width", "32"); + } } else if cfg.has("LOONGARCH") { panic!("loongarch uses the builtin rustc loongarch64-unknown-none-softfloat target"); } else { From b320789d6883cc00ac78ce83bccbfe7ed58afcf0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Aug 2025 15:33:07 -0700 Subject: [PATCH 1004/1307] Linux 6.17-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 06c28b1d7e67..b9c661913250 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* From 6976c7a69dafbb34a0d4814e2def9d3d7114836d Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Fri, 29 Aug 2025 08:59:59 +0800 Subject: [PATCH 1005/1307] smb: client: Fix NULL pointer dereference in cifs_debug_dirs_proc_show() Reading /proc/fs/cifs/open_dirs may hit a NULL dereference when tcon->cfids is NULL. Add NULL check before accessing cfids to prevent the crash. Reproduction: - Mount CIFS share - cat /proc/fs/cifs/open_dirs Fixes: 844e5c0eb176 ("smb3 client: add way to show directory leases for improved debugging") Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index beb4f18f05ef..edb2e7f7fc23 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -304,6 +304,8 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); cfids = tcon->cfids; + if (!cfids) + continue; spin_lock(&cfids->cfid_list_lock); /* check lock ordering */ seq_printf(m, "Num entries: %d\n", cfids->num_entries); list_for_each_entry(cfid, &cfids->entries, entry) { @@ -319,8 +321,6 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } spin_unlock(&cfids->cfid_list_lock); - - } } } From b5ee94ac651aa42612095c4a75ff7f5c47cd9315 Mon Sep 17 00:00:00 2001 From: Philipp Kerling Date: Wed, 20 Aug 2025 16:24:13 +0200 Subject: [PATCH 1006/1307] ksmbd: allow a filename to contain colons on SMB3.1.1 posix extensions If the client sends SMB2_CREATE_POSIX_CONTEXT to ksmbd, allow the filename to contain a colon (':'). This requires disabling the support for Alternate Data Streams (ADS), which are denoted by a colon-separated suffix to the filename on Windows. This should not be an issue, since this concept is not known to POSIX anyway and the client has to explicitly request a POSIX context to get this behavior. Link: https://lore.kernel.org/all/f9401718e2be2ab22058b45a6817db912784ef61.camel@rx2.rx-server.de/ Signed-off-by: Philipp Kerling Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 25 ++++++++++++++----------- fs/smb/server/vfs_cache.h | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 0d92ce49aed7..a565fc36cee6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2951,18 +2951,19 @@ int smb2_open(struct ksmbd_work *work) } ksmbd_debug(SMB, "converted name = %s\n", name); - if (strchr(name, ':')) { - if (!test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_STREAMS)) { - rc = -EBADF; - goto err_out2; - } - rc = parse_stream_name(name, &stream_name, &s_type); - if (rc < 0) - goto err_out2; - } if (posix_ctxt == false) { + if (strchr(name, ':')) { + if (!test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_STREAMS)) { + rc = -EBADF; + goto err_out2; + } + rc = parse_stream_name(name, &stream_name, &s_type); + if (rc < 0) + goto err_out2; + } + rc = ksmbd_validate_filename(name); if (rc < 0) goto err_out2; @@ -3443,6 +3444,8 @@ int smb2_open(struct ksmbd_work *work) fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE)); + fp->is_posix_ctxt = posix_ctxt; + /* fp should be searchable through ksmbd_inode.m_fp_list * after daccess, saccess, attrib_only, and stream are * initialized. @@ -5988,7 +5991,7 @@ static int smb2_rename(struct ksmbd_work *work, if (IS_ERR(new_name)) return PTR_ERR(new_name); - if (strchr(new_name, ':')) { + if (fp->is_posix_ctxt == false && strchr(new_name, ':')) { int s_type; char *xattr_stream_name, *stream_name = NULL; size_t xattr_stream_size; diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 0708155b5caf..78b506c5ef03 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -112,6 +112,8 @@ struct ksmbd_file { bool is_durable; bool is_persistent; bool is_resilient; + + bool is_posix_ctxt; }; static inline void set_ctx_actor(struct dir_context *ctx, From 0aee6faf5b2e7eacff632dc63bc9b25c5a19dd1d Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 25 Aug 2025 11:05:56 +0900 Subject: [PATCH 1007/1307] MAINTAINERS: Update Nobuhiro Iwamatsu's email address The company's email address has been changed, so update my email address in MAINTAINERS and .mailmap files. Signed-off-by: Nobuhiro Iwamatsu Signed-off-by: Arnd Bergmann --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index a124aeed52a2..aa09e792017f 100644 --- a/.mailmap +++ b/.mailmap @@ -589,6 +589,7 @@ Nikolay Aleksandrov Nikolay Aleksandrov Nikolay Aleksandrov Nikolay Aleksandrov +Nobuhiro Iwamatsu Odelu Kukatla Oleksandr Natalenko Oleksij Rempel diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..636547f581cb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3526,7 +3526,7 @@ F: Documentation/devicetree/bindings/arm/ti/nspire.yaml F: arch/arm/boot/dts/nspire/ ARM/TOSHIBA VISCONTI ARCHITECTURE -M: Nobuhiro Iwamatsu +M: Nobuhiro Iwamatsu L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti.git From 5c5a41a75452e9eb5810a7d0d9916b61fe9fd1c5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 29 Aug 2025 00:39:40 +0200 Subject: [PATCH 1008/1307] gpu: nova-core: depend on CONFIG_64BIT If built on architectures with CONFIG_ARCH_DMA_ADDR_T_64BIT=y nova-core produces that following build failures: error[E0308]: mismatched types --> drivers/gpu/nova-core/fb.rs:49:59 | 49 | hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; | ----------------------- ^^^^^^^^^^^^^^^^^ expected `u64`, found `u32` | | | arguments to this method are incorrect | note: method defined here --> drivers/gpu/nova-core/fb/hal.rs:19:8 | 19 | fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; | ^^^^^^^^^^^^^^^^^^^^^^^ help: you can convert a `u32` to a `u64` | 49 | hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle().into())?; | +++++++ error[E0308]: mismatched types --> drivers/gpu/nova-core/fb.rs:65:47 | 65 | if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() { | ------------------------------- ^^^^^^^^^^^^^^^^^^^^^^ expected `u64`, found `u32` | | | expected because this is `u64` | help: you can convert a `u32` to a `u64` | 65 | if hal.read_sysmem_flush_page(bar) == self.page.dma_handle().into() { | +++++++ error: this arithmetic operation will overflow --> drivers/gpu/nova-core/falcon.rs:469:23 | 469 | .set_base((dma_start >> 40) as u16) | ^^^^^^^^^^^^^^^^^ attempt to shift right by `40_i32`, which would overflow | = note: `#[deny(arithmetic_overflow)]` on by default This is due to the code making assumptions on the width of dma_addr_t to be 64 bit. While this could technically be handled, it is rather painful to deal with, as the following example illustrates: pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> DmaAddress { let addr = u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) << FLUSH_SYSMEM_ADDR_SHIFT_HI; addr.try_into().unwrap_or_else(|_| { kernel::warn_on!(true); 0 }) } At the same time there's not much value for nova-core to support 32-bit, given that the supported GPU architectures are Turing and later, hence depend on CONFIG_64BIT. Cc: John Hubbard Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/lkml/20250828160247.37492-1-ojeda@kernel.org/ Fixes: 6554ad65b589 ("gpu: nova-core: register sysmem flush page") Fixes: 69f5cd67ce41 ("gpu: nova-core: add falcon register definitions and base code") Reviewed-by: Alexandre Courbot Reviewed-by: John Hubbard Link: https://lore.kernel.org/r/20250828223954.351348-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig index 8726d80d6ba4..20d3e6d0d796 100644 --- a/drivers/gpu/nova-core/Kconfig +++ b/drivers/gpu/nova-core/Kconfig @@ -1,5 +1,6 @@ config NOVA_CORE tristate "Nova Core GPU driver" + depends on 64BIT depends on PCI depends on RUST depends on RUST_FW_LOADER_ABSTRACTIONS From 850470a8413a8a78e772c4f6bd9fe81ec6bd5b0f Mon Sep 17 00:00:00 2001 From: yangshiguang Date: Sat, 30 Aug 2025 10:09:46 +0800 Subject: [PATCH 1009/1307] mm: slub: avoid wake up kswapd in set_track_prepare set_track_prepare() can incur lock recursion. The issue is that it is called from hrtimer_start_range_ns holding the per_cpu(hrtimer_bases)[n].lock, but when enabled CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare, and try to hold the per_cpu(hrtimer_bases)[n].lock. Avoid deadlock caused by implicitly waking up kswapd by passing in allocation flags, which do not contain __GFP_KSWAPD_RECLAIM in the debug_objects_fill_pool() case. Inside stack depot they are processed by gfp_nested_mask(). Since ___slab_alloc() has preemption disabled, we mask out __GFP_DIRECT_RECLAIM from the flags there. The oops looks something like: BUG: spinlock recursion on CPU#3, swapper/3/0 lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3 Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT) Call trace: spin_bug+0x0 _raw_spin_lock_irqsave+0x80 hrtimer_try_to_cancel+0x94 task_contending+0x10c enqueue_dl_entity+0x2a4 dl_server_start+0x74 enqueue_task_fair+0x568 enqueue_task+0xac do_activate_task+0x14c ttwu_do_activate+0xcc try_to_wake_up+0x6c8 default_wake_function+0x20 autoremove_wake_function+0x1c __wake_up+0xac wakeup_kswapd+0x19c wake_all_kswapds+0x78 __alloc_pages_slowpath+0x1ac __alloc_pages_noprof+0x298 stack_depot_save_flags+0x6b0 stack_depot_save+0x14 set_track_prepare+0x5c ___slab_alloc+0xccc __kmalloc_cache_noprof+0x470 __set_page_owner+0x2bc post_alloc_hook[jt]+0x1b8 prep_new_page+0x28 get_page_from_freelist+0x1edc __alloc_pages_noprof+0x13c alloc_slab_page+0x244 allocate_slab+0x7c ___slab_alloc+0x8e8 kmem_cache_alloc_noprof+0x450 debug_objects_fill_pool+0x22c debug_object_activate+0x40 enqueue_hrtimer[jt]+0xdc hrtimer_start_range_ns+0x5f8 ... Signed-off-by: yangshiguang Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects") Cc: stable@vger.kernel.org Signed-off-by: Vlastimil Babka --- mm/slub.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 1787e4d51e48..d257141896c9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -962,19 +962,19 @@ static struct track *get_track(struct kmem_cache *s, void *object, } #ifdef CONFIG_STACKDEPOT -static noinline depot_stack_handle_t set_track_prepare(void) +static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { depot_stack_handle_t handle; unsigned long entries[TRACK_ADDRS_COUNT]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3); - handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT); + handle = stack_depot_save(entries, nr_entries, gfp_flags); return handle; } #else -static inline depot_stack_handle_t set_track_prepare(void) +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; } @@ -996,9 +996,9 @@ static void set_track_update(struct kmem_cache *s, void *object, } static __always_inline void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, unsigned long addr) + enum track_item alloc, unsigned long addr, gfp_t gfp_flags) { - depot_stack_handle_t handle = set_track_prepare(); + depot_stack_handle_t handle = set_track_prepare(gfp_flags); set_track_update(s, object, alloc, addr, handle); } @@ -1926,9 +1926,9 @@ static inline bool free_debug_processing(struct kmem_cache *s, static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } -static inline depot_stack_handle_t set_track_prepare(void) { return 0; } +static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; } static inline void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, unsigned long addr) {} + enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {} static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab) {} static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, @@ -3881,9 +3881,14 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * For debug caches here we had to go through * alloc_single_from_partial() so just store the * tracking info and return the object. + * + * Due to disabled preemption we need to disallow + * blocking. The flags are further adjusted by + * gfp_nested_mask() in stack_depot itself. */ if (s->flags & SLAB_STORE_USER) - set_track(s, freelist, TRACK_ALLOC, addr); + set_track(s, freelist, TRACK_ALLOC, addr, + gfpflags & ~(__GFP_DIRECT_RECLAIM)); return freelist; } @@ -3915,7 +3920,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, goto new_objects; if (s->flags & SLAB_STORE_USER) - set_track(s, freelist, TRACK_ALLOC, addr); + set_track(s, freelist, TRACK_ALLOC, addr, + gfpflags & ~(__GFP_DIRECT_RECLAIM)); return freelist; } @@ -4426,8 +4432,12 @@ static noinline void free_to_partial_list( unsigned long flags; depot_stack_handle_t handle = 0; + /* + * We cannot use GFP_NOWAIT as there are callsites where waking up + * kswapd could deadlock + */ if (s->flags & SLAB_STORE_USER) - handle = set_track_prepare(); + handle = set_track_prepare(__GFP_NOWARN); spin_lock_irqsave(&n->list_lock, flags); From 75e81743e3815a934f5ec688a9837bc5fa56dbb3 Mon Sep 17 00:00:00 2001 From: Harshit Shah Date: Thu, 28 Aug 2025 11:27:49 -0700 Subject: [PATCH 1010/1307] arm64: dts: axiado: Add missing UART aliases Axiado AX3000 EVK has total of 4 UART ports. Add missing alias for uart0, uart1, uart2. This fixes the probe failures on the remaining UARTs. Fixes: 1f7055779001 ("arm64: dts: axiado: Add initial support for AX3000 SoC and eval board") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Harshit Shah Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/axiado/ax3000-evk.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/axiado/ax3000-evk.dts b/arch/arm64/boot/dts/axiado/ax3000-evk.dts index 92101c5b534b..b86e96962557 100644 --- a/arch/arm64/boot/dts/axiado/ax3000-evk.dts +++ b/arch/arm64/boot/dts/axiado/ax3000-evk.dts @@ -14,6 +14,9 @@ / { #size-cells = <2>; aliases { + serial0 = &uart0; + serial1 = &uart1; + serial2 = &uart2; serial3 = &uart3; }; From 750b54513f69f1046895346ea97cc3d96584355e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 18 Aug 2025 23:08:04 +0200 Subject: [PATCH 1011/1307] MAINTAINERS: exclude defconfig from ARM64 PORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patches for the arm64 defconfig are supposed to be sent to the SoC maintainers (e.g. a change in the generic arm64 defconfig required for Rockchip devices should be send to Heiko Stübner as he is listed as maintainer for "ARM/Rockchip SoC support") and not the ARM64 PORT maintainers. While we cannot easily describe this in MAINTAINERS, we can at least stop it from giving false information and make it behave the same way as for the MAINTAINERS file itself (which basically has the same rules), so that it just outputs the LKML for the ARM64 defconfig. Signed-off-by: Sebastian Reichel Acked-by: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20250818-arm64-defconfig-v1-1-f589553c3d72@collabora.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 636547f581cb..04a84ada4c19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3667,6 +3667,7 @@ F: drivers/virt/coco/arm-cca-guest/ F: drivers/virt/coco/pkvm-guest/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ +X: arch/arm64/configs/defconfig ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER M: George McCollister From 69a79ada8eb034ce016b5b78fb7d08d8687223de Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 8 Aug 2025 13:09:39 +0200 Subject: [PATCH 1012/1307] accel/ivpu: Prevent recovery work from being queued during device removal Use disable_work_sync() instead of cancel_work_sync() in ivpu_dev_fini() to ensure that no new recovery work items can be queued after device removal has started. Previously, recovery work could be scheduled even after canceling existing work, potentially leading to use-after-free bugs if recovery accessed freed resources. Rename ivpu_pm_cancel_recovery() to ivpu_pm_disable_recovery() to better reflect its new behavior. Fixes: 58cde80f45a2 ("accel/ivpu: Use dedicated work for job timeout detection") Cc: stable@vger.kernel.org # v6.8+ Signed-off-by: Karol Wachowski Reviewed-by: Lizhi Hou Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250808110939.328366-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 2 +- drivers/accel/ivpu/ivpu_pm.c | 4 ++-- drivers/accel/ivpu/ivpu_pm.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 3d6d52492536..3289751b4757 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -677,7 +677,7 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_jobs_abort_all(vdev); - ivpu_pm_cancel_recovery(vdev); + ivpu_pm_disable_recovery(vdev); ivpu_pm_disable(vdev); ivpu_prepare_for_reset(vdev); ivpu_shutdown(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index eacda1dbe840..475ddc94f1cf 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -417,10 +417,10 @@ void ivpu_pm_init(struct ivpu_device *vdev) ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay); } -void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) +void ivpu_pm_disable_recovery(struct ivpu_device *vdev) { drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work)); - cancel_work_sync(&vdev->pm->recovery_work); + disable_work_sync(&vdev->pm->recovery_work); } void ivpu_pm_enable(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index 89b264cc0e3e..a2aa7a27f32e 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -25,7 +25,7 @@ struct ivpu_pm_info { void ivpu_pm_init(struct ivpu_device *vdev); void ivpu_pm_enable(struct ivpu_device *vdev); void ivpu_pm_disable(struct ivpu_device *vdev); -void ivpu_pm_cancel_recovery(struct ivpu_device *vdev); +void ivpu_pm_disable_recovery(struct ivpu_device *vdev); int ivpu_pm_suspend_cb(struct device *dev); int ivpu_pm_resume_cb(struct device *dev); From 63ddc0a75b3b071f04f4bc277b2510eb06d21648 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 25 Aug 2025 09:27:08 +0200 Subject: [PATCH 1013/1307] arm64: dts: rockchip: fix USB on RADXA ROCK 5T The RADXA ROCK 5T board uses the same GPIO pin for controlling the USB host port regulator. This control pin was mistakenly left out of the ROCK 5T device tree. Reported-by: FUKAUMI Naoki Closes: https://libera.catirclogs.org/linux-rockchip/2025-08-25#38609886; Fixes: 0ea651de9b79 ("arm64: dts: rockchip: add ROCK 5T device tree") Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20250825-rock5t-usb-fix-v1-1-de71954a1bb5@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts index 258c7400301d..6acc7a8a5a12 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts @@ -95,6 +95,12 @@ hp_detect: hp-detect { rockchip,pins = <4 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>; }; }; + + usb { + vcc5v0_host_en: vcc5v0-host-en { + rockchip,pins = <1 RK_PA1 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; }; &vcc3v3_pcie2x1l0 { @@ -103,3 +109,10 @@ &vcc3v3_pcie2x1l0 { pinctrl-0 = <&pcie2_0_vcc3v3_en>; status = "okay"; }; + +&vcc5v0_host { + enable-active-high; + gpio = <&gpio1 RK_PA1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&vcc5v0_host_en>; +}; From cc8e91054c0a778074ecffaf12bd0944e884d71c Mon Sep 17 00:00:00 2001 From: Tina Wuest Date: Mon, 1 Sep 2025 12:20:24 +0300 Subject: [PATCH 1014/1307] ALSA: usb-audio: Allow Focusrite devices to use low samplerates Commit 05f254a6369ac020fc0382a7cbd3ef64ad997c92 ("ALSA: usb-audio: Improve filtering of sample rates on Focusrite devices") changed the check for max_rate in a way which was overly restrictive, forcing devices to use very high samplerates if they support them, despite support existing for lower rates as well. This maintains the intended outcome (ensuring samplerates selected are supported) while allowing devices with higher maximum samplerates to be opened at all supported samplerates. This patch was tested with a Clarett+ 8Pre USB Fixes: 05f254a6369a ("ALSA: usb-audio: Improve filtering of sample rates on Focusrite devices") Signed-off-by: Tina Wuest Link: https://patch.msgid.link/20250901092024.140993-1-tina@wuest.me Signed-off-by: Takashi Iwai --- sound/usb/format.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 0ee532acbb60..ec95a063beb1 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -327,12 +327,16 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, max_rate = combine_quad(&fmt[6]); switch (max_rate) { + case 192000: + if (rate == 176400 || rate == 192000) + return true; + fallthrough; + case 96000: + if (rate == 88200 || rate == 96000) + return true; + fallthrough; case 48000: return (rate == 44100 || rate == 48000); - case 96000: - return (rate == 88200 || rate == 96000); - case 192000: - return (rate == 176400 || rate == 192000); default: usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", From bcd6659d4911c528381531472a0cefbd4003e29e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Sep 2025 13:50:08 +0200 Subject: [PATCH 1015/1307] ALSA: hda/hdmi: Add pin fix for another HP EliteDesk 800 G4 model It was reported that HP EliteDesk 800 G4 DM 65W (SSID 103c:845a) needs the similar quirk for enabling HDMI outputs, too. This patch adds the corresponding quirk entry. Cc: Link: https://patch.msgid.link/20250901115009.27498-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/hdmi/hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/hdmi/hdmi.c b/sound/hda/codecs/hdmi/hdmi.c index b5d840d9892b..44576b30f699 100644 --- a/sound/hda/codecs/hdmi/hdmi.c +++ b/sound/hda/codecs/hdmi/hdmi.c @@ -1582,6 +1582,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1), SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), + SND_PCI_QUIRK(0x103c, 0x845a, "HP EliteDesk 800 G4 DM 65W", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), From 782a7c73078e1301c0c427f21c06377d77dfa541 Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:40 +0100 Subject: [PATCH 1016/1307] spi: spi-fsl-lpspi: Fix transmissions when using CONT Commit 6a130448498c ("spi: lpspi: Fix wrong transmission when don't use CONT") breaks transmissions when CONT is used. The TDIE interrupt should not be disabled in all cases. If CONT is used and the TX transfer is not yet completed yet, but the interrupt handler is called because there are characters to be received, TDIE is replaced with FCIE. When the transfer is finally completed, SR_TDF is set but the interrupt handler isn't called again. Fixes: 6a130448498c ("spi: lpspi: Fix wrong transmission when don't use CONT") Signed-off-by: Larisa Grigore Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-1-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 313e444a34f3..eaa6bade61a6 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -3,7 +3,7 @@ // Freescale i.MX7ULP LPSPI driver // // Copyright 2016 Freescale Semiconductor, Inc. -// Copyright 2018 NXP Semiconductors +// Copyright 2018, 2023, 2025 NXP #include #include @@ -785,7 +785,7 @@ static irqreturn_t fsl_lpspi_isr(int irq, void *dev_id) if (temp_SR & SR_MBF || readl(fsl_lpspi->base + IMX7ULP_FSR) & FSR_TXCOUNT) { writel(SR_FCF, fsl_lpspi->base + IMX7ULP_SR); - fsl_lpspi_intctrl(fsl_lpspi, IER_FCIE); + fsl_lpspi_intctrl(fsl_lpspi, IER_FCIE | (temp_IER & IER_TDIE)); return IRQ_HANDLED; } From cbe33705864ba2697a2939de715b81538cf32430 Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:41 +0100 Subject: [PATCH 1017/1307] spi: spi-fsl-lpspi: Set correct chip-select polarity bit The driver currently supports multiple chip-selects, but only sets the polarity for the first one (CS 0). Fix it by setting the PCSPOL bit for the desired chip-select. Fixes: 5314987de5e5 ("spi: imx: add lpspi bus driver") Signed-off-by: Larisa Grigore Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-2-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index eaa6bade61a6..5ea4a306ffa6 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -5,6 +5,7 @@ // Copyright 2016 Freescale Semiconductor, Inc. // Copyright 2018, 2023, 2025 NXP +#include #include #include #include @@ -70,7 +71,7 @@ #define DER_TDDE BIT(0) #define CFGR1_PCSCFG BIT(27) #define CFGR1_PINCFG (BIT(24)|BIT(25)) -#define CFGR1_PCSPOL BIT(8) +#define CFGR1_PCSPOL_MASK GENMASK(11, 8) #define CFGR1_NOSTALL BIT(3) #define CFGR1_HOST BIT(0) #define FSR_TXCOUNT (0xFF) @@ -423,7 +424,9 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi) else temp = CFGR1_PINCFG; if (fsl_lpspi->config.mode & SPI_CS_HIGH) - temp |= CFGR1_PCSPOL; + temp |= FIELD_PREP(CFGR1_PCSPOL_MASK, + BIT(fsl_lpspi->config.chip_select)); + writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1); temp = readl(fsl_lpspi->base + IMX7ULP_CR); From e811b088a3641861fc9d2b2b840efc61a0f1907d Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:42 +0100 Subject: [PATCH 1018/1307] spi: spi-fsl-lpspi: Reset FIFO and disable module on transfer abort In DMA mode fsl_lpspi_reset() is always called at the end, even when the transfer is aborted. In PIO mode aborts skip the reset leaving the FIFO filled and the module enabled. Fix it by always calling fsl_lpspi_reset(). Fixes: a15dc3d657fa ("spi: lpspi: Fix CLK pin becomes low before one transfer") Signed-off-by: Larisa Grigore Reviewed-by: Frank Li Signed-off-by: James Clark Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-3-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 5ea4a306ffa6..e50261e9a1fa 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -733,12 +733,10 @@ static int fsl_lpspi_pio_transfer(struct spi_controller *controller, fsl_lpspi_write_tx_fifo(fsl_lpspi); ret = fsl_lpspi_wait_for_completion(controller); - if (ret) - return ret; fsl_lpspi_reset(fsl_lpspi); - return 0; + return ret; } static int fsl_lpspi_transfer_one(struct spi_controller *controller, From dedf9c93dece441e9a0a4836458bc93677008ddd Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:43 +0100 Subject: [PATCH 1019/1307] spi: spi-fsl-lpspi: Clear status register after disabling the module Clear the error flags after disabling the module to avoid the case when a flag is set again between flag clear and module disable. And use SR_CLEAR_MASK to replace hardcoded value for improved readability. Although fsl_lpspi_reset() was only introduced in commit a15dc3d657fa ("spi: lpspi: Fix CLK pin becomes low before one transfer"), the original driver only reset SR in the interrupt handler, making it vulnerable to the same issue. Therefore the fixes commit is set at the introduction of the driver. Fixes: 5314987de5e5 ("spi: imx: add lpspi bus driver") Signed-off-by: Larisa Grigore Signed-off-by: Ciprian Marian Costea Reviewed-by: Frank Li Signed-off-by: James Clark Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-4-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index e50261e9a1fa..fc4d49f0717e 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -83,6 +83,8 @@ #define TCR_RXMSK BIT(19) #define TCR_TXMSK BIT(18) +#define SR_CLEAR_MASK GENMASK(13, 8) + struct fsl_lpspi_devtype_data { u8 prescale_max; }; @@ -535,14 +537,13 @@ static int fsl_lpspi_reset(struct fsl_lpspi_data *fsl_lpspi) fsl_lpspi_intctrl(fsl_lpspi, 0); } - /* W1C for all flags in SR */ - temp = 0x3F << 8; - writel(temp, fsl_lpspi->base + IMX7ULP_SR); - /* Clear FIFO and disable module */ temp = CR_RRF | CR_RTF; writel(temp, fsl_lpspi->base + IMX7ULP_CR); + /* W1C for all flags in SR */ + writel(SR_CLEAR_MASK, fsl_lpspi->base + IMX7ULP_SR); + return 0; } From b663fd4532699cc24f5d1094f3859198ee1ed4b6 Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:44 +0100 Subject: [PATCH 1020/1307] dt-bindings: lpspi: Document support for S32G Add compatible strings 'nxp,s32g2-lpspi' and 'nxp,s32g3-lpspi' for S32G2 and S32G3. Require nxp,s32g3-lpspi to fallback to nxp,s32g2-lpspi since they are currently compatible. Signed-off-by: Larisa Grigore Signed-off-by: James Clark Reviewed-by: Rob Herring (Arm) Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-5-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml index a65a42ccaafe..a82360bed188 100644 --- a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml +++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml @@ -20,6 +20,7 @@ properties: - enum: - fsl,imx7ulp-spi - fsl,imx8qxp-spi + - nxp,s32g2-lpspi - items: - enum: - fsl,imx8ulp-spi @@ -27,6 +28,10 @@ properties: - fsl,imx94-spi - fsl,imx95-spi - const: fsl,imx7ulp-spi + - items: + - const: nxp,s32g3-lpspi + - const: nxp,s32g2-lpspi + reg: maxItems: 1 From fb4273faa4d0eeca8cb7265531d48eb084bcceea Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Aug 2025 11:14:45 +0100 Subject: [PATCH 1021/1307] spi: spi-fsl-lpspi: Constify devtype datas Add const for all devtype_data. Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-6-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index fc4d49f0717e..f16449cf42bb 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -135,11 +135,11 @@ struct fsl_lpspi_data { * ERR051608 fixed or not: * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf */ -static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { +static const struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { .prescale_max = 1, }; -static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { +static const struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { .prescale_max = 7, }; From 9bbfb1ec959ce95f91cfab544f705e5257be3be1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Aug 2025 11:14:46 +0100 Subject: [PATCH 1022/1307] spi: spi-fsl-lpspi: Treat prescale_max == 0 as no erratum This erratum only ever results in a max value of 1, otherwise the full 3 bits are available. To avoid repeating the same default prescale value for every new device's devdata, treat 0 as no limit (7) and only set a value when the erratum is present. Change the field to be 3 bits to catch out of range definitions. No functionality change. Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-7-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index f16449cf42bb..ea25e8dab0a4 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -86,7 +86,7 @@ #define SR_CLEAR_MASK GENMASK(13, 8) struct fsl_lpspi_devtype_data { - u8 prescale_max; + u8 prescale_max : 3; /* 0 == no limit */ }; struct lpspi_config { @@ -132,15 +132,15 @@ struct fsl_lpspi_data { }; /* - * ERR051608 fixed or not: - * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf + * Devices with ERR051608 have a max TCR_PRESCALE value of 1, otherwise there is + * no prescale limit: https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf */ static const struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { .prescale_max = 1, }; static const struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { - .prescale_max = 7, + /* All defaults */ }; static const struct of_device_id fsl_lpspi_dt_ids[] = { @@ -324,7 +324,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) int scldiv; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); - prescale_max = fsl_lpspi->devtype_data->prescale_max; + prescale_max = fsl_lpspi->devtype_data->prescale_max ?: 7; if (!config.speed_hz) { dev_err(fsl_lpspi->dev, From 41c91c2eed83cb93781078108077b7e34f867fc2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Aug 2025 11:14:47 +0100 Subject: [PATCH 1023/1307] spi: spi-fsl-lpspi: Parameterize reading num-cs from hardware Add query_hw_for_num_cs in devtype to avoid directly checking compatible string "fsl,imx93-spi". No functionality change. Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-8-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index ea25e8dab0a4..ce347cdbb009 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -87,6 +87,7 @@ struct fsl_lpspi_devtype_data { u8 prescale_max : 3; /* 0 == no limit */ + bool query_hw_for_num_cs : 1; }; struct lpspi_config { @@ -137,6 +138,7 @@ struct fsl_lpspi_data { */ static const struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { .prescale_max = 1, + .query_hw_for_num_cs = true, }; static const struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { @@ -932,7 +934,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->rxfifosize = 1 << ((temp >> 8) & 0x0f); if (of_property_read_u32((&pdev->dev)->of_node, "num-cs", &num_cs)) { - if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx93-spi")) + if (devtype_data->query_hw_for_num_cs) num_cs = ((temp >> 16) & 0xf); else num_cs = 1; From 431f6c88cb5d2d62d579d4d78f5c1a2583465ffb Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 28 Aug 2025 11:14:48 +0100 Subject: [PATCH 1024/1307] spi: spi-fsl-lpspi: Add compatible for S32G S32G doesn't have the max prescale erratum (default) and it can query the max number of CS from hardware, so add those settings. Signed-off-by: Larisa Grigore Signed-off-by: Ciprian Marian Costea Signed-off-by: James Clark Reviewed-by: Frank Li Link: https://patch.msgid.link/20250828-james-nxp-lpspi-v2-9-6262b9aa9be4@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index ce347cdbb009..431439d4cdda 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -145,9 +145,14 @@ static const struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { /* All defaults */ }; +static const struct fsl_lpspi_devtype_data s32g_lpspi_devtype_data = { + .query_hw_for_num_cs = true, +}; + static const struct of_device_id fsl_lpspi_dt_ids[] = { { .compatible = "fsl,imx7ulp-spi", .data = &imx7ulp_lpspi_devtype_data,}, { .compatible = "fsl,imx93-spi", .data = &imx93_lpspi_devtype_data,}, + { .compatible = "nxp,s32g2-lpspi", .data = &s32g_lpspi_devtype_data,}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids); From 7446284023e8ef694fb392348185349c773eefb3 Mon Sep 17 00:00:00 2001 From: Khairul Anuar Romli Date: Tue, 26 Aug 2025 08:33:58 +0800 Subject: [PATCH 1025/1307] spi: cadence-quadspi: Implement refcount to handle unbind during busy driver support indirect read and indirect write operation with assumption no force device removal(unbind) operation. However force device removal(removal) is still available to root superuser. Unbinding driver during operation causes kernel crash. This changes ensure driver able to handle such operation for indirect read and indirect write by implementing refcount to track attached devices to the controller and gracefully wait and until attached devices remove operation completed before proceed with removal operation. Signed-off-by: Khairul Anuar Romli Reviewed-by: Matthew Gerlach Reviewed-by: Niravkumar L Rabara Link: https://patch.msgid.link/8704fd6bd2ff4d37bba4a0eacf5eba3ba001079e.1756168074.git.khairul.anuar.romli@altera.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 177f9a33f3a2..9bf823348cd3 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -108,6 +108,8 @@ struct cqspi_st { bool is_jh7110; /* Flag for StarFive JH7110 SoC */ bool disable_stig_mode; + refcount_t refcount; + refcount_t inflight_ops; const struct cqspi_driver_platdata *ddata; }; @@ -735,6 +737,9 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata, u8 *rxbuf_end = rxbuf + n_rx; int ret = 0; + if (!refcount_read(&cqspi->refcount)) + return -ENODEV; + writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR); writel(remaining, reg_base + CQSPI_REG_INDIRECTRDBYTES); @@ -1071,6 +1076,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata, unsigned int write_bytes; int ret; + if (!refcount_read(&cqspi->refcount)) + return -ENODEV; + writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR); writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES); @@ -1461,12 +1469,26 @@ static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) struct cqspi_st *cqspi = spi_controller_get_devdata(mem->spi->controller); struct device *dev = &cqspi->pdev->dev; + if (refcount_read(&cqspi->inflight_ops) == 0) + return -ENODEV; + ret = pm_runtime_resume_and_get(dev); if (ret) { dev_err(&mem->spi->dev, "resume failed with %d\n", ret); return ret; } + if (!refcount_read(&cqspi->refcount)) + return -EBUSY; + + refcount_inc(&cqspi->inflight_ops); + + if (!refcount_read(&cqspi->refcount)) { + if (refcount_read(&cqspi->inflight_ops)) + refcount_dec(&cqspi->inflight_ops); + return -EBUSY; + } + ret = cqspi_mem_process(mem, op); pm_runtime_put_autosuspend(dev); @@ -1474,6 +1496,9 @@ static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op) if (ret) dev_err(&mem->spi->dev, "operation failed with %d\n", ret); + if (refcount_read(&cqspi->inflight_ops) > 1) + refcount_dec(&cqspi->inflight_ops); + return ret; } @@ -1925,6 +1950,9 @@ static int cqspi_probe(struct platform_device *pdev) } } + refcount_set(&cqspi->refcount, 1); + refcount_set(&cqspi->inflight_ops, 1); + ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, pdev->name, cqspi); if (ret) { @@ -1987,6 +2015,11 @@ static void cqspi_remove(struct platform_device *pdev) { struct cqspi_st *cqspi = platform_get_drvdata(pdev); + refcount_set(&cqspi->refcount, 0); + + if (!refcount_dec_and_test(&cqspi->inflight_ops)) + cqspi_wait_idle(cqspi); + spi_unregister_controller(cqspi->host); cqspi_controller_enable(cqspi, 0); From bd7e7bc2cc2024035dfbc8239c9f4d8675793445 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 29 Aug 2025 06:49:51 -0700 Subject: [PATCH 1026/1307] hwmon: (ina238) Correctly clamp shunt voltage limit When clamping a register value, the result needs to be masked against the register size. This was missing, resulting in errors when trying to write negative limits. Fix by masking the clamping result against the register size. Fixes: eacb52f010a80 ("hwmon: Driver for Texas Instruments INA238") Cc: Nathan Rossi Cc: Chris Packham Signed-off-by: Guenter Roeck --- drivers/hwmon/ina238.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina238.c b/drivers/hwmon/ina238.c index 4d3dc018ead9..c6b2734898d0 100644 --- a/drivers/hwmon/ina238.c +++ b/drivers/hwmon/ina238.c @@ -379,7 +379,7 @@ static int ina238_write_in(struct device *dev, u32 attr, int channel, regval = clamp_val(val, -163, 163); regval = (regval * 1000 * 4) / (INA238_SHUNT_VOLTAGE_LSB * data->gain); - regval = clamp_val(regval, S16_MIN, S16_MAX); + regval = clamp_val(regval, S16_MIN, S16_MAX) & 0xffff; switch (attr) { case hwmon_in_max: From c2623573178bab32990695fb729e9b69710ed66d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 29 Aug 2025 11:51:20 -0700 Subject: [PATCH 1027/1307] hwmon: (ina238) Correctly clamp power limits ina238_write_power() was attempting to clamp the user input but was throwing away the result. Ensure that we clamp the value to the appropriate range before it is converted into a register value. Fixes: 0d9f596b1fe34 ("hwmon: (ina238) Modify the calculation formula to adapt to different chips") Cc: Wenliang Yan Cc: Chris Packham Signed-off-by: Guenter Roeck --- drivers/hwmon/ina238.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ina238.c b/drivers/hwmon/ina238.c index c6b2734898d0..59a2c8889fa2 100644 --- a/drivers/hwmon/ina238.c +++ b/drivers/hwmon/ina238.c @@ -517,9 +517,10 @@ static int ina238_write_power(struct device *dev, u32 attr, long val) * Unsigned postive values. Compared against the 24-bit power register, * lower 8-bits are truncated. Same conversion to/from uW as POWER * register. + * The first clamp_val() is to establish a baseline to avoid overflows. */ - regval = clamp_val(val, 0, LONG_MAX); - regval = div_u64(val * 4 * 100 * data->rshunt, data->config->power_calculate_factor * + regval = clamp_val(val, 0, LONG_MAX / 2); + regval = div_u64(regval * 4 * 100 * data->rshunt, data->config->power_calculate_factor * 1000ULL * INA238_FIXED_SHUNT * data->gain); regval = clamp_val(regval >> 8, 0, U16_MAX); From a22d3b0d49d411e64ed07e30c2095035ecb30ed2 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 19 Aug 2025 08:56:22 +0200 Subject: [PATCH 1028/1307] phy: ti: gmii-sel: Always write the RGMII ID setting Some SoCs are just validated with the TX delay enabled. With commit ca13b249f291 ("net: ethernet: ti: am65-cpsw: fixup PHY mode for fixed RGMII TX delay"), the network driver will patch the delay setting on the fly assuming that the TX delay setting is fixed. In reality, the TX delay is configurable and just skipped in the documentation. There are bootloaders, which will disable the TX delay and this will lead to a transmit path which doesn't add any delays at all. Fix that by always writing the RGMII_ID setting and report an error for unsupported RGMII delay modes. This is safe to do and shouldn't break any boards in mainline because the fixed delay is only introduced for gmii-sel compatibles which are used together with the am65-cpsw-nuss driver and also contains the commit above. Fixes: ca13b249f291 ("net: ethernet: ti: am65-cpsw: fixup PHY mode for fixed RGMII TX delay") Signed-off-by: Michael Walle Reviewed-by: Maxime Chevallier Link: https://lore.kernel.org/r/20250819065622.1019537-1-mwalle@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 47 +++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index ff5d5e29629f..50adabb867cb 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -34,6 +34,7 @@ enum { PHY_GMII_SEL_PORT_MODE = 0, PHY_GMII_SEL_RGMII_ID_MODE, PHY_GMII_SEL_RMII_IO_CLK_EN, + PHY_GMII_SEL_FIXED_TX_DELAY, PHY_GMII_SEL_LAST, }; @@ -127,6 +128,11 @@ static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode) goto unsupported; } + /* With a fixed delay, some modes are not supported at all. */ + if (soc_data->features & BIT(PHY_GMII_SEL_FIXED_TX_DELAY) && + rgmii_id != 0) + return -EINVAL; + if_phy->phy_if_mode = submode; dev_dbg(dev, "%s id:%u mode:%u rgmii_id:%d rmii_clk_ext:%d\n", @@ -210,25 +216,46 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_soc_dm814 = { static const struct reg_field phy_gmii_sel_fields_am654[][PHY_GMII_SEL_LAST] = { - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x0, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x4, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x8, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0xC, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x10, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x14, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x18, 0, 2), }, - { [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x1C, 0, 2), }, + { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x0, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x0, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x4, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x4, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x8, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x8, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0xC, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0xC, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x10, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x10, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x14, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x14, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x18, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x18, 4, 4), + }, { + [PHY_GMII_SEL_PORT_MODE] = REG_FIELD(0x1C, 0, 2), + [PHY_GMII_SEL_RGMII_ID_MODE] = REG_FIELD(0x1C, 4, 4), + }, }; static const struct phy_gmii_sel_soc_data phy_gmii_sel_soc_am654 = { .use_of_data = true, + .features = BIT(PHY_GMII_SEL_RGMII_ID_MODE) | + BIT(PHY_GMII_SEL_FIXED_TX_DELAY), .regfields = phy_gmii_sel_fields_am654, }; static const struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { .use_of_data = true, + .features = BIT(PHY_GMII_SEL_RGMII_ID_MODE) | + BIT(PHY_GMII_SEL_FIXED_TX_DELAY), .regfields = phy_gmii_sel_fields_am654, .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII) | BIT(PHY_INTERFACE_MODE_USXGMII), @@ -239,6 +266,8 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = { static const struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j721e = { .use_of_data = true, + .features = BIT(PHY_GMII_SEL_RGMII_ID_MODE) | + BIT(PHY_GMII_SEL_FIXED_TX_DELAY), .regfields = phy_gmii_sel_fields_am654, .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII), .num_ports = 8, @@ -248,6 +277,8 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j721e = { static const struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j784s4 = { .use_of_data = true, + .features = BIT(PHY_GMII_SEL_RGMII_ID_MODE) | + BIT(PHY_GMII_SEL_FIXED_TX_DELAY), .regfields = phy_gmii_sel_fields_am654, .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII) | BIT(PHY_INTERFACE_MODE_SGMII) | BIT(PHY_INTERFACE_MODE_USXGMII), From 6cb8c1f957f674ca20b7d7c96b1f1bb11b83b679 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 21 Aug 2025 10:01:47 +0200 Subject: [PATCH 1029/1307] phy: qcom: qmp-pcie: Fix PHY initialization when powered down by firmware Commit 0cc22f5a861c ("phy: qcom: qmp-pcie: Add PHY register retention support") added support for using the "no_csr" reset to skip configuration of the PHY if the init sequence was already applied by the boot firmware. The expectation is that the PHY is only turned on/off by using the "no_csr" reset, instead of powering it down and re-programming it after a full reset. The boot firmware on X1E does not fully conform to this expectation: If the PCIe3 link fails to come up (e.g. because no PCIe card is inserted), the firmware powers down the PHY using the QPHY_PCS_POWER_DOWN_CONTROL register. The QPHY_START_CTRL register is kept as-is, so the driver assumes the PHY is already initialized and skips the configuration/power up sequence. The PHY won't come up again without clearing the QPHY_PCS_POWER_DOWN_CONTROL, so eventually initialization fails: qcom-qmp-pcie-phy 1be0000.phy: phy initialization timed-out phy phy-1be0000.phy.0: phy poweron failed --> -110 qcom-pcie 1bd0000.pcie: cannot initialize host qcom-pcie 1bd0000.pcie: probe with driver qcom-pcie failed with error -110 This can be reliably reproduced on the X1E CRD, QCP and Devkit when no card is inserted for PCIe3. Fix this by checking the QPHY_PCS_POWER_DOWN_CONTROL register in addition to QPHY_START_CTRL. If the PHY is powered down with the register, it doesn't conform to the expectations for using the "no_csr" reset, so we fully re-initialize with the normal reset sequence. Also check the register more carefully to ensure all of the bits we expect are actually set. A simple !!(readl()) is not enough, because the PHY might be only partially set up with some of the expected bits set. Cc: stable@vger.kernel.org Fixes: 0cc22f5a861c ("phy: qcom: qmp-pcie: Add PHY register retention support") Signed-off-by: Stephan Gerhold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250821-phy-qcom-qmp-pcie-nocsr-fix-v3-1-4898db0cc07c@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 25 ++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c index 95830dcfdec9..0fa63b734b67 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c @@ -3067,6 +3067,14 @@ struct qmp_pcie { struct clk_fixed_rate aux_clk_fixed; }; +static bool qphy_checkbits(const void __iomem *base, u32 offset, u32 val) +{ + u32 reg; + + reg = readl(base + offset); + return (reg & val) == val; +} + static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val) { u32 reg; @@ -4339,16 +4347,21 @@ static int qmp_pcie_init(struct phy *phy) struct qmp_pcie *qmp = phy_get_drvdata(phy); const struct qmp_phy_cfg *cfg = qmp->cfg; void __iomem *pcs = qmp->pcs; - bool phy_initialized = !!(readl(pcs + cfg->regs[QPHY_START_CTRL])); int ret; - qmp->skip_init = qmp->nocsr_reset && phy_initialized; /* - * We need to check the existence of init sequences in two cases: - * 1. The PHY doesn't support no_csr reset. - * 2. The PHY supports no_csr reset but isn't initialized by bootloader. - * As we can't skip init in these two cases. + * We can skip PHY initialization if all of the following conditions + * are met: + * 1. The PHY supports the nocsr_reset that preserves the PHY config. + * 2. The PHY was started (and not powered down again) by the + * bootloader, with all of the expected bits set correctly. + * In this case, we can continue without having the init sequence + * defined in the driver. */ + qmp->skip_init = qmp->nocsr_reset && + qphy_checkbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START) && + qphy_checkbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], cfg->pwrdn_ctrl); + if (!qmp->skip_init && !cfg->tbls.serdes_num) { dev_err(qmp->dev, "Init sequence not available\n"); return -ENODATA; From 49c2502b5946ebf454d7e16fd0189769a82b6117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 30 Aug 2025 11:38:42 -0700 Subject: [PATCH 1030/1307] selftests: drv-net: csum: fix interface name for remote host Use cfg.remote_ifname for arguments of remote command. Without this UDP tests fail in NIPA where local interface is called enp1s0 and remote enp0s4. Fixes: 1d0dc857b5d8 ("selftests: drv-net: add checksum tests") Reviewed-by: Willem de Bruijn Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250830183842.688935-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/csum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None): ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None): if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): From 7000f4fa9b24ae2511b07babd0d49e888db5d265 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 29 Aug 2025 23:23:27 -0700 Subject: [PATCH 1031/1307] bnxt_en: fix incorrect page count in RX aggr ring log The warning in bnxt_alloc_one_rx_ring_netmem() reports the number of pages allocated for the RX aggregation ring. However, it mistakenly used bp->rx_ring_size instead of bp->rx_agg_ring_size, leading to confusing or misleading log output. Use the correct bp->rx_agg_ring_size value to fix this. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Alok Tiwari Reviewed-by: Jacob Keller Reviewed-by: Michael Chan Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250830062331.783783-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 31e3d825b4bc..0daa08cecaf2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4397,7 +4397,7 @@ static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp, for (i = 0; i < bp->rx_agg_ring_size; i++) { if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n", - ring_nr, i, bp->rx_ring_size); + ring_nr, i, bp->rx_agg_ring_size); break; } prod = NEXT_RX_AGG(prod); From c6dd1aa2cbb72b33e0569f3e71d95792beab5042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Bl=C3=A4se?= Date: Thu, 28 Aug 2025 11:14:35 +0200 Subject: [PATCH 1032/1307] icmp: fix icmp_ndo_send address translation for reply direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The icmp_ndo_send function was originally introduced to ensure proper rate limiting when icmp_send is called by a network device driver, where the packet's source address may have already been transformed by SNAT. However, the original implementation only considers the IP_CT_DIR_ORIGINAL direction for SNAT and always replaced the packet's source address with that of the original-direction tuple. This causes two problems: 1. For SNAT: Reply-direction packets were incorrectly translated using the source address of the CT original direction, even though no translation is required. 2. For DNAT: Reply-direction packets were not handled at all. In DNAT, the original direction's destination is translated. Therefore, in the reply direction the source address must be set to the reply-direction source, so rate limiting works as intended. Fix this by using the connection direction to select the correct tuple for source address translation, and adjust the pre-checks to handle reply-direction packets in case of DNAT. Additionally, wrap the `ct->status` access in READ_ONCE(). This avoids possible KCSAN reports about concurrent updates to `ct->status`. Fixes: 0b41713b6066 ("icmp: introduce helper for nat'd source address in network device context") Signed-off-by: Fabian Bläse Cc: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 6 ++++-- net/ipv6/ip6_icmp.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2ffe73ea644f..c48c572f024d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -799,11 +799,12 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct sk_buff *cloned_skb = NULL; struct ip_options opts = { 0 }; enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; struct nf_conn *ct; __be32 orig_ip; ct = nf_ct_get(skb_in, &ctinfo); - if (!ct || !(ct->status & IPS_SRC_NAT)) { + if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { __icmp_send(skb_in, type, code, info, &opts); return; } @@ -818,7 +819,8 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) goto out; orig_ip = ip_hdr(skb_in)->saddr; - ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + dir = CTINFO2DIR(ctinfo); + ip_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.ip; __icmp_send(skb_in, type, code, info, &opts); ip_hdr(skb_in)->saddr = orig_ip; out: diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 9e3574880cb0..233914b63bdb 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -54,11 +54,12 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) struct inet6_skb_parm parm = { 0 }; struct sk_buff *cloned_skb = NULL; enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; struct in6_addr orig_ip; struct nf_conn *ct; ct = nf_ct_get(skb_in, &ctinfo); - if (!ct || !(ct->status & IPS_SRC_NAT)) { + if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { __icmpv6_send(skb_in, type, code, info, &parm); return; } @@ -73,7 +74,8 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) goto out; orig_ip = ipv6_hdr(skb_in)->saddr; - ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + dir = CTINFO2DIR(ctinfo); + ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6; __icmpv6_send(skb_in, type, code, info, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: From e580beaf43d563aaf457f1c7f934002355ebfe7b Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 28 Aug 2025 20:18:58 +0800 Subject: [PATCH 1033/1307] eth: mlx4: Fix IS_ERR() vs NULL check bug in mlx4_en_create_rx_ring Replace NULL check with IS_ERR() check after calling page_pool_create() since this function returns error pointers (ERR_PTR). Using NULL check could lead to invalid pointer dereference. Fixes: 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") Signed-off-by: Miaoqian Lin Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250828121858.67639-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 92a16ddb7d86..13666d50b90f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -267,8 +267,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, pp.dma_dir = priv->dma_dir; ring->pp = page_pool_create(&pp); - if (!ring->pp) + if (IS_ERR(ring->pp)) { + err = PTR_ERR(ring->pp); goto err_ring; + } if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) goto err_pp; From b434a3772dca1c90a40e8ec69230caa55c18ef84 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 29 Aug 2025 15:48:42 +0900 Subject: [PATCH 1034/1307] docs: remove obsolete description about threaded NAPI Commit 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") introduced threaded NAPI configuration per individual NAPI instance, however obsolete description that threaded NAPI is per device has remained. Remove the old description and clarify that only NAPI instances running in threaded mode spawn kernel threads by changing "Each NAPI instance" to "Each threaded NAPI instance". Signed-off-by: Kohei Enju Reviewed-by: Samiullah Khawaja Link: https://patch.msgid.link/20250829064857.51503-1-enjuk@amazon.com Signed-off-by: Jakub Kicinski --- Documentation/networking/napi.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index a15754adb041..7dd60366f4ff 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -433,9 +433,8 @@ Threaded NAPI Threaded NAPI is an operating mode that uses dedicated kernel threads rather than software IRQ context for NAPI processing. -The configuration is per netdevice and will affect all -NAPI instances of that device. Each NAPI instance will spawn a separate -thread (called ``napi/${ifc-name}-${napi-id}``). +Each threaded NAPI instance will spawn a separate thread +(called ``napi/${ifc-name}-${napi-id}``). It is recommended to pin each kernel thread to a single CPU, the same CPU as the CPU which services the interrupt. Note that the mapping From 6bc8a5098bf4a365c4086a4a4130bfab10a58260 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 29 Aug 2025 10:35:21 -0400 Subject: [PATCH 1035/1307] net: macb: Fix tx_ptr_lock locking macb_start_xmit and macb_tx_poll can be called with bottom-halves disabled (e.g. from softirq) as well as with interrupts disabled (with netpoll). Because of this, all other functions taking tx_ptr_lock must use spin_lock_irqsave. Fixes: 138badbc21a0 ("net: macb: use NAPI for TX completion path") Reported-by: Mike Galbraith Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250829143521.1686062-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 28 ++++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 16d28a8b3b56..c769b7dbd3ba 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1223,12 +1223,13 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; u16 queue_index = queue - bp->queues; + unsigned long flags; unsigned int tail; unsigned int head; int packets = 0; u32 bytes = 0; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); head = queue->tx_head; for (tail = queue->tx_tail; tail != head && packets < budget; tail++) { struct macb_tx_skb *tx_skb; @@ -1291,7 +1292,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) CIRC_CNT(queue->tx_head, queue->tx_tail, bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp)) netif_wake_subqueue(bp->dev, queue_index); - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return packets; } @@ -1707,8 +1708,9 @@ static void macb_tx_restart(struct macb_queue *queue) { struct macb *bp = queue->bp; unsigned int head_idx, tbqp; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head == queue->tx_tail) goto out_tx_ptr_unlock; @@ -1720,19 +1722,20 @@ static void macb_tx_restart(struct macb_queue *queue) if (tbqp == head_idx) goto out_tx_ptr_unlock; - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); out_tx_ptr_unlock: - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); } static bool macb_tx_complete_pending(struct macb_queue *queue) { bool retval = false; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head != queue->tx_tail) { /* Make hw descriptor updates visible to CPU */ rmb(); @@ -1740,7 +1743,7 @@ static bool macb_tx_complete_pending(struct macb_queue *queue) if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED)) retval = true; } - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return retval; } @@ -2308,6 +2311,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) struct macb_queue *queue = &bp->queues[queue_index]; unsigned int desc_cnt, nr_frags, frag_size, f; unsigned int hdrlen; + unsigned long flags; bool is_lso; netdev_tx_t ret = NETDEV_TX_OK; @@ -2368,7 +2372,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length); } - spin_lock_bh(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); /* This is a hard error, log it. */ if (CIRC_SPACE(queue->tx_head, queue->tx_tail, @@ -2392,15 +2396,15 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index), skb->len); - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) netif_stop_subqueue(dev, queue_index); unlock: - spin_unlock_bh(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return ret; } From 030e1c45666629f72d0fc1d040f9d2915680de8e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 29 Aug 2025 20:55:40 +0200 Subject: [PATCH 1036/1307] macsec: read MACSEC_SA_ATTR_PN with nla_get_uint The code currently reads both U32 attributes and U64 attributes as U64, so when a U32 attribute is provided by userspace (ie, when not using XPN), on big endian systems, we'll load that value into the upper 32bits of the next_pn field instead of the lower 32bits. This means that the value that userspace provided is ignored (we only care about the lower 32bits for non-XPN), and we'll start using PNs from 0. Switch to nla_get_uint, which will read the value correctly on all arches, whether it's 32b or 64b. Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://patch.msgid.link/1c1df1661b89238caf5beefb84a10ebfd56c66ea.1756459839.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 4c75d1fea552..01329fe7451a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1844,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (tb_sa[MACSEC_SA_ATTR_PN]) { spin_lock_bh(&rx_sa->lock); - rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } @@ -2086,7 +2086,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) } spin_lock_bh(&tx_sa->lock); - tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) @@ -2398,7 +2398,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&tx_sa->lock); prev_pn = tx_sa->next_pn_halves; - tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + tx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } @@ -2496,7 +2496,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&rx_sa->lock); prev_pn = rx_sa->next_pn_halves; - rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); + rx_sa->next_pn = nla_get_uint(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&rx_sa->lock); } From c873ccbb2f8db46ad9b4a989ea924b6d8f19abf1 Mon Sep 17 00:00:00 2001 From: Gu Bowen Date: Fri, 22 Aug 2025 15:35:41 +0800 Subject: [PATCH 1037/1307] mm: fix possible deadlock in kmemleak There are some AA deadlock issues in kmemleak, similar to the situation reported by Breno [1]. The deadlock path is as follows: mem_pool_alloc() -> raw_spin_lock_irqsave(&kmemleak_lock, flags); -> pr_warn() -> netconsole subsystem -> netpoll -> __alloc_skb -> __create_object -> raw_spin_lock_irqsave(&kmemleak_lock, flags); To solve this problem, switch to printk_safe mode before printing warning message, this will redirect all printk()-s to a special per-CPU buffer, which will be flushed later from a safe context (irq work), and this deadlock problem can be avoided. The proper API to use should be printk_deferred_enter()/printk_deferred_exit() [2]. Another way is to place the warn print after kmemleak is released. Link: https://lkml.kernel.org/r/20250822073541.1886469-1-gubowen5@huawei.com Link: https://lore.kernel.org/all/20250731-kmemleak_lock-v1-1-728fd470198f@debian.org/#t [1] Link: https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/ [2] Signed-off-by: Gu Bowen Reviewed-by: Waiman Long Reviewed-by: Catalin Marinas Reviewed-by: Breno Leitao Cc: Greg Kroah-Hartman Cc: John Ogness Cc: Lu Jialin Cc: Petr Mladek Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 84265983f239..1ac56ceb29b6 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, else if (untagged_objp == untagged_ptr || alias) return object; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_warn("Found object by alias at 0x%08lx\n", ptr); dump_object_info(object); + printk_deferred_exit(); break; } } @@ -736,6 +742,11 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr, else if (untagged_objp + parent->size <= untagged_ptr) link = &parent->rb_node.rb_right; else { + /* + * Printk deferring due to the kmemleak_lock held. + * This is done to avoid deadlock. + */ + printk_deferred_enter(); kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", ptr); /* @@ -743,6 +754,7 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr, * be freed while the kmemleak_lock is held. */ dump_object_info(parent); + printk_deferred_exit(); return -EEXIST; } } @@ -856,13 +868,8 @@ static void delete_object_part(unsigned long ptr, size_t size, raw_spin_lock_irqsave(&kmemleak_lock, flags); object = __find_and_remove_object(ptr, 1, objflags); - if (!object) { -#ifdef DEBUG - kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", - ptr, size); -#endif + if (!object) goto unlock; - } /* * Create one or two objects that may result from the memory block @@ -882,8 +889,14 @@ static void delete_object_part(unsigned long ptr, size_t size, unlock: raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - if (object) + if (object) { __delete_object(object); + } else { +#ifdef DEBUG + kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", + ptr, size); +#endif + } out: if (object_l) From dcc6785caffad27f2ea601fdd2f9782036e1faed Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Tue, 26 Aug 2025 10:08:36 +0200 Subject: [PATCH 1038/1307] arm64: dts: rockchip: fix second M.2 slot on ROCK 5T The Radxa ROCK 5T has two M.2 slots, much like the Radxa Rock 5B+. As it stands, the board won't be able to use PCIe3 if the second M.2 slot is in use. Fix this by adding the necessary node enablement and data-lanes property to the ROCK 5T device tree, mirroring what's in the ROCK 5B+ device tree. Reported-by: FUKAUMI Naoki Closes: https://libera.catirclogs.org/linux-rockchip/2025-08-25#38610630; Fixes: 0ea651de9b79 ("arm64: dts: rockchip: add ROCK 5T device tree") Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20250826-rock5t-second-m2-fix-v1-1-8252124f9cc8@collabora.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3588-rock-5t.dts | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts index 6acc7a8a5a12..f16ff0064309 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5t.dts @@ -68,6 +68,22 @@ &pcie2x1l1 { status = "okay"; }; +&pcie30phy { + data-lanes = <1 1 2 2>; +}; + +&pcie3x2 { + pinctrl-names = "default"; + pinctrl-0 = <&pcie3x2_rst>; + reset-gpios = <&gpio4 RK_PB0 GPIO_ACTIVE_HIGH>; + vpcie3v3-supply = <&vcc3v3_pcie30>; + status = "okay"; +}; + +&pcie3x4 { + num-lanes = <2>; +}; + &pinctrl { hdmirx { hdmirx_hpd: hdmirx-5v-detection { @@ -90,6 +106,12 @@ pcie2_0_vcc3v3_en: pcie2-0-vcc-en { }; }; + pcie3 { + pcie3x2_rst: pcie3x2-rst { + rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; + sound { hp_detect: hp-detect { rockchip,pins = <4 RK_PC3 RK_FUNC_GPIO &pcfg_pull_none>; From e63419dbf2ceb083c1651852209c7f048089ac0f Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Sat, 30 Aug 2025 11:49:53 +0200 Subject: [PATCH 1039/1307] dmaengine: ti: edma: Fix memory allocation size for queue_priority_map Fix a critical memory allocation bug in edma_setup_from_hw() where queue_priority_map was allocated with insufficient memory. The code declared queue_priority_map as s8 (*)[2] (pointer to array of 2 s8), but allocated memory using sizeof(s8) instead of the correct size. This caused out-of-bounds memory writes when accessing: queue_priority_map[i][0] = i; queue_priority_map[i][1] = i; The bug manifested as kernel crashes with "Oops - undefined instruction" on ARM platforms (BeagleBoard-X15) during EDMA driver probe, as the memory corruption triggered kernel hardening features on Clang. Change the allocation to use sizeof(*queue_priority_map) which automatically gets the correct size for the 2D array structure. Fixes: 2b6b3b742019 ("ARM/dmaengine: edma: Merge the two drivers under drivers/dma/") Signed-off-by: Anders Roxell Link: https://lore.kernel.org/r/20250830094953.3038012-1-anders.roxell@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 3ed406f08c44..552be71db6c4 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2064,8 +2064,8 @@ static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, * priority. So Q0 is the highest priority queue and the last queue has * the lowest priority. */ - queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8), - GFP_KERNEL); + queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, + sizeof(*queue_priority_map), GFP_KERNEL); if (!queue_priority_map) return -ENOMEM; From 7e2368a21741e2db542330b32aa6fdd8908e7cff Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 28 Aug 2025 16:17:33 +0800 Subject: [PATCH 1040/1307] dma-debug: don't enforce dma mapping check on noncoherent allocations As discussed in [1], there is no need to enforce dma mapping check on noncoherent allocations, a simple test on the returned CPU address is good enough. Add a new pair of debug helpers and use them for noncoherent alloc/free to fix this issue. Fixes: efa70f2fdc84 ("dma-mapping: add a new dma_alloc_pages API") Link: https://lore.kernel.org/all/ff6c1fe6-820f-4e58-8395-df06aa91706c@oss.qualcomm.com # 1 Signed-off-by: Baochen Qiang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250828-dma-debug-fix-noncoherent-dma-check-v1-1-76e9be0dd7fc@oss.qualcomm.com --- kernel/dma/debug.c | 48 +++++++++++++++++++++++++++++++++++++++++++- kernel/dma/debug.h | 20 ++++++++++++++++++ kernel/dma/mapping.c | 4 ++-- 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index e43c6de2bce4..b82399437db0 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -39,6 +39,7 @@ enum { dma_debug_sg, dma_debug_coherent, dma_debug_resource, + dma_debug_noncoherent, }; enum map_err_types { @@ -141,6 +142,7 @@ static const char *type2name[] = { [dma_debug_sg] = "scatter-gather", [dma_debug_coherent] = "coherent", [dma_debug_resource] = "resource", + [dma_debug_noncoherent] = "noncoherent", }; static const char *dir2name[] = { @@ -993,7 +995,8 @@ static void check_unmap(struct dma_debug_entry *ref) "[mapped as %s] [unmapped as %s]\n", ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); - } else if (entry->type == dma_debug_coherent && + } else if ((entry->type == dma_debug_coherent || + entry->type == dma_debug_noncoherent) && ref->paddr != entry->paddr) { err_printk(ref->dev, entry, "device driver frees " "DMA memory with different CPU address " @@ -1581,6 +1584,49 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } } +void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs) +{ + struct dma_debug_entry *entry; + + if (unlikely(dma_debug_disabled())) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_noncoherent; + entry->dev = dev; + entry->paddr = page_to_phys(page); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = direction; + + add_dma_entry(entry, attrs); +} + +void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_noncoherent, + .dev = dev, + .paddr = page_to_phys(page), + .dev_addr = dma_addr, + .size = size, + .direction = direction, + }; + + if (unlikely(dma_debug_disabled())) + return; + + check_unmap(&ref); +} + static int __init dma_debug_driver_setup(char *str) { int i; diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h index f525197d3cae..48757ca13f31 100644 --- a/kernel/dma/debug.h +++ b/kernel/dma/debug.h @@ -54,6 +54,13 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev, extern void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction); +extern void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs); +extern void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr); #else /* CONFIG_DMA_API_DEBUG */ static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, @@ -126,5 +133,18 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev, int nelems, int direction) { } + +static inline void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs) +{ +} + +static inline void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr) +{ +} #endif /* CONFIG_DMA_API_DEBUG */ #endif /* _KERNEL_DMA_DEBUG_H */ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 107e4a4d251d..56de28a3b179 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -712,7 +712,7 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, if (page) { trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle, size, dir, gfp, 0); - debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + debug_dma_alloc_pages(dev, page, size, dir, *dma_handle, 0); } else { trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0); } @@ -738,7 +738,7 @@ void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) { trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0); - debug_dma_unmap_page(dev, dma_handle, size, dir); + debug_dma_free_pages(dev, page, size, dir, dma_handle); __dma_free_pages(dev, size, page, dma_handle, dir); } EXPORT_SYMBOL_GPL(dma_free_pages); From e51bd0e595476c1527bb0b4def095a6fd16b2563 Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Wed, 13 Aug 2025 11:16:47 +0800 Subject: [PATCH 1041/1307] selftests/fs/mount-notify: Fix compilation failure. Commit c6d9775c2066 ("selftests/fs/mount-notify: build with tools include dir") introduces the struct __kernel_fsid_t to decouple dependency with headers_install. The commit forgets to define a macro for __kernel_fsid_t and it will cause type re-definition issue. Signed-off-by: Xing Guo Link: https://lore.kernel.org/20250813031647.96411-1-higuoxing@gmail.com Acked-by: Amir Goldstein Closes: https://lore.kernel.org/oe-lkp/202508110628.65069d92-lkp@intel.com Signed-off-by: Christian Brauner --- .../mount-notify/mount-notify_test.c | 17 ++++++++--------- .../mount-notify/mount-notify_test_ns.c | 18 ++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 63ce708d93ed..e4b7c2b457ee 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include #include #include @@ -10,20 +17,12 @@ #include #include #include +#include #include "../../kselftest_harness.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_cmds[] = { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c index 090a5ca65004..9f57ca46e3af 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include #include #include @@ -10,21 +17,12 @@ #include #include #include +#include #include "../../kselftest_harness.h" -#include "../../pidfd/pidfd.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_types[] = { From ba1e9421cf1a8369d25c3832439702a015d6b5f9 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 28 Aug 2025 20:41:17 +0800 Subject: [PATCH 1042/1307] net/smc: fix one NULL pointer dereference in smc_ib_is_sg_need_sync() BUG: kernel NULL pointer dereference, address: 00000000000002ec PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 28 UID: 0 PID: 343 Comm: kworker/28:1 Kdump: loaded Tainted: G OE 6.17.0-rc2+ #9 NONE Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Workqueue: smc_hs_wq smc_listen_work [smc] RIP: 0010:smc_ib_is_sg_need_sync+0x9e/0xd0 [smc] ... Call Trace: smcr_buf_map_link+0x211/0x2a0 [smc] __smc_buf_create+0x522/0x970 [smc] smc_buf_create+0x3a/0x110 [smc] smc_find_rdma_v2_device_serv+0x18f/0x240 [smc] ? smc_vlan_by_tcpsk+0x7e/0xe0 [smc] smc_listen_find_device+0x1dd/0x2b0 [smc] smc_listen_work+0x30f/0x580 [smc] process_one_work+0x18c/0x340 worker_thread+0x242/0x360 kthread+0xe7/0x220 ret_from_fork+0x13a/0x160 ret_from_fork_asm+0x1a/0x30 If the software RoCE device is used, ibdev->dma_device is a null pointer. As a result, the problem occurs. Null pointer detection is added to prevent problems. Fixes: 0ef69e788411c ("net/smc: optimize for smc_sndbuf_sync_sg_for_device and smc_rmb_sync_sg_for_cpu") Signed-off-by: Liu Jian Reviewed-by: Guangguan Wang Reviewed-by: Zhu Yanjun Reviewed-by: D. Wythe Link: https://patch.msgid.link/20250828124117.2622624-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- net/smc/smc_ib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 53828833a3f7..a42ef3f77b96 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -742,6 +742,9 @@ bool smc_ib_is_sg_need_sync(struct smc_link *lnk, unsigned int i; bool ret = false; + if (!lnk->smcibdev->ibdev->dma_device) + return ret; + /* for now there is just one DMA address */ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, buf_slot->sgt[lnk->link_idx].nents, i) { From f1b55db08d527240fbc3b8c84229134a98d8d080 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 29 Aug 2025 21:57:42 +0200 Subject: [PATCH 1043/1307] rust: device: fix unresolved link to drm::Device drm::Device is only available when CONFIG_DRM=y, which we have to consider for intra-doc links, otherwise the rustdoc make target produces the following warning. >> warning: unresolved link to `kernel::drm::Device` --> rust/kernel/device.rs:154:22 | 154 | /// [`drm::Device`]: kernel::drm::Device | ^^^^^^^^^^^^^^^^^^^ no item named `drm` in module `kernel` | = note: `#[warn(rustdoc::broken_intra_doc_links)]` on by default Fix this by making the intra-doc link conditional on CONFIG_DRM being enabled. Fixes: d6e26c1ae4a6 ("device: rust: expand documentation for Device") Suggested-by: Alice Ryhl Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508261644.9LclwUgt-lkp@intel.com/ Reviewed-by: Alice Ryhl Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250829195745.31174-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/device.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 5902b3714a16..a1db49eb159a 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -138,7 +138,9 @@ /// } /// ``` /// -/// An example for a class device implementation is [`drm::Device`]. +/// An example for a class device implementation is +#[cfg_attr(CONFIG_DRM = "y", doc = "[`drm::Device`](kernel::drm::Device).")] +#[cfg_attr(not(CONFIG_DRM = "y"), doc = "`drm::Device`.")] /// /// # Invariants /// @@ -151,7 +153,6 @@ /// dropped from any thread. /// /// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted -/// [`drm::Device`]: kernel::drm::Device /// [`impl_device_context_deref`]: kernel::impl_device_context_deref /// [`pci::Device`]: kernel::pci::Device /// [`platform::Device`]: kernel::platform::Device From ef9f21c3f370bcd45688a3a573b788e39b364e80 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 1 Sep 2025 14:55:13 +0200 Subject: [PATCH 1044/1307] gpio: fix GPIO submenu in Kconfig Commit a86240a37d43 ("gpiolib: enable CONFIG_GPIOLIB_LEGACY even for !GPIOLIB") accidentally pulled all items from within the GPIOLIB submenu into the main driver menu. Put them back under the top-level GPIO entry. Suggested-by: Rob Herring Fixes: a86240a37d43 ("gpiolib: enable CONFIG_GPIOLIB_LEGACY even for !GPIOLIB") Reported-by: Rob Herring Closes: https://lore.kernel.org/all/20250813222649.GA965895-robh@kernel.org/ Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250901125513.108691-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e43abb322fa6..d8ac40d0eb6f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -3,6 +3,9 @@ # GPIO infrastructure and drivers # +config GPIOLIB_LEGACY + def_bool y + menuconfig GPIOLIB bool "GPIO Support" help @@ -12,9 +15,6 @@ menuconfig GPIOLIB If unsure, say N. -config GPIOLIB_LEGACY - def_bool y - if GPIOLIB config GPIOLIB_FASTPATH_LIMIT From aa2e1e4563d3ab689ffa86ca1412ecbf9fd3b308 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 2 Sep 2025 17:03:58 +0800 Subject: [PATCH 1045/1307] dmaengine: dw: dmamux: Fix device reference leak in rzn1_dmamux_route_allocate The reference taken by of_find_device_by_node() must be released when not needed anymore. Add missing put_device() call to fix device reference leaks. Fixes: 134d9c52fca2 ("dmaengine: dw: dmamux: Introduce RZN1 DMA router support") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20250902090358.2423285-1-linmq006@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/dw/rzn1-dmamux.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/dw/rzn1-dmamux.c b/drivers/dma/dw/rzn1-dmamux.c index 4fb8508419db..deadf135681b 100644 --- a/drivers/dma/dw/rzn1-dmamux.c +++ b/drivers/dma/dw/rzn1-dmamux.c @@ -48,12 +48,16 @@ static void *rzn1_dmamux_route_allocate(struct of_phandle_args *dma_spec, u32 mask; int ret; - if (dma_spec->args_count != RNZ1_DMAMUX_NCELLS) - return ERR_PTR(-EINVAL); + if (dma_spec->args_count != RNZ1_DMAMUX_NCELLS) { + ret = -EINVAL; + goto put_device; + } map = kzalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return ERR_PTR(-ENOMEM); + if (!map) { + ret = -ENOMEM; + goto put_device; + } chan = dma_spec->args[0]; map->req_idx = dma_spec->args[4]; @@ -94,12 +98,15 @@ static void *rzn1_dmamux_route_allocate(struct of_phandle_args *dma_spec, if (ret) goto clear_bitmap; + put_device(&pdev->dev); return map; clear_bitmap: clear_bit(map->req_idx, dmamux->used_chans); free_map: kfree(map); +put_device: + put_device(&pdev->dev); return ERR_PTR(ret); } From 232674e1a6dd2f7a68b0d496a7ed1a57d79533da Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Mon, 1 Sep 2025 14:40:32 +0200 Subject: [PATCH 1046/1307] drm/sched: Fix racy access to drm_sched_entity.dependency The drm_sched_job_unschedulable trace point can access entity->dependency after it was cleared by the callback installed in drm_sched_entity_add_dependency_cb, causing: BUG: kernel NULL pointer dereference, address: 0000000000000020 [...] Workqueue: comp_1.1.0 drm_sched_run_job_work [gpu_sched] RIP: 0010:trace_event_raw_event_drm_sched_job_unschedulable+0x70/0xd0 [gpu_sched] To fix this we either need to keep a reference to the fence before setting up the callbacks, or move the trace_drm_sched_job_unschedulable calls into drm_sched_entity_add_dependency_cb where they can be done earlier. Fixes: 76d97c870f29 ("drm/sched: Trace dependencies for GPU jobs") Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250901124032.1955-1-pierre-eric.pelloux-prayer@amd.com (cherry picked from commit b2b8af21fec35be417a3199b5a6c354605dd222a) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/scheduler/sched_entity.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 8867b95ab089..3d06f72531ba 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -391,7 +391,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); * Add a callback to the current dependency of the entity to wake up the * scheduler when the entity becomes available. */ -static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) +static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity, + struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = entity->rq->sched; struct dma_fence *fence = entity->dependency; @@ -421,6 +422,10 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) entity->dependency = fence; } + if (trace_drm_sched_job_unschedulable_enabled() && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags)) + trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (!dma_fence_add_callback(entity->dependency, &entity->cb, drm_sched_entity_wakeup)) return true; @@ -461,10 +466,8 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = drm_sched_job_dependency(sched_job, entity))) { - if (drm_sched_entity_add_dependency_cb(entity)) { - trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity, sched_job)) return NULL; - } } /* skip jobs from entity that marked guilty */ From 773b27a8a2f00ce3134e92e50ea4794a98ba2b76 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Aug 2025 15:28:26 +0800 Subject: [PATCH 1047/1307] net: mctp: mctp_fraq_queue should take ownership of passed skb As of commit f5d83cf0eeb9 ("net: mctp: unshare packets when reassembling"), we skb_unshare() in mctp_frag_queue(). The unshare may invalidate the original skb pointer, so we need to treat the skb as entirely owned by the fraq queue, even on failure. Fixes: f5d83cf0eeb9 ("net: mctp: unshare packets when reassembling") Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250829-mctp-skb-unshare-v1-1-1c28fe10235a@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 2b2b958ef6a3..4d314e062ba9 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -378,6 +378,7 @@ static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {} static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {} #endif +/* takes ownership of skb, both in success and failure cases */ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) { struct mctp_hdr *hdr = mctp_hdr(skb); @@ -387,8 +388,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - /* Since we're manipulating the shared frag_list, ensure it isn't - * shared with any other SKBs. + /* Since we're manipulating the shared frag_list, ensure it + * isn't shared with any other SKBs. In the cloned case, + * this will free the skb; callers can no longer access it + * safely. */ key->reasm_head = skb_unshare(skb, GFP_ATOMIC); if (!key->reasm_head) @@ -402,10 +405,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; if (this_seq != exp_seq) - return -EINVAL; + goto err_free; if (key->reasm_head->len + skb->len > mctp_message_maxlen) - return -EINVAL; + goto err_free; skb->next = NULL; skb->sk = NULL; @@ -419,6 +422,10 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) key->reasm_head->truesize += skb->truesize; return 0; + +err_free: + kfree_skb(skb); + return -EINVAL; } static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) @@ -532,18 +539,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) * key isn't observable yet */ mctp_frag_queue(key, skb); + skb = NULL; /* if the key_add fails, we've raced with another * SOM packet with the same src, dest and tag. There's * no way to distinguish future packets, so all we - * can do is drop; we'll free the skb on exit from - * this function. + * can do is drop. */ rc = mctp_key_add(key, msk); - if (!rc) { + if (!rc) trace_mctp_key_acquire(key); - skb = NULL; - } /* we don't need to release key->lock on exit, so * clean up here and suppress the unlock via @@ -561,8 +566,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) key = NULL; } else { rc = mctp_frag_queue(key, skb); - if (!rc) - skb = NULL; + skb = NULL; } } @@ -572,17 +576,16 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) */ /* we need to be continuing an existing reassembly... */ - if (!key->reasm_head) + if (!key->reasm_head) { rc = -EINVAL; - else + } else { rc = mctp_frag_queue(key, skb); + skb = NULL; + } if (rc) goto out_unlock; - /* we've queued; the queue owns the skb now */ - skb = NULL; - /* end of message? deliver to socket, and we're done with * the reassembly/response key */ From e27e34bc99413a29cafae02ad572ea3c9beba2ce Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Aug 2025 15:40:23 +0800 Subject: [PATCH 1048/1307] net: mctp: usb: initialise mac header in RX path We're not currently setting skb->mac_header on ingress, and the netdev core rx path expects it. Without it, we'll hit a warning on DEBUG_NETDEV from commit 1e4033b53db4 ("net: skb_reset_mac_len() must check if mac_header was set") Initialise the mac_header to refer to the USB transport header. Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250829-mctp-usb-mac-header-v1-1-338ad725e183@codeconstruct.com.au Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 775a386d0aca..36ccc53b1797 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -183,6 +183,7 @@ static void mctp_usb_in_complete(struct urb *urb) struct mctp_usb_hdr *hdr; u8 pkt_len; /* length of MCTP packet, no USB header */ + skb_reset_mac_header(skb); hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) break; From a6099f263e1f408bcc7913c9df24b0677164fc5d Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 29 Aug 2025 17:40:51 +0530 Subject: [PATCH 1049/1307] net: ethernet: ti: am65-cpsw-nuss: Fix null pointer dereference for ndev In the TX completion packet stage of TI SoCs with CPSW2G instance, which has single external ethernet port, ndev is accessed without being initialized if no TX packets have been processed. It results into null pointer dereference, causing kernel to crash. Fix this by having a check on the number of TX packets which have been processed. Fixes: 9a369ae3d143 ("net: ethernet: ti: am65-cpsw: remove am65_cpsw_nuss_tx_compl_packets_2g()") Signed-off-by: Nishanth Menon Signed-off-by: Chintan Vankar Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250829121051.2031832-1-c-vankar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecd6ecac87bb..8b2364f5f731 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1522,7 +1522,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, } } - if (single_port) { + if (single_port && num_tx) { netif_txq = netdev_get_tx_queue(ndev, chn); netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); From 379b3c983fc0257c183052278832ac68e3ccd33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 28 Aug 2025 15:48:37 +0200 Subject: [PATCH 1050/1307] drm/xe: Fix incorrect migration of backed-up object to VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an object is backed up to shmem it is incorrectly identified as not having valid data by the move code. This means moving to VRAM skips the -EMULTIHOP step and the bo is cleared. This causes all sorts of weird behaviour on DGFX if an already evicted object is targeted by the shrinker. Fix this by using ttm_tt_is_swapped() to identify backed-up objects. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5996 Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: Matthew Brost Cc: Matthew Auld Cc: # v6.15+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250828134837.5709-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 1047bd82794a1eab64d643f196d09171ce983f44) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1be2415966df..9954bb458ce1 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -819,8 +819,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, return ret; } - tt_has_data = ttm && (ttm_tt_is_populated(ttm) || - (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); + tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm)); move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data)); From 304f455b648d68ba47135583b644ee72ac48d544 Mon Sep 17 00:00:00 2001 From: James Jones Date: Tue, 26 Aug 2025 12:57:16 -0700 Subject: [PATCH 1051/1307] MAINTAINERS: Update git entry for nouveau The gitlab repository previously associated with the nouveau module has fallen out of use. The drm-misc tree here: https://gitlab.freedesktop.org/drm/misc/kernel.git Is now where most nouveau-related patches are applied. This change updates the MAINTAINERS file to reflect this. Signed-off-by: James Jones Link: https://lore.kernel.org/r/20250826195716.1897-1-jajones@nvidia.com Signed-off-by: Danilo Krummrich --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3f1c4af3f8e0..f94e115a8d32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7820,7 +7820,7 @@ Q: https://patchwork.freedesktop.org/project/nouveau/ Q: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests B: https://gitlab.freedesktop.org/drm/nouveau/-/issues C: irc://irc.oftc.net/nouveau -T: git https://gitlab.freedesktop.org/drm/nouveau.git +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/nouveau/ F: include/uapi/drm/nouveau_drm.h From 81ac63321eb936b1a1f7045b37674661f8ffb4a5 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 5 Aug 2025 10:36:29 +0800 Subject: [PATCH 1052/1307] trace: Remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. No functional changes. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250805023630.335719-1-rongqianfeng@vivo.com Signed-off-by: Qianfeng Rong Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index af42aaa3d172..2ab283fd3032 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -496,7 +496,7 @@ static bool user_event_enabler_queue_fault(struct user_event_mm *mm, { struct user_event_enabler_fault *fault; - fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN); + fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT); if (!fault) return false; From 3d62ab32df065e4a7797204a918f6489ddb8a237 Mon Sep 17 00:00:00 2001 From: Luo Gengkun Date: Tue, 19 Aug 2025 10:51:52 +0000 Subject: [PATCH 1053/1307] tracing: Fix tracing_marker may trigger page fault during preempt_disable Both tracing_mark_write and tracing_mark_raw_write call __copy_from_user_inatomic during preempt_disable. But in some case, __copy_from_user_inatomic may trigger page fault, and will call schedule() subtly. And if a task is migrated to other cpu, the following warning will be trigger: if (RB_WARN_ON(cpu_buffer, !local_read(&cpu_buffer->committing))) An example can illustrate this issue: process flow CPU --------------------------------------------------------------------- tracing_mark_raw_write(): cpu:0 ... ring_buffer_lock_reserve(): cpu:0 ... cpu = raw_smp_processor_id() cpu:0 cpu_buffer = buffer->buffers[cpu] cpu:0 ... ... __copy_from_user_inatomic(): cpu:0 ... # page fault do_mem_abort(): cpu:0 ... # Call schedule schedule() cpu:0 ... # the task schedule to cpu1 __buffer_unlock_commit(): cpu:1 ... ring_buffer_unlock_commit(): cpu:1 ... cpu = raw_smp_processor_id() cpu:1 cpu_buffer = buffer->buffers[cpu] cpu:1 As shown above, the process will acquire cpuid twice and the return values are not the same. To fix this problem using copy_from_user_nofault instead of __copy_from_user_inatomic, as the former performs 'access_ok' before copying. Link: https://lore.kernel.org/20250819105152.2766363-1-luogengkun@huaweicloud.com Fixes: 656c7f0d2d2b ("tracing: Replace kmap with copy_from_user() in trace_marker writing") Signed-off-by: Luo Gengkun Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1b7db732c0b1..2f1ae6c0ee81 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7209,7 +7209,7 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user entry = ring_buffer_event_data(event); entry->ip = ip; - len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); + len = copy_from_user_nofault(&entry->buf, ubuf, cnt); if (len) { memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); cnt = FAULTED_SIZE; @@ -7306,7 +7306,7 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, entry = ring_buffer_event_data(event); - len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); + len = copy_from_user_nofault(&entry->id, ubuf, cnt); if (len) { entry->id = -1; memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); From 664596bd98bb251dd417dfd3f9b615b661e1e44a Mon Sep 17 00:00:00 2001 From: Chiasheng Lee Date: Mon, 1 Sep 2025 20:59:43 +0800 Subject: [PATCH 1054/1307] i2c: i801: Hide Intel Birch Stream SoC TCO WDT Hide the Intel Birch Stream SoC TCO WDT feature since it was removed. On platforms with PCH TCO WDT, this redundant device might be rendering errors like this: [ 28.144542] sysfs: cannot create duplicate filename '/bus/platform/devices/iTCO_wdt' Fixes: 8c56f9ef25a3 ("i2c: i801: Add support for Intel Birch Stream SoC") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220320 Signed-off-by: Chiasheng Lee Cc: # v6.7+ Reviewed-by: Mika Westerberg Reviewed-by: Jarkko Nikula Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250901125943.916522-1-chiasheng.lee@linux.intel.com --- drivers/i2c/busses/i2c-i801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index a7f89946dad4..e94ac746a741 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1052,7 +1052,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_P_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_SOC_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_PCH_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, - { PCI_DEVICE_DATA(INTEL, BIRCH_STREAM_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, + { PCI_DEVICE_DATA(INTEL, BIRCH_STREAM_SMBUS, FEATURES_ICH5) }, { PCI_DEVICE_DATA(INTEL, ARROW_LAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_H_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, PANTHER_LAKE_P_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, From bdd5a14e660062114bdebaef9ad52adf04970a89 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 21 Aug 2025 14:23:41 +0200 Subject: [PATCH 1055/1307] drm/bridge: ti-sn65dsi86: fix REFCLK setting The bridge has three bootstrap pins which are sampled to determine the frequency of the external reference clock. The driver will also (over)write that setting. But it seems this is racy after the bridge is enabled. It was observed that although the driver write the correct value (by sniffing on the I2C bus), the register has the wrong value. The datasheet states that the GPIO lines have to be stable for at least 5us after asserting the EN signal. Thus, there seems to be some logic which samples the GPIO lines and this logic appears to overwrite the register value which was set by the driver. Waiting 20us after asserting the EN line resolves this issue. Fixes: a095f15c00e2 ("drm/bridge: add support for sn65dsi86 bridge driver") Signed-off-by: Michael Walle Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250821122341.1257286-1-mwalle@kernel.org --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 464390372b34..ae0d08e5e960 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -392,6 +392,17 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev) gpiod_set_value_cansleep(pdata->enable_gpio, 1); + /* + * After EN is deasserted and an external clock is detected, the bridge + * will sample GPIO3:1 to determine its frequency. The driver will + * overwrite this setting in ti_sn_bridge_set_refclk_freq(). But this is + * racy. Thus we have to wait a couple of us. According to the datasheet + * the GPIO lines has to be stable at least 5 us (td5) but it seems that + * is not enough and the refclk frequency value is still lost or + * overwritten by the bridge itself. Waiting for 20us seems to work. + */ + usleep_range(20, 30); + /* * If we have a reference clock we can enable communication w/ the * panel (including the aux channel) w/out any need for an input clock From 403bf043d9340196e06769065169df7444b91f7a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Aug 2025 10:35:26 -0700 Subject: [PATCH 1056/1307] ice: fix NULL access of tx->in_use in ice_ptp_ts_irq The E810 device has support for a "low latency" firmware interface to access and read the Tx timestamps. This interface does not use the standard Tx timestamp logic, due to the latency overhead of proxying sideband command requests over the firmware AdminQ. The logic still makes use of the Tx timestamp tracking structure, ice_ptp_tx, as it uses the same "ready" bitmap to track which Tx timestamps complete. Unfortunately, the ice_ptp_ts_irq() function does not check if the tracker is initialized before its first access. This results in NULL dereference or use-after-free bugs similar to the following: [245977.278756] BUG: kernel NULL pointer dereference, address: 0000000000000000 [245977.278774] RIP: 0010:_find_first_bit+0x19/0x40 [245977.278796] Call Trace: [245977.278809] ? ice_misc_intr+0x364/0x380 [ice] This can occur if a Tx timestamp interrupt races with the driver reset logic. Fix this by only checking the in_use bitmap (and other fields) if the tracker is marked as initialized. The reset flow will clear the init field under lock before it tears the tracker down, thus preventing any use-after-free or NULL access. Fixes: f9472aaabd1f ("ice: Process TSYN IRQ in a separate function") Signed-off-by: Jacob Keller Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e358eb1d719f..fb0f6365a6d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2701,16 +2701,19 @@ irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) */ if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { struct ice_ptp_tx *tx = &pf->ptp.port.tx; - u8 idx; + u8 idx, last; if (!ice_pf_state_is_nominal(pf)) return IRQ_HANDLED; spin_lock(&tx->lock); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + if (tx->init) { + last = tx->last_ll_ts_idx_read + 1; + idx = find_next_bit_wrap(tx->in_use, tx->len, + last); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock(&tx->lock); return IRQ_HANDLED; From f6486338fde3f04ed0ec59fe67a69a208c32734f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 7 Aug 2025 10:35:27 -0700 Subject: [PATCH 1057/1307] ice: fix NULL access of tx->in_use in ice_ll_ts_intr Recent versions of the E810 firmware have support for an extra interrupt to handle report of the "low latency" Tx timestamps coming from the specialized low latency firmware interface. Instead of polling the registers, software can wait until the low latency interrupt is fired. This logic makes use of the Tx timestamp tracking structure, ice_ptp_tx, as it uses the same "ready" bitmap to track which Tx timestamps complete. Unfortunately, the ice_ll_ts_intr() function does not check if the tracker is initialized before its first access. This results in NULL dereference or use-after-free bugs similar to the issues fixed in the ice_ptp_ts_irq() function. Fix this by only checking the in_use bitmap (and other fields) if the tracker is marked as initialized. The reset flow will clear the init field under lock before it tears the tracker down, thus preventing any use-after-free or NULL access. Fixes: 82e71b226e0e ("ice: Enable SW interrupt from FW for LL TS") Signed-off-by: Jacob Keller Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index cae992d8f03c..77781277aa8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3176,12 +3176,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) hw = &pf->hw; tx = &pf->ptp.port.tx; spin_lock_irqsave(&tx->lock, flags); - ice_ptp_complete_tx_single_tstamp(tx); + if (tx->init) { + ice_ptp_complete_tx_single_tstamp(tx); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock_irqrestore(&tx->lock, flags); val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | From 65637c3a181184ae25bd10d37bc83f8bb97708b5 Mon Sep 17 00:00:00 2001 From: Joshua Hay Date: Mon, 11 Aug 2025 17:19:21 -0700 Subject: [PATCH 1058/1307] idpf: fix UAF in RDMA core aux dev deinitialization Free the adev->id before auxiliary_device_uninit. The call to uninit triggers the release callback, which frees the iadev memory containing the adev. The previous flow results in a UAF during rmmod due to the adev->id access. [264939.604077] ================================================================== [264939.604093] BUG: KASAN: slab-use-after-free in idpf_idc_deinit_core_aux_device+0xe4/0x100 [idpf] [264939.604134] Read of size 4 at addr ff1100109eb6eaf8 by task rmmod/17842 ... [264939.604635] Allocated by task 17597: [264939.604643] kasan_save_stack+0x20/0x40 [264939.604654] kasan_save_track+0x14/0x30 [264939.604663] __kasan_kmalloc+0x8f/0xa0 [264939.604672] idpf_idc_init_aux_core_dev+0x4bd/0xb60 [idpf] [264939.604700] idpf_idc_init+0x55/0xd0 [idpf] [264939.604726] process_one_work+0x658/0xfe0 [264939.604742] worker_thread+0x6e1/0xf10 [264939.604750] kthread+0x382/0x740 [264939.604762] ret_from_fork+0x23a/0x310 [264939.604772] ret_from_fork_asm+0x1a/0x30 [264939.604785] Freed by task 17842: [264939.604790] kasan_save_stack+0x20/0x40 [264939.604799] kasan_save_track+0x14/0x30 [264939.604808] kasan_save_free_info+0x3b/0x60 [264939.604820] __kasan_slab_free+0x37/0x50 [264939.604830] kfree+0xf1/0x420 [264939.604840] device_release+0x9c/0x210 [264939.604850] kobject_put+0x17c/0x4b0 [264939.604860] idpf_idc_deinit_core_aux_device+0x4f/0x100 [idpf] [264939.604886] idpf_vc_core_deinit+0xba/0x3a0 [idpf] [264939.604915] idpf_remove+0xb0/0x7c0 [idpf] [264939.604944] pci_device_remove+0xab/0x1e0 [264939.604955] device_release_driver_internal+0x371/0x530 [264939.604969] driver_detach+0xbf/0x180 [264939.604981] bus_remove_driver+0x11b/0x2a0 [264939.604991] pci_unregister_driver+0x2a/0x250 [264939.605005] __do_sys_delete_module.constprop.0+0x2eb/0x540 [264939.605014] do_syscall_64+0x64/0x2c0 [264939.605024] entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: f4312e6bfa2a ("idpf: implement core RDMA auxiliary dev create, init, and destroy") Signed-off-by: Joshua Hay Reviewed-by: Aleksandr Loktionov Reviewed-by: Vadim Fedorenko Tested-by: Samuel Salin Reviewed-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_idc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 4d2905103215..7e20a07e98e5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -247,10 +247,10 @@ static void idpf_unplug_aux_dev(struct auxiliary_device *adev) if (!adev) return; + ida_free(&idpf_idc_ida, adev->id); + auxiliary_device_delete(adev); auxiliary_device_uninit(adev); - - ida_free(&idpf_idc_ida, adev->id); } /** From acf3a5c8be80fe238c1a7629db1c21c74a1f9dd4 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 14 Aug 2025 16:43:00 -0700 Subject: [PATCH 1059/1307] idpf: set mac type when adding and removing MAC filters On control planes that allow changing the MAC address of the interface, the driver must provide a MAC type to avoid errors such as: idpf 0000:0a:00.0: Transaction failed (op 535) idpf 0000:0a:00.0: Received invalid MAC filter payload (op 535) (len 0) idpf 0000:0a:00.0: Transaction failed (op 536) These errors occur during driver load or when changing the MAC via: ip link set address Add logic to set the MAC type when sending ADD/DEL (opcodes 535/536) to the control plane. Since only one primary MAC is supported per vport, the driver only needs to send an ADD opcode when setting it. Remove the old address by calling __idpf_del_mac_filter(), which skips the message and just clears the entry from the internal list. This avoids an error on DEL as it attempts to remove an address already cleared by the preceding ADD opcode. Fixes: ce1b75d0635c ("idpf: add ptypes and MAC filter support") Reported-by: Jian Liu Signed-off-by: Emil Tantilov Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 9 ++++++--- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 2c2a3e85d693..513032cb5f08 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2344,6 +2344,7 @@ static int idpf_set_mac(struct net_device *netdev, void *p) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; struct sockaddr *addr = p; + u8 old_mac_addr[ETH_ALEN]; struct idpf_vport *vport; int err = 0; @@ -2367,17 +2368,19 @@ static int idpf_set_mac(struct net_device *netdev, void *p) if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) goto unlock_mutex; + ether_addr_copy(old_mac_addr, vport->default_mac_addr); + ether_addr_copy(vport->default_mac_addr, addr->sa_data); vport_config = vport->adapter->vport_config[vport->idx]; err = idpf_add_mac_filter(vport, np, addr->sa_data, false); if (err) { __idpf_del_mac_filter(vport_config, addr->sa_data); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); goto unlock_mutex; } - if (is_valid_ether_addr(vport->default_mac_addr)) - idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + if (is_valid_ether_addr(old_mac_addr)) + __idpf_del_mac_filter(vport_config, old_mac_addr); - ether_addr_copy(vport->default_mac_addr, addr->sa_data); eth_hw_addr_set(netdev, addr->sa_data); unlock_mutex: diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index a028c69f7fdc..6330d4a0ae07 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3765,6 +3765,16 @@ u32 idpf_get_vport_id(struct idpf_vport *vport) return le32_to_cpu(vport_msg->vport_id); } +static void idpf_set_mac_type(struct idpf_vport *vport, + struct virtchnl2_mac_addr *mac_addr) +{ + bool is_primary; + + is_primary = ether_addr_equal(vport->default_mac_addr, mac_addr->addr); + mac_addr->type = is_primary ? VIRTCHNL2_MAC_ADDR_PRIMARY : + VIRTCHNL2_MAC_ADDR_EXTRA; +} + /** * idpf_mac_filter_async_handler - Async callback for mac filters * @adapter: private data struct @@ -3894,6 +3904,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, list) { if (add && f->add) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->add = false; if (i == total_filters) @@ -3901,6 +3912,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, } if (!add && f->remove) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->remove = false; if (i == total_filters) From 9fcdb1c3c4ba134434694c001dbff343f1ffa319 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Jul 2025 17:14:37 -0700 Subject: [PATCH 1060/1307] i40e: remove read access to debugfs files The 'command' and 'netdev_ops' debugfs files are a legacy debugging interface supported by the i40e driver since its early days by commit 02e9c290814c ("i40e: debugfs interface"). Both of these debugfs files provide a read handler which is mostly useless, and which is implemented with questionable logic. They both use a static 256 byte buffer which is initialized to the empty string. In the case of the 'command' file this buffer is literally never used and simply wastes space. In the case of the 'netdev_ops' file, the last command written is saved here. On read, the files contents are presented as the name of the device followed by a colon and then the contents of their respective static buffer. For 'command' this will always be ": ". For 'netdev_ops', this will be ": ". But note the buffer is shared between all devices operated by this module. At best, it is mostly meaningless information, and at worse it could be accessed simultaneously as there doesn't appear to be any locking mechanism. We have also recently received multiple reports for both read functions about their use of snprintf and potential overflow that could result in reading arbitrary kernel memory. For the 'command' file, this is definitely impossible, since the static buffer is always zero and never written to. For the 'netdev_ops' file, it does appear to be possible, if the user carefully crafts the command input, it will be copied into the buffer, which could be large enough to cause snprintf to truncate, which then causes the copy_to_user to read beyond the length of the buffer allocated by kzalloc. A minimal fix would be to replace snprintf() with scnprintf() which would cap the return to the number of bytes written, preventing an overflow. A more involved fix would be to drop the mostly useless static buffers, saving 512 bytes and modifying the read functions to stop needing those as input. Instead, lets just completely drop the read access to these files. These are debug interfaces exposed as part of debugfs, and I don't believe that dropping read access will break any script, as the provided output is pretty useless. You can find the netdev name through other more standard interfaces, and the 'netdev_ops' interface can easily result in garbage if you issue simultaneous writes to multiple devices at once. In order to properly remove the i40e_dbg_netdev_ops_buf, we need to refactor its write function to avoid using the static buffer. Instead, use the same logic as the i40e_dbg_command_write, with an allocated buffer. Update the code to use this instead of the static buffer, and ensure we free the buffer on exit. This fixes simultaneous writes to 'netdev_ops' on multiple devices, and allows us to remove the now unused static buffer along with removing the read access. Fixes: 02e9c290814c ("i40e: debugfs interface") Reported-by: Kunwu Chan Closes: https://lore.kernel.org/intel-wired-lan/20231208031950.47410-1-chentao@kylinos.cn/ Reported-by: Wang Haoran Closes: https://lore.kernel.org/all/CANZ3JQRRiOdtfQJoP9QM=6LS1Jto8PGBGw6y7-TL=BcnzHQn1Q@mail.gmail.com/ Reported-by: Amir Mohammad Jahangirzad Closes: https://lore.kernel.org/all/20250722115017.206969-1-a.jahangirzad@gmail.com/ Signed-off-by: Jacob Keller Reviewed-by: Dawid Osuchowski Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Reviewed-by: Kunwu Chan Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/i40e/i40e_debugfs.c | 123 +++--------------- 1 file changed, 19 insertions(+), 104 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 6cd6f23d42a6..c17b5d290f0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -40,48 +40,6 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid) * setup, adding or removing filters, or other things. Many of * these will be useful for some forms of unit testing. **************************************************************/ -static char i40e_dbg_command_buf[256] = ""; - -/** - * i40e_dbg_command_read - read for command datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partial reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_command_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} static char *i40e_filter_state_string[] = { "INVALID", @@ -1621,7 +1579,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, static const struct file_operations i40e_dbg_command_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_command_read, .write = i40e_dbg_command_write, }; @@ -1630,48 +1587,6 @@ static const struct file_operations i40e_dbg_command_fops = { * The netdev_ops entry in debugfs is for giving the driver commands * to be executed from the netdev operations. **************************************************************/ -static char i40e_dbg_netdev_ops_buf[256] = ""; - -/** - * i40e_dbg_netdev_ops_read - read for netdev_ops datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partal reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_netdev_ops_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} /** * i40e_dbg_netdev_ops_write - write into netdev_ops datum @@ -1685,35 +1600,36 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, size_t count, loff_t *ppos) { struct i40e_pf *pf = filp->private_data; + char *cmd_buf, *buf_tmp; int bytes_not_copied; struct i40e_vsi *vsi; - char *buf_tmp; int vsi_seid; int i, cnt; /* don't allow partial writes */ if (*ppos != 0) return 0; - if (count >= sizeof(i40e_dbg_netdev_ops_buf)) - return -ENOSPC; - memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); - bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, - buffer, count); - if (bytes_not_copied) + cmd_buf = kzalloc(count + 1, GFP_KERNEL); + if (!cmd_buf) + return count; + bytes_not_copied = copy_from_user(cmd_buf, buffer, count); + if (bytes_not_copied) { + kfree(cmd_buf); return -EFAULT; - i40e_dbg_netdev_ops_buf[count] = '\0'; + } + cmd_buf[count] = '\0'; - buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); + buf_tmp = strchr(cmd_buf, '\n'); if (buf_tmp) { *buf_tmp = '\0'; - count = buf_tmp - i40e_dbg_netdev_ops_buf + 1; + count = buf_tmp - cmd_buf + 1; } - if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) { + if (strncmp(cmd_buf, "change_mtu", 10) == 0) { int mtu; - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i", + cnt = sscanf(&cmd_buf[11], "%i %i", &vsi_seid, &mtu); if (cnt != 2) { dev_info(&pf->pdev->dev, "change_mtu \n"); @@ -1735,8 +1651,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "set_rx_mode", 11) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "set_rx_mode", 11) == 0) { + cnt = sscanf(&cmd_buf[11], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "set_rx_mode \n"); goto netdev_ops_write_done; @@ -1756,8 +1672,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "napi", 4) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[4], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "napi", 4) == 0) { + cnt = sscanf(&cmd_buf[4], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "napi \n"); goto netdev_ops_write_done; @@ -1775,21 +1691,20 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "napi called\n"); } } else { - dev_info(&pf->pdev->dev, "unknown command '%s'\n", - i40e_dbg_netdev_ops_buf); + dev_info(&pf->pdev->dev, "unknown command '%s'\n", cmd_buf); dev_info(&pf->pdev->dev, "available commands\n"); dev_info(&pf->pdev->dev, " change_mtu \n"); dev_info(&pf->pdev->dev, " set_rx_mode \n"); dev_info(&pf->pdev->dev, " napi \n"); } netdev_ops_write_done: + kfree(cmd_buf); return count; } static const struct file_operations i40e_dbg_netdev_ops_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_netdev_ops_read, .write = i40e_dbg_netdev_ops_write, }; From a556f06338e1d5a85af0e32ecb46e365547f92b9 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Wed, 27 Aug 2025 19:56:31 +0800 Subject: [PATCH 1061/1307] i40e: Fix potential invalid access when MAC list is empty list_first_entry() never returns NULL - if the list is empty, it still returns a pointer to an invalid object, leading to potential invalid memory access when dereferenced. Fix this by using list_first_entry_or_null instead of list_first_entry. Fixes: e3219ce6a775 ("i40e: Add support for client interface for IWARP driver") Signed-off-by: Zhen Ni Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 5f1a405cbbf8..518bc738ea3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -359,8 +359,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf) if (i40e_client_get_params(vsi, &cdev->lan_info.params)) goto free_cdev; - mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, - struct netdev_hw_addr, list); + mac = list_first_entry_or_null(&cdev->lan_info.netdev->dev_addrs.list, + struct netdev_hw_addr, list); if (mac) ether_addr_copy(cdev->lan_info.lanmac, mac->addr); else From b7e5c3e3bfa9dc8af75ff6d8633ad7070e1985e4 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 10 Aug 2025 10:01:14 -0700 Subject: [PATCH 1062/1307] ixgbe: fix incorrect map used in eee linkmode incorrectly used ixgbe_lp_map in loops intended to populate the supported and advertised EEE linkmode bitmaps based on ixgbe_ls_map. This results in incorrect bit setting and potential out-of-bounds access, since ixgbe_lp_map and ixgbe_ls_map have different sizes and purposes. ixgbe_lp_map[i] -> ixgbe_ls_map[i] Use ixgbe_ls_map for supported and advertised linkmodes, and keep ixgbe_lp_map usage only for link partner (lp_advertised) mapping. Fixes: 9356b6db9d05 ("net: ethernet: ixgbe: Convert EEE to use linkmodes") Signed-off-by: Alok Tiwari Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 25c3a09ad7f1..1a2f1bdb91aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3571,13 +3571,13 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->supported); } for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->advertised); } From 90fb7db49c6dbac961c6b8ebfd741141ffbc8545 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Sun, 17 Aug 2025 12:25:47 +0300 Subject: [PATCH 1063/1307] e1000e: fix heap overflow in e1000_set_eeprom Fix a possible heap overflow in e1000_set_eeprom function by adding input validation for the requested length of the change in the EEPROM. In addition, change the variable type from int to size_t for better code practices and rearrange declarations to RCT. Cc: stable@vger.kernel.org Fixes: bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver (currently for ICH9 devices only)") Co-developed-by: Mikael Wessel Signed-off-by: Mikael Wessel Signed-off-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ethtool.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index c0bbb12eed2e..cf01a108a5bb 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -549,12 +549,12 @@ static int e1000_set_eeprom(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + size_t total_len, max_len; u16 *eeprom_buff; - void *ptr; - int max_len; + int ret_val = 0; int first_word; int last_word; - int ret_val = 0; + void *ptr; u16 i; if (eeprom->len == 0) @@ -569,6 +569,10 @@ static int e1000_set_eeprom(struct net_device *netdev, max_len = hw->nvm.word_size * 2; + if (check_add_overflow(eeprom->offset, eeprom->len, &total_len) || + total_len > max_len) + return -EFBIG; + first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc(max_len, GFP_KERNEL); From de134cb54c3a67644ff95b1c9bffe545e752c912 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 20 Aug 2025 14:52:05 -0700 Subject: [PATCH 1064/1307] btrfs: fix squota compressed stats leak The following workload on a squota enabled fs: btrfs subvol create mnt/subvol # ensure subvol extents get accounted sync btrfs qgroup create 1/1 mnt btrfs qgroup assign mnt/subvol 1/1 mnt btrfs qgroup delete mnt/subvol # make the cleaner thread run btrfs filesystem sync mnt sleep 1 btrfs filesystem sync mnt btrfs qgroup destroy 1/1 mnt will fail with EBUSY. The reason is that 1/1 does the quick accounting when we assign subvol to it, gaining its exclusive usage as excl and excl_cmpr. But then when we delete subvol, the decrement happens via record_squota_delta() which does not update excl_cmpr, as squotas does not make any distinction between compressed and normal extents. Thus, we increment excl_cmpr but never decrement it, and are unable to delete 1/1. The two possible fixes are to make squota always mirror excl and excl_cmpr or to make the fast accounting separately track the plain and cmpr numbers. The latter felt cleaner to me so that is what I opted for. Fixes: 1e0e9d5771c3 ("btrfs: add helper for recording simple quota deltas") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ccaa9a3cf1ce..da102da169fd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1455,6 +1455,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *qgroup; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; + u64 num_bytes_cmpr = src->excl_cmpr; int ret = 0; qgroup = find_qgroup_rb(fs_info, ref_root); @@ -1466,11 +1467,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; - qgroup->rfer_cmpr += sign * num_bytes; + qgroup->rfer_cmpr += sign * num_bytes_cmpr; WARN_ON(sign < 0 && qgroup->excl < num_bytes); + WARN_ON(sign < 0 && qgroup->excl_cmpr < num_bytes_cmpr); qgroup->excl += sign * num_bytes; - qgroup->excl_cmpr += sign * num_bytes; + qgroup->excl_cmpr += sign * num_bytes_cmpr; if (sign > 0) qgroup_rsv_add_by_qgroup(fs_info, qgroup, src); From 6db1df415d73fcad12134a54f97dc6c8a64ab181 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Mon, 25 Aug 2025 18:32:04 +0930 Subject: [PATCH 1065/1307] btrfs: accept and ignore compression level for lzo The compression level is meaningless for lzo, but before commit 3f093ccb95f30 ("btrfs: harden parsing of compression mount options"), it was silently ignored if passed. After that commit, passing a level with lzo fails to mount: BTRFS error: unrecognized compression value lzo:1 It seems reasonable for users to expect that lzo would permit a numeric level option, as all the other algos do, even though the kernel's implementation of LZO currently only supports a single level. Because it has always worked to pass a level, it seems likely to me that users in the real world are relying on doing so. This patch restores the old behavior, giving "lzo:N" the same semantics as all of the other compression algos. To be clear, silly variants like "lzo:one", "lzo:the_first_option", or "lzo:armageddon" also used to work. This isn't meant to suggest that any possible mis-interpretation of mount options that once worked must continue to work forever. This is an exceptional case where it makes sense to preserve compatibility, both because the mis-interpretation is reasonable, and because nothing tangible is sacrificed. Finally update btrfs_show_options() to ignore the level of LZO, as it is only the default level without any extra meaning. Fixes: 3f093ccb95f30 ("btrfs: harden parsing of compression mount options") Reviewed-by: Daniel Vacek Reviewed-by: Qu Wenruo Signed-off-by: Calvin Owens Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7f31f8bd63ba..e708faf1892f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -299,9 +299,12 @@ static int btrfs_parse_compress(struct btrfs_fs_context *ctx, btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); - } else if (btrfs_match_compress_type(string, "lzo", false)) { + } else if (btrfs_match_compress_type(string, "lzo", true)) { ctx->compress_type = BTRFS_COMPRESS_LZO; - ctx->compress_level = 0; + ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_LZO, + string + 3); + if (string[3] == ':' && string[4]) + btrfs_warn(NULL, "Compression level ignored for LZO"); btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); @@ -1079,7 +1082,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_printf(seq, ",compress-force=%s", compress_type); else seq_printf(seq, ",compress=%s", compress_type); - if (info->compress_level) + if (info->compress_level && info->compress_type != BTRFS_COMPRESS_LZO) seq_printf(seq, ":%d", info->compress_level); } if (btrfs_test_opt(info, NOSSD)) From 9786531399a679fc2f4630d2c0a186205282ab2f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 22 Aug 2025 16:06:13 +0930 Subject: [PATCH 1066/1307] btrfs: fix corruption reading compressed range when block size is smaller than page size [BUG] With 64K page size (aarch64 with 64K page size config) and 4K btrfs block size, the following workload can easily lead to a corrupted read: mkfs.btrfs -f -s 4k $dev > /dev/null mount -o compress $dev $mnt xfs_io -f -c "pwrite -S 0xff 0 64k" $mnt/base > /dev/null echo "correct result:" od -Ad -t x1 $mnt/base xfs_io -f -c "reflink $mnt/base 32k 0 32k" \ -c "reflink $mnt/base 0 32k 32k" \ -c "pwrite -S 0xff 60k 4k" $mnt/new > /dev/null echo "incorrect result:" od -Ad -t x1 $mnt/new umount $mnt This shows the following result: correct result: 0000000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0065536 incorrect result: 0000000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0032768 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0061440 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0065536 Notice the zero in the range [32K, 60K), which is incorrect. [CAUSE] With extra trace printk, it shows the following events during od: (some unrelated info removed like CPU and context) od-3457 btrfs_do_readpage: enter r/i=5/258 folio=0(65536) prev_em_start=0000000000000000 The "r/i" is indicating the root and inode number. In our case the file "new" is using ino 258 from fs tree (root 5). Here notice the @prev_em_start pointer is NULL. This means the btrfs_do_readpage() is called from btrfs_read_folio(), not from btrfs_readahead(). od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=0 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=4096 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=8192 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=12288 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=16384 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=20480 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=24576 got em start=0 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=28672 got em start=0 len=32768 These above 32K blocks will be read from the first half of the compressed data extent. od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=32768 got em start=32768 len=32768 Note here there is no btrfs_submit_compressed_read() call. Which is incorrect now. Although both extent maps at 0 and 32K are pointing to the same compressed data, their offsets are different thus can not be merged into the same read. So this means the compressed data read merge check is doing something wrong. od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=36864 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=40960 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=45056 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=49152 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=53248 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=57344 got em start=32768 len=32768 od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=61440 skip uptodate od-3457 btrfs_submit_compressed_read: cb orig_bio: file off=0 len=61440 The function btrfs_submit_compressed_read() is only called at the end of folio read. The compressed bio will only have an extent map of range [0, 32K), but the original bio passed in is for the whole 64K folio. This will cause the decompression part to only fill the first 32K, leaving the rest untouched (aka, filled with zero). This incorrect compressed read merge leads to the above data corruption. There were similar problems that happened in the past, commit 808f80b46790 ("Btrfs: update fix for read corruption of compressed and shared extents") is doing pretty much the same fix for readahead. But that's back to 2015, where btrfs still only supports bs (block size) == ps (page size) cases. This means btrfs_do_readpage() only needs to handle a folio which contains exactly one block. Only btrfs_readahead() can lead to a read covering multiple blocks. Thus only btrfs_readahead() passes a non-NULL @prev_em_start pointer. With v5.15 kernel btrfs introduced bs < ps support. This breaks the above assumption that a folio can only contain one block. Now btrfs_read_folio() can also read multiple blocks in one go. But btrfs_read_folio() doesn't pass a @prev_em_start pointer, thus the existing bio force submission check will never be triggered. In theory, this can also happen for btrfs with large folios, but since large folio is still experimental, we don't need to bother it, thus only bs < ps support is affected for now. [FIX] Instead of passing @prev_em_start to do the proper compressed extent check, introduce one new member, btrfs_bio_ctrl::last_em_start, so that the existing bio force submission logic will always be triggered. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c953297aa89a..b21cb72835cc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -111,6 +111,24 @@ struct btrfs_bio_ctrl { */ unsigned long submit_bitmap; struct readahead_control *ractl; + + /* + * The start offset of the last used extent map by a read operation. + * + * This is for proper compressed read merge. + * U64_MAX means we are starting the read and have made no progress yet. + * + * The current btrfs_bio_is_contig() only uses disk_bytenr as + * the condition to check if the read can be merged with previous + * bio, which is not correct. E.g. two file extents pointing to the + * same extent but with different offset. + * + * So here we need to do extra checks to only merge reads that are + * covered by the same extent map. + * Just extent_map::start will be enough, as they are unique + * inside the same inode. + */ + u64 last_em_start; }; static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) @@ -909,7 +927,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl, * return 0 on success, otherwise return error */ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, - struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start) + struct btrfs_bio_ctrl *bio_ctrl) { struct inode *inode = folio->mapping->host; struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); @@ -1019,12 +1037,11 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, * non-optimal behavior (submitting 2 bios for the same extent). */ if (compress_type != BTRFS_COMPRESS_NONE && - prev_em_start && *prev_em_start != (u64)-1 && - *prev_em_start != em->start) + bio_ctrl->last_em_start != U64_MAX && + bio_ctrl->last_em_start != em->start) force_bio_submit = true; - if (prev_em_start) - *prev_em_start = em->start; + bio_ctrl->last_em_start = em->start; btrfs_free_extent_map(em); em = NULL; @@ -1238,12 +1255,15 @@ int btrfs_read_folio(struct file *file, struct folio *folio) const u64 start = folio_pos(folio); const u64 end = start + folio_size(folio) - 1; struct extent_state *cached_state = NULL; - struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ }; + struct btrfs_bio_ctrl bio_ctrl = { + .opf = REQ_OP_READ, + .last_em_start = U64_MAX, + }; struct extent_map *em_cached = NULL; int ret; lock_extents_for_read(inode, start, end, &cached_state); - ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL); + ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl); btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state); btrfs_free_extent_map(em_cached); @@ -2583,7 +2603,8 @@ void btrfs_readahead(struct readahead_control *rac) { struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD, - .ractl = rac + .ractl = rac, + .last_em_start = U64_MAX, }; struct folio *folio; struct btrfs_inode *inode = BTRFS_I(rac->mapping->host); @@ -2591,12 +2612,11 @@ void btrfs_readahead(struct readahead_control *rac) const u64 end = start + readahead_length(rac) - 1; struct extent_state *cached_state = NULL; struct extent_map *em_cached = NULL; - u64 prev_em_start = (u64)-1; lock_extents_for_read(inode, start, end, &cached_state); while ((folio = readahead_folio(rac)) != NULL) - btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start); + btrfs_do_readpage(folio, &em_cached, &bio_ctrl); btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state); From f6a6c280059c4ddc23e12e3de1b01098e240036f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 26 Aug 2025 11:24:38 -0700 Subject: [PATCH 1067/1307] btrfs: fix subvolume deletion lockup caused by inodes xarray race There is a race condition between inode eviction and inode caching that can cause a live struct btrfs_inode to be missing from the root->inodes xarray. Specifically, there is a window during evict() between the inode being unhashed and deleted from the xarray. If btrfs_iget() is called for the same inode in that window, it will be recreated and inserted into the xarray, but then eviction will delete the new entry, leaving nothing in the xarray: Thread 1 Thread 2 --------------------------------------------------------------- evict() remove_inode_hash() btrfs_iget_path() btrfs_iget_locked() btrfs_read_locked_inode() btrfs_add_inode_to_root() destroy_inode() btrfs_destroy_inode() btrfs_del_inode_from_root() __xa_erase In turn, this can cause issues for subvolume deletion. Specifically, if an inode is in this lost state, and all other inodes are evicted, then btrfs_del_inode_from_root() will call btrfs_add_dead_root() prematurely. If the lost inode has a delayed_node attached to it, then when btrfs_clean_one_deleted_snapshot() calls btrfs_kill_all_delayed_nodes(), it will loop forever because the delayed_nodes xarray will never become empty (unless memory pressure forces the inode out). We saw this manifest as soft lockups in production. Fix it by only deleting the xarray entry if it matches the given inode (using __xa_cmpxchg()). Fixes: 310b2f5d5a94 ("btrfs: use an xarray to track open inodes in a root") Cc: stable@vger.kernel.org # 6.11+ Reviewed-by: Josef Bacik Reviewed-by: Filipe Manana Co-authored-by: Leo Martins Signed-off-by: Leo Martins Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd82dcc7b2b7..e7218e78bff4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5696,7 +5696,17 @@ static void btrfs_del_inode_from_root(struct btrfs_inode *inode) bool empty = false; xa_lock(&root->inodes); - entry = __xa_erase(&root->inodes, btrfs_ino(inode)); + /* + * This btrfs_inode is being freed and has already been unhashed at this + * point. It's possible that another btrfs_inode has already been + * allocated for the same inode and inserted itself into the root, so + * don't delete it in that case. + * + * Note that this shouldn't need to allocate memory, so the gfp flags + * don't really matter. + */ + entry = __xa_cmpxchg(&root->inodes, btrfs_ino(inode), inode, NULL, + GFP_ATOMIC); if (entry == inode) empty = xa_empty(&root->inodes); xa_unlock(&root->inodes); From 7ac3c2889bc060c3f67cf44df0dbb093a835c176 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Aug 2025 15:32:49 +0200 Subject: [PATCH 1068/1307] nvme: fix PI insert on write I recently ran into an issue where the PI generated using the block layer integrity code differs from that from a kernel using the PRACT fallback when the block layer integrity code is disabled, and I tracked this down to us using PRACT incorrectly. The NVM Command Set Specification (section 5.33 in 1.2, similar in older versions) specifies the PRACT insert behavior as: Inserted protection information consists of the computed CRC for the protection information format (refer to section 5.3.1) in the Guard field, the LBAT field value in the Application Tag field, the LBST field value in the Storage Tag field, if defined, and the computed reference tag in the Logical Block Reference Tag. Where the computed reference tag is defined as following for type 1 and type 2 using the text below that is duplicated in the respective bullet points: the value of the computed reference tag for the first logical block of the command is the value contained in the Initial Logical Block Reference Tag (ILBRT) or Expected Initial Logical Block Reference Tag (EILBRT) field in the command, and the computed reference tag is incremented for each subsequent logical block. So we need to set ILBRT field, but we currently don't. Interestingly this works fine on my older type 1 formatted SSD, but Qemu trips up on this. We already set ILBRT for Write Same since commit aeb7bb061be5 ("nvme: set the PRACT bit when using Write Zeroes with T10 PI"). To ease this, move the PI type check into nvme_set_ref_tag. Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 812c1565114f..6b7493934535 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -903,6 +903,15 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd, u32 upper, lower; u64 ref48; + /* only type1 and type 2 PI formats have a reftag */ + switch (ns->head->pi_type) { + case NVME_NS_DPS_PI_TYPE1: + case NVME_NS_DPS_PI_TYPE2: + break; + default: + return; + } + /* both rw and write zeroes share the same reftag format */ switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD: @@ -942,13 +951,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, if (nvme_ns_has_pi(ns->head)) { cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); - - switch (ns->head->pi_type) { - case NVME_NS_DPS_PI_TYPE1: - case NVME_NS_DPS_PI_TYPE2: - nvme_set_ref_tag(ns, cmnd, req); - break; - } + nvme_set_ref_tag(ns, cmnd, req); } return BLK_STS_OK; @@ -1039,6 +1042,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head))) return BLK_STS_NOTSUPP; control |= NVME_RW_PRINFO_PRACT; + nvme_set_ref_tag(ns, cmnd, req); } if (bio_integrity_flagged(req->bio, BIP_CHECK_GUARD)) From ddbf0e78a8b20ec18d314d31336a0230fdc9b394 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sun, 31 Aug 2025 12:59:07 +0200 Subject: [PATCH 1069/1307] net: sfp: add quirk for FLYPRO copper SFP+ module Add quirk for a copper SFP that identifies itself as "FLYPRO" "SFP-10GT-CS-30M". It uses RollBall protocol to talk to the PHY. Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250831105910.3174-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 5347c95d1e77..4cd1d6c51dc2 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -492,6 +492,9 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex, sfp_fixup_nokia), + // FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY. + SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball), + // Fiberstore SFP-10G-T doesn't identify as copper, uses the Rollball // protocol to talk to the PHY and needs 4 sec wait before probing the // PHY. From d7b67dd6f9db7bd2c49b415e901849b182ff0735 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 2 Sep 2025 11:17:11 -0700 Subject: [PATCH 1070/1307] perf bpf-event: Fix use-after-free in synthesis Calls to perf_env__insert_bpf_prog_info may fail as a sideband thread may already have inserted the bpf_prog_info. Such failures may yield info_linear being freed which then causes use-after-free issues with the internal bpf_prog_info info struct. Make it so that perf_env__insert_bpf_prog_info trigger early non-error paths and fix the use-after-free in perf_event__synthesize_one_bpf_prog. Add proper return error handling to perf_env__add_bpf_info (that calls perf_env__insert_bpf_prog_info) and propagate the return value in its callers. Closes: https://lore.kernel.org/lkml/CAP-5=fWJQcmUOP7MuCA2ihKnDAHUCOBLkQFEkQES-1ZZTrgf8Q@mail.gmail.com/ Fixes: 03edb7020bb9 ("perf bpf: Fix two memory leakages when calling perf_env__insert_bpf_prog_info()") Reviewed-by: Namhyung Kim Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250902181713.309797-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf-event.c | 39 +++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 5b6d3e899e11..2298cd396c42 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -657,9 +657,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, info_node->info_linear = info_linear; info_node->metadata = NULL; if (!perf_env__insert_bpf_prog_info(env, info_node)) { - free(info_linear); + /* + * Insert failed, likely because of a duplicate event + * made by the sideband thread. Ignore synthesizing the + * metadata. + */ free(info_node); + goto out; } + /* info_linear is now owned by info_node and shouldn't be freed below. */ info_linear = NULL; /* @@ -827,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, return err; } -static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +static int perf_env__add_bpf_info(struct perf_env *env, u32 id) { struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; - int fd; + int fd, err = 0; fd = bpf_prog_get_fd_by_id(id); if (fd < 0) - return; + return -EINVAL; arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -852,6 +858,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. aborting\n", __func__); + err = PTR_ERR(info_linear); goto out; } @@ -862,38 +869,46 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node->info_linear = info_linear; info_node->metadata = bpf_metadata_create(&info_linear->info); if (!perf_env__insert_bpf_prog_info(env, info_node)) { + pr_debug("%s: duplicate add bpf info request for id %u\n", + __func__, btf_id); free(info_linear); free(info_node); + goto out; } - } else + } else { free(info_linear); + err = -ENOMEM; + goto out; + } if (btf_id == 0) goto out; btf = btf__load_from_kernel_by_id(btf_id); - if (libbpf_get_error(btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", - __func__, btf_id); - goto out; + if (!btf) { + err = -errno; + pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err); + } else { + perf_env__fetch_btf(env, btf_id, btf); } - perf_env__fetch_btf(env, btf_id, btf); out: btf__free(btf); close(fd); + return err; } static int bpf_event__sb_cb(union perf_event *event, void *data) { struct perf_env *env = data; + int ret = 0; if (event->header.type != PERF_RECORD_BPF_EVENT) return -1; switch (event->bpf.type) { case PERF_BPF_EVENT_PROG_LOAD: - perf_env__add_bpf_info(env, event->bpf.id); + ret = perf_env__add_bpf_info(env, event->bpf.id); case PERF_BPF_EVENT_PROG_UNLOAD: /* @@ -907,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) break; } - return 0; + return ret; } int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) From 1654a0e4d576d9e43fbb10ccf6a1b307c5c18566 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 2 Sep 2025 11:17:12 -0700 Subject: [PATCH 1071/1307] perf bpf-utils: Constify bpil_array_desc The array's contents is a compile time constant. Constify to make the code more intention revealing and avoid unintended errors. Reviewed-by: Namhyung Kim Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250902181713.309797-3-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf-utils.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index 80b1d2b3729b..64a558344696 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -20,7 +20,7 @@ struct bpil_array_desc { */ }; -static struct bpil_array_desc bpil_array_desc[] = { +static const struct bpil_array_desc bpil_array_desc[] = { [PERF_BPIL_JITED_INSNS] = { offsetof(struct bpf_prog_info, jited_prog_insns), offsetof(struct bpf_prog_info, jited_prog_len), @@ -129,12 +129,10 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) /* step 2: calculate total size of all arrays */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + const struct bpil_array_desc *desc = &bpil_array_desc[i]; bool include_array = (arrays & (1UL << i)) > 0; - struct bpil_array_desc *desc; __u32 count, size; - desc = bpil_array_desc + i; - /* kernel is too old to support this field */ if (info_len < desc->array_offset + sizeof(__u32) || info_len < desc->count_offset + sizeof(__u32) || @@ -163,13 +161,12 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u32 count, size; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); bpf_prog_info_set_offset_u32(&info_linear->info, @@ -192,13 +189,12 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) /* step 6: verify the data */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u32 v1, v2; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); v2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); @@ -224,13 +220,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; addr = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); offs = addr - ptr_to_u64(info_linear->data); @@ -244,13 +239,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; offs = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); addr = offs + ptr_to_u64(info_linear->data); From 01be43f2a0eaeed83e94dee054742f37625c86d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 2 Sep 2025 11:17:13 -0700 Subject: [PATCH 1072/1307] perf bpf-utils: Harden get_bpf_prog_info_linear In get_bpf_prog_info_linear two calls to bpf_obj_get_info_by_fd are made, the first to compute memory requirements for a struct perf_bpil and the second to fill it in. Previously the code would warn when the second call didn't match the first. Such races can be common place in things like perf test, whose perf trace tests will frequently load BPF programs. Rather than a debug message, return actual errors for this case. Out of paranoia also validate the read bpf_prog_info array value. Change the type of ptr to avoid mismatched pointer type compiler warnings. Add some additional debug print outs and sanity asserts. Closes: https://lore.kernel.org/lkml/CAP-5=fWJQcmUOP7MuCA2ihKnDAHUCOBLkQFEkQES-1ZZTrgf8Q@mail.gmail.com/ Fixes: 6ac22d036f86 ("perf bpf: Pull in bpf_program__get_prog_info_linear()") Reviewed-by: Namhyung Kim Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250902181713.309797-4-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf-utils.c | 43 ++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index 64a558344696..5a66dc8594aa 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) __u32 info_len = sizeof(info); __u32 data_len = 0; int i, err; - void *ptr; + __u8 *ptr; if (arrays >> PERF_BPIL_LAST_ARRAY) return ERR_PTR(-EINVAL); @@ -126,6 +126,8 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) pr_debug("can't get prog info: %s", strerror(errno)); return ERR_PTR(-EFAULT); } + if (info.type >= __MAX_BPF_PROG_TYPE) + pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type); /* step 2: calculate total size of all arrays */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { @@ -173,6 +175,8 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) desc->count_offset, count); bpf_prog_info_set_offset_u32(&info_linear->info, desc->size_offset, size); + assert(ptr >= info_linear->data); + assert(ptr < &info_linear->data[data_len]); bpf_prog_info_set_offset_u64(&info_linear->info, desc->array_offset, ptr_to_u64(ptr)); @@ -186,26 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) free(info_linear); return ERR_PTR(-EFAULT); } + if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) { + pr_debug("%s:%d: unexpected program type %u\n", + __func__, __LINE__, info_linear->info.type); + } /* step 6: verify the data */ + ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { const struct bpil_array_desc *desc = &bpil_array_desc[i]; - __u32 v1, v2; + __u32 count1, count2, size1, size2; + __u64 ptr2; if ((arrays & (1UL << i)) == 0) continue; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + count2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); - if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + if (count1 != count2) { + pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2); + free(info_linear); + return ERR_PTR(-ERANGE); + } - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + size2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); - if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + if (size1 != size2) { + pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); + if (ptr_to_u64(ptr) != ptr2) { + pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr += roundup(count1 * size1, sizeof(__u64)); } /* step 7: update info_len and data_len */ From 22c55fb9eb92395d999b8404d73e58540d11bdd8 Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Fri, 1 Aug 2025 16:19:20 +0530 Subject: [PATCH 1073/1307] wifi: ath12k: Set EMLSR support flag in MLO flags for EML-capable stations Currently, when updating EMLSR capabilities of a multi-link (ML) station, only the EMLSR parameters (e.g., padding delay, transition delay, and timeout) are sent to firmware. However, firmware also requires the EMLSR support flag to be set in the MLO flags of the peer assoc WMI command to properly handle EML operating mode notification frames. Set the ATH12K_WMI_FLAG_MLO_EMLSR_SUPPORT flag in the peer assoc WMI command when the ML station is EMLSR-capable, so that the firmware can respond to EHT EML action frames from associated stations. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 4bcf9525bc49 ("wifi: ath12k: update EMLSR capabilities of ML Station") Signed-off-by: Ramya Gnanasekar Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250801104920.3326352-1-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index da85c28ec355..742ffeb48bce 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -2423,6 +2423,7 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, eml_cap = arg->ml.eml_cap; if (u16_get_bits(eml_cap, IEEE80211_EML_CAP_EMLSR_SUPP)) { + ml_params->flags |= cpu_to_le32(ATH12K_WMI_FLAG_MLO_EMLSR_SUPPORT); /* Padding delay */ eml_pad_delay = ieee80211_emlsr_pad_delay_in_us(eml_cap); ml_params->emlsr_padding_delay_us = cpu_to_le32(eml_pad_delay); From 97acb0259cc9cbfbd7ab689e25684f3d8ce10e26 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Sun, 10 Aug 2025 22:30:18 +0530 Subject: [PATCH 1074/1307] wifi: ath11k: fix group data packet drops during rekey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During GTK rekey, mac80211 issues a clear key (if the old key exists) followed by an install key operation in the same context. This causes ath11k to send two WMI commands in quick succession: one to clear the old key and another to install the new key in the same slot. Under certain conditions—especially under high load or time sensitive scenarios, firmware may process these commands asynchronously in a way that firmware assumes the key is cleared whereas hardware has a valid key. This inconsistency between hardware and firmware leads to group addressed packet drops. Only setting the same key again can restore a valid key in firmware and allow packets to be transmitted. This issue remained latent because the host's clear key commands were not effective in firmware until commit 436a4e886598 ("ath11k: clear the keys properly via DISABLE_KEY"). That commit enabled the host to explicitly clear group keys, which inadvertently exposed the race. To mitigate this, restrict group key clearing across all modes (AP, STA, MESH). During rekey, the new key can simply be set on top of the previous one, avoiding the need for a clear followed by a set. However, in AP mode specifically, permit group key clearing when no stations are associated. This exception supports transitions from secure modes (e.g., WPA2/WPA3) to open mode, during which all associated peers are removed and the group key is cleared as part of the transition. Add a per-BSS station counter to track the presence of stations during set key operations. Also add a reset_group_keys flag to track the key re-installation state and avoid repeated installation of the same key when the number of connected stations transitions to non-zero within a rekey period. Additionally, for AP and Mesh modes, when the first station associates, reinstall the same group key that was last set. This ensures that the firmware recovers from any race that may have occurred during a previous key clear when no stations were associated. This change ensures that key clearing is permitted only when no clients are connected, avoiding packet loss while enabling dynamic security mode transitions. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Reported-by: Steffen Moser Closes: https://lore.kernel.org/linux-wireless/c6366409-9928-4dd7-bf7b-ba7fcf20eabf@steffen-moser.de Fixes: 436a4e886598 ("ath11k: clear the keys properly via DISABLE_KEY") Signed-off-by: Rameshkumar Sundaram Tested-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250810170018.1124014-1-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 2 + drivers/net/wireless/ath/ath11k/mac.c | 111 +++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 220d69a7a429..e8780b05ce11 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -411,6 +411,8 @@ struct ath11k_vif { bool do_not_send_tmpl; struct ath11k_arp_ns_offload arp_ns_offload; struct ath11k_rekey_data rekey_data; + u32 num_stations; + bool reinstall_group_keys; struct ath11k_reg_tpc_power_info reg_tpc_info; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 1fadf5faafb8..106e2530b64e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4317,6 +4317,40 @@ static int ath11k_clear_peer_keys(struct ath11k_vif *arvif, return first_errno; } +static int ath11k_set_group_keys(struct ath11k_vif *arvif) +{ + struct ath11k *ar = arvif->ar; + struct ath11k_base *ab = ar->ab; + const u8 *addr = arvif->bssid; + int i, ret, first_errno = 0; + struct ath11k_peer *peer; + + spin_lock_bh(&ab->base_lock); + peer = ath11k_peer_find(ab, arvif->vdev_id, addr); + spin_unlock_bh(&ab->base_lock); + + if (!peer) + return -ENOENT; + + for (i = 0; i < ARRAY_SIZE(peer->keys); i++) { + struct ieee80211_key_conf *key = peer->keys[i]; + + if (!key || (key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) + continue; + + ret = ath11k_install_key(arvif, key, SET_KEY, addr, + WMI_KEY_GROUP); + if (ret < 0 && first_errno == 0) + first_errno = ret; + + if (ret < 0) + ath11k_warn(ab, "failed to set group key of idx %d for vdev %d: %d\n", + i, arvif->vdev_id, ret); + } + + return first_errno; +} + static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) @@ -4326,6 +4360,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ath11k_peer *peer; struct ath11k_sta *arsta; + bool is_ap_with_no_sta; const u8 *peer_addr; int ret = 0; u32 flags = 0; @@ -4386,16 +4421,57 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, else flags |= WMI_KEY_GROUP; - ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); - if (ret) { - ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); - goto exit; - } + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", + cmd == SET_KEY ? "SET_KEY" : "DEL_KEY", peer_addr, arvif->vdev_id, + flags, arvif->vdev_type, arvif->num_stations); - ret = ath11k_dp_peer_rx_pn_replay_config(arvif, peer_addr, cmd, key); - if (ret) { - ath11k_warn(ab, "failed to offload PN replay detection %d\n", ret); - goto exit; + /* Allow group key clearing only in AP mode when no stations are + * associated. There is a known race condition in firmware where + * group addressed packets may be dropped if the key is cleared + * and immediately set again during rekey. + * + * During GTK rekey, mac80211 issues a clear key (if the old key + * exists) followed by an install key operation for same key + * index. This causes ath11k to send two WMI commands in quick + * succession: one to clear the old key and another to install the + * new key in the same slot. + * + * Under certain conditions—especially under high load or time + * sensitive scenarios, firmware may process these commands + * asynchronously in a way that firmware assumes the key is + * cleared whereas hardware has a valid key. This inconsistency + * between hardware and firmware leads to group addressed packet + * drops after rekey. + * Only setting the same key again can restore a valid key in + * firmware and allow packets to be transmitted. + * + * There is a use case where an AP can transition from Secure mode + * to open mode without a vdev restart by just deleting all + * associated peers and clearing key, Hence allow clear key for + * that case alone. Mark arvif->reinstall_group_keys in such cases + * and reinstall the same key when the first peer is added, + * allowing firmware to recover from the race if it had occurred. + */ + + is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && + !arvif->num_stations); + if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); + if (ret) { + ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); + goto exit; + } + + ret = ath11k_dp_peer_rx_pn_replay_config(arvif, peer_addr, cmd, key); + if (ret) { + ath11k_warn(ab, "failed to offload PN replay detection %d\n", + ret); + goto exit; + } + + if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + arvif->reinstall_group_keys = true; } spin_lock_bh(&ab->base_lock); @@ -4994,6 +5070,7 @@ static int ath11k_mac_inc_num_stations(struct ath11k_vif *arvif, return -ENOBUFS; ar->num_stations++; + arvif->num_stations++; return 0; } @@ -5009,6 +5086,7 @@ static void ath11k_mac_dec_num_stations(struct ath11k_vif *arvif, return; ar->num_stations--; + arvif->num_stations--; } static u32 ath11k_mac_ieee80211_sta_bw_to_wmi(struct ath11k *ar, @@ -9540,6 +9618,21 @@ static int ath11k_mac_station_add(struct ath11k *ar, goto exit; } + /* Driver allows the DEL KEY followed by SET KEY sequence for + * group keys for only when there is no clients associated, if at + * all firmware has entered the race during that window, + * reinstalling the same key when the first sta connects will allow + * firmware to recover from the race. + */ + if (arvif->num_stations == 1 && arvif->reinstall_group_keys) { + ath11k_dbg(ab, ATH11K_DBG_MAC, "set group keys on 1st station add for vdev %d\n", + arvif->vdev_id); + ret = ath11k_set_group_keys(arvif); + if (ret) + goto dec_num_station; + arvif->reinstall_group_keys = false; + } + arsta->rx_stats = kzalloc(sizeof(*arsta->rx_stats), GFP_KERNEL); if (!arsta->rx_stats) { ret = -ENOMEM; From 8b3332c1331c7c260bdff89bfdfd24ea263be764 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Sep 2025 14:08:17 -0700 Subject: [PATCH 1075/1307] Revert "eth: remove the DLink/Sundance (ST201) driver" This reverts commit 8401a108a63302a5a198c7075d857895ca624851. I got a report from an (anonymous) Sundance user: Ethernet controller: Sundance Technology Inc / IC Plus Corp IC Plus IP100A Integrated 10/100 Ethernet MAC + PHY (rev 31) Revert the driver back in. Make following changes: - update Denis's email address in MAINTAINERS - adjust to timer API renames: - del_timer_sync() -> timer_delete_sync() - from_timer() -> timer_container_of() Fixes: 8401a108a633 ("eth: remove the DLink/Sundance (ST201) driver") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901210818.1025316-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 + arch/mips/configs/mtx1_defconfig | 1 + arch/powerpc/configs/ppc6xx_defconfig | 1 + drivers/net/ethernet/dlink/Kconfig | 20 + drivers/net/ethernet/dlink/Makefile | 1 + drivers/net/ethernet/dlink/sundance.c | 1985 +++++++++++++++++++++++++ 6 files changed, 2014 insertions(+) create mode 100644 drivers/net/ethernet/dlink/sundance.c diff --git a/MAINTAINERS b/MAINTAINERS index 09b34bbd0c48..1819c477eee3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24252,6 +24252,12 @@ S: Maintained F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml F: drivers/input/keyboard/sun4i-lradc-keys.c +SUNDANCE NETWORK DRIVER +M: Denis Kirjanov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/dlink/sundance.c + SUNPLUS ETHERNET DRIVER M: Wells Lu L: netdev@vger.kernel.org diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index e4bcdb64df6c..2707ab134639 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -273,6 +273,7 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m +CONFIG_SUNDANCE=m CONFIG_PCMCIA_FMVJ18X=m CONFIG_E100=m CONFIG_E1000=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index bb359643ddc1..b082c1fae13c 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -433,6 +433,7 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m +CONFIG_SUNDANCE=m CONFIG_S2IO=m CONFIG_FEC_MPC52xx=m CONFIG_GIANFAR=m diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig index e9e13654812c..0d77f84c8e7b 100644 --- a/drivers/net/ethernet/dlink/Kconfig +++ b/drivers/net/ethernet/dlink/Kconfig @@ -32,4 +32,24 @@ config DL2K To compile this driver as a module, choose M here: the module will be called dl2k. +config SUNDANCE + tristate "Sundance Alta support" + depends on PCI + select CRC32 + select MII + help + This driver is for the Sundance "Alta" chip. + More specific information and updates are available from + . + +config SUNDANCE_MMIO + bool "Use MMIO instead of PIO" + depends on SUNDANCE + help + Enable memory-mapped I/O for interaction with Sundance NIC registers. + Do NOT enable this by default, PIO (enabled when MMIO is disabled) + is known to solve bugs on certain chips. + + If unsure, say N. + endif # NET_VENDOR_DLINK diff --git a/drivers/net/ethernet/dlink/Makefile b/drivers/net/ethernet/dlink/Makefile index 38c236eb6007..3ff503c747db 100644 --- a/drivers/net/ethernet/dlink/Makefile +++ b/drivers/net/ethernet/dlink/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_DL2K) += dl2k.o +obj-$(CONFIG_SUNDANCE) += sundance.o diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c new file mode 100644 index 000000000000..29d59c42dfa3 --- /dev/null +++ b/drivers/net/ethernet/dlink/sundance.c @@ -0,0 +1,1985 @@ +/* sundance.c: A Linux device driver for the Sundance ST201 "Alta". */ +/* + Written 1999-2000 by Donald Becker. + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Support and updates available at + http://www.scyld.com/network/sundance.html + [link no longer provides useful info -jgarzik] + Archives of the mailing list are still available at + https://www.beowulf.org/pipermail/netdrivers/ + +*/ + +#define DRV_NAME "sundance" + +/* The user-configurable values. + These may be modified when a driver module is loaded.*/ +static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ +/* Maximum number of multicast addresses to filter (vs. rx-all-multicast). + Typical is a 64 element hash table based on the Ethernet CRC. */ +static const int multicast_filter_limit = 32; + +/* Set the copy breakpoint for the copy-only-tiny-frames scheme. + Setting to > 1518 effectively disables this feature. + This chip can receive into offset buffers, so the Alpha does not + need a copy-align. */ +static int rx_copybreak; +static int flowctrl=1; + +/* media[] specifies the media type the NIC operates at. + autosense Autosensing active media. + 10mbps_hd 10Mbps half duplex. + 10mbps_fd 10Mbps full duplex. + 100mbps_hd 100Mbps half duplex. + 100mbps_fd 100Mbps full duplex. + 0 Autosensing active media. + 1 10Mbps half duplex. + 2 10Mbps full duplex. + 3 100Mbps half duplex. + 4 100Mbps full duplex. +*/ +#define MAX_UNITS 8 +static char *media[MAX_UNITS]; + + +/* Operational parameters that are set at compile time. */ + +/* Keep the ring sizes a power of two for compile efficiency. + The compiler will convert '%'<2^N> into a bit mask. + Making the Tx ring too large decreases the effectiveness of channel + bonding and packet priority, and more than 128 requires modifying the + Tx error recovery. + Large receive rings merely waste memory. */ +#define TX_RING_SIZE 32 +#define TX_QUEUE_LEN (TX_RING_SIZE - 1) /* Limit ring entries actually used. */ +#define RX_RING_SIZE 64 +#define RX_BUDGET 32 +#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct netdev_desc) +#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct netdev_desc) + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (4*HZ) +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ + +/* Include files, designed to support most kernel versions 2.0.0 and later. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* Processor type for cache alignment. */ +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Donald Becker "); +MODULE_DESCRIPTION("Sundance Alta Ethernet driver"); +MODULE_LICENSE("GPL"); + +module_param(debug, int, 0); +module_param(rx_copybreak, int, 0); +module_param_array(media, charp, NULL, 0); +module_param(flowctrl, int, 0); +MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)"); +MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]"); + +/* + Theory of Operation + +I. Board Compatibility + +This driver is designed for the Sundance Technologies "Alta" ST201 chip. + +II. Board-specific settings + +III. Driver operation + +IIIa. Ring buffers + +This driver uses two statically allocated fixed-size descriptor lists +formed into rings by a branch from the final descriptor to the beginning of +the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. +Some chips explicitly use only 2^N sized rings, while others use a +'next descriptor' pointer that the driver forms into rings. + +IIIb/c. Transmit/Receive Structure + +This driver uses a zero-copy receive and transmit scheme. +The driver allocates full frame size skbuffs for the Rx ring buffers at +open() time and passes the skb->data field to the chip as receive data +buffers. When an incoming frame is less than RX_COPYBREAK bytes long, +a fresh skbuff is allocated and the frame is copied to the new skbuff. +When the incoming frame is larger, the skbuff is passed directly up the +protocol stack. Buffers consumed this way are replaced by newly allocated +skbuffs in a later phase of receives. + +The RX_COPYBREAK value is chosen to trade-off the memory wasted by +using a full-sized skbuff for small frames vs. the copying costs of larger +frames. New boards are typically used in generously configured machines +and the underfilled buffers have negligible impact compared to the benefit of +a single allocation size, so the default value of zero results in never +copying packets. When copying is done, the cost is usually mitigated by using +a combined copy/checksum routine. Copying also preloads the cache, which is +most useful with small frames. + +A subtle aspect of the operation is that the IP header at offset 14 in an +ethernet frame isn't longword aligned for further processing. +Unaligned buffers are permitted by the Sundance hardware, so +frames are received into the skbuff at an offset of "+2", 16-byte aligning +the IP header. + +IIId. Synchronization + +The driver runs as two independent, single-threaded flows of control. One +is the send-packet routine, which enforces single-threaded use by the +dev->tbusy flag. The other thread is the interrupt handler, which is single +threaded by the hardware and interrupt handling software. + +The send packet thread has partial control over the Tx ring and 'dev->tbusy' +flag. It sets the tbusy flag whenever it's queuing a Tx packet. If the next +queue slot is empty, it clears the tbusy flag when finished otherwise it sets +the 'lp->tx_full' flag. + +The interrupt handler has exclusive control over the Rx ring and records stats +from the Tx ring. After reaping the stats, it marks the Tx queue entry as +empty by incrementing the dirty_tx mark. Iff the 'lp->tx_full' flag is set, it +clears both the tx_full and tbusy flags. + +IV. Notes + +IVb. References + +The Sundance ST201 datasheet, preliminary version. +The Kendin KS8723 datasheet, preliminary version. +The ICplus IP100 datasheet, preliminary version. +http://www.scyld.com/expert/100mbps.html +http://www.scyld.com/expert/NWay.html + +IVc. Errata + +*/ + +/* Work-around for Kendin chip bugs. */ +#ifndef CONFIG_SUNDANCE_MMIO +#define USE_IO_OPS 1 +#endif + +static const struct pci_device_id sundance_pci_tbl[] = { + { 0x1186, 0x1002, 0x1186, 0x1002, 0, 0, 0 }, + { 0x1186, 0x1002, 0x1186, 0x1003, 0, 0, 1 }, + { 0x1186, 0x1002, 0x1186, 0x1012, 0, 0, 2 }, + { 0x1186, 0x1002, 0x1186, 0x1040, 0, 0, 3 }, + { 0x1186, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { 0x13F0, 0x0201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 }, + { 0x13F0, 0x0200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 }, + { } +}; +MODULE_DEVICE_TABLE(pci, sundance_pci_tbl); + +enum { + netdev_io_size = 128 +}; + +struct pci_id_info { + const char *name; +}; +static const struct pci_id_info pci_id_tbl[] = { + {"D-Link DFE-550TX FAST Ethernet Adapter"}, + {"D-Link DFE-550FX 100Mbps Fiber-optics Adapter"}, + {"D-Link DFE-580TX 4 port Server Adapter"}, + {"D-Link DFE-530TXS FAST Ethernet Adapter"}, + {"D-Link DL10050-based FAST Ethernet Adapter"}, + {"Sundance Technology Alta"}, + {"IC Plus Corporation IP100A FAST Ethernet Adapter"}, + { } /* terminate list. */ +}; + +/* This driver was written to use PCI memory space, however x86-oriented + hardware often uses I/O space accesses. */ + +/* Offsets to the device registers. + Unlike software-only systems, device drivers interact with complex hardware. + It's not useful to define symbolic names for every register bit in the + device. The name can only partially document the semantics and make + the driver longer and more difficult to read. + In general, only the important configuration values or bits changed + multiple times should be defined symbolically. +*/ +enum alta_offsets { + DMACtrl = 0x00, + TxListPtr = 0x04, + TxDMABurstThresh = 0x08, + TxDMAUrgentThresh = 0x09, + TxDMAPollPeriod = 0x0a, + RxDMAStatus = 0x0c, + RxListPtr = 0x10, + DebugCtrl0 = 0x1a, + DebugCtrl1 = 0x1c, + RxDMABurstThresh = 0x14, + RxDMAUrgentThresh = 0x15, + RxDMAPollPeriod = 0x16, + LEDCtrl = 0x1a, + ASICCtrl = 0x30, + EEData = 0x34, + EECtrl = 0x36, + FlashAddr = 0x40, + FlashData = 0x44, + WakeEvent = 0x45, + TxStatus = 0x46, + TxFrameId = 0x47, + DownCounter = 0x18, + IntrClear = 0x4a, + IntrEnable = 0x4c, + IntrStatus = 0x4e, + MACCtrl0 = 0x50, + MACCtrl1 = 0x52, + StationAddr = 0x54, + MaxFrameSize = 0x5A, + RxMode = 0x5c, + MIICtrl = 0x5e, + MulticastFilter0 = 0x60, + MulticastFilter1 = 0x64, + RxOctetsLow = 0x68, + RxOctetsHigh = 0x6a, + TxOctetsLow = 0x6c, + TxOctetsHigh = 0x6e, + TxFramesOK = 0x70, + RxFramesOK = 0x72, + StatsCarrierError = 0x74, + StatsLateColl = 0x75, + StatsMultiColl = 0x76, + StatsOneColl = 0x77, + StatsTxDefer = 0x78, + RxMissed = 0x79, + StatsTxXSDefer = 0x7a, + StatsTxAbort = 0x7b, + StatsBcastTx = 0x7c, + StatsBcastRx = 0x7d, + StatsMcastTx = 0x7e, + StatsMcastRx = 0x7f, + /* Aliased and bogus values! */ + RxStatus = 0x0c, +}; + +#define ASIC_HI_WORD(x) ((x) + 2) + +enum ASICCtrl_HiWord_bit { + GlobalReset = 0x0001, + RxReset = 0x0002, + TxReset = 0x0004, + DMAReset = 0x0008, + FIFOReset = 0x0010, + NetworkReset = 0x0020, + HostReset = 0x0040, + ResetBusy = 0x0400, +}; + +/* Bits in the interrupt status/mask registers. */ +enum intr_status_bits { + IntrSummary=0x0001, IntrPCIErr=0x0002, IntrMACCtrl=0x0008, + IntrTxDone=0x0004, IntrRxDone=0x0010, IntrRxStart=0x0020, + IntrDrvRqst=0x0040, + StatsMax=0x0080, LinkChange=0x0100, + IntrTxDMADone=0x0200, IntrRxDMADone=0x0400, +}; + +/* Bits in the RxMode register. */ +enum rx_mode_bits { + AcceptAllIPMulti=0x20, AcceptMultiHash=0x10, AcceptAll=0x08, + AcceptBroadcast=0x04, AcceptMulticast=0x02, AcceptMyPhys=0x01, +}; +/* Bits in MACCtrl. */ +enum mac_ctrl0_bits { + EnbFullDuplex=0x20, EnbRcvLargeFrame=0x40, + EnbFlowCtrl=0x100, EnbPassRxCRC=0x200, +}; +enum mac_ctrl1_bits { + StatsEnable=0x0020, StatsDisable=0x0040, StatsEnabled=0x0080, + TxEnable=0x0100, TxDisable=0x0200, TxEnabled=0x0400, + RxEnable=0x0800, RxDisable=0x1000, RxEnabled=0x2000, +}; + +/* Bits in WakeEvent register. */ +enum wake_event_bits { + WakePktEnable = 0x01, + MagicPktEnable = 0x02, + LinkEventEnable = 0x04, + WolEnable = 0x80, +}; + +/* The Rx and Tx buffer descriptors. */ +/* Note that using only 32 bit fields simplifies conversion to big-endian + architectures. */ +struct netdev_desc { + __le32 next_desc; + __le32 status; + struct desc_frag { __le32 addr, length; } frag; +}; + +/* Bits in netdev_desc.status */ +enum desc_status_bits { + DescOwn=0x8000, + DescEndPacket=0x4000, + DescEndRing=0x2000, + LastFrag=0x80000000, + DescIntrOnTx=0x8000, + DescIntrOnDMADone=0x80000000, + DisableAlign = 0x00000001, +}; + +#define PRIV_ALIGN 15 /* Required alignment mask */ +/* Use __attribute__((aligned (L1_CACHE_BYTES))) to maintain alignment + within the structure. */ +#define MII_CNT 4 +struct netdev_private { + /* Descriptor rings first for alignment. */ + struct netdev_desc *rx_ring; + struct netdev_desc *tx_ring; + struct sk_buff* rx_skbuff[RX_RING_SIZE]; + struct sk_buff* tx_skbuff[TX_RING_SIZE]; + dma_addr_t tx_ring_dma; + dma_addr_t rx_ring_dma; + struct timer_list timer; /* Media monitoring timer. */ + struct net_device *ndev; /* backpointer */ + /* ethtool extra stats */ + struct { + u64 tx_multiple_collisions; + u64 tx_single_collisions; + u64 tx_late_collisions; + u64 tx_deferred; + u64 tx_deferred_excessive; + u64 tx_aborted; + u64 tx_bcasts; + u64 rx_bcasts; + u64 tx_mcasts; + u64 rx_mcasts; + } xstats; + /* Frequently used values: keep some adjacent for cache effect. */ + spinlock_t lock; + int msg_enable; + int chip_id; + unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ + unsigned int rx_buf_sz; /* Based on MTU+slack. */ + struct netdev_desc *last_tx; /* Last Tx descriptor used. */ + unsigned int cur_tx, dirty_tx; + /* These values are keep track of the transceiver/media in use. */ + unsigned int flowctrl:1; + unsigned int default_port:4; /* Last dev->if_port value. */ + unsigned int an_enable:1; + unsigned int speed; + unsigned int wol_enabled:1; /* Wake on LAN enabled */ + struct tasklet_struct rx_tasklet; + struct tasklet_struct tx_tasklet; + int budget; + int cur_task; + /* Multicast and receive mode. */ + spinlock_t mcastlock; /* SMP lock multicast updates. */ + u16 mcast_filter[4]; + /* MII transceiver section. */ + struct mii_if_info mii_if; + int mii_preamble_required; + unsigned char phys[MII_CNT]; /* MII device addresses, only first one used. */ + struct pci_dev *pci_dev; + void __iomem *base; + spinlock_t statlock; +}; + +/* The station address location in the EEPROM. */ +#define EEPROM_SA_OFFSET 0x10 +#define DEFAULT_INTR (IntrRxDMADone | IntrPCIErr | \ + IntrDrvRqst | IntrTxDone | StatsMax | \ + LinkChange) + +static int change_mtu(struct net_device *dev, int new_mtu); +static int eeprom_read(void __iomem *ioaddr, int location); +static int mdio_read(struct net_device *dev, int phy_id, int location); +static void mdio_write(struct net_device *dev, int phy_id, int location, int value); +static int mdio_wait_link(struct net_device *dev, int wait); +static int netdev_open(struct net_device *dev); +static void check_duplex(struct net_device *dev); +static void netdev_timer(struct timer_list *t); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); +static void init_ring(struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); +static int reset_tx (struct net_device *dev); +static irqreturn_t intr_handler(int irq, void *dev_instance); +static void rx_poll(struct tasklet_struct *t); +static void tx_poll(struct tasklet_struct *t); +static void refill_rx (struct net_device *dev); +static void netdev_error(struct net_device *dev, int intr_status); +static void netdev_error(struct net_device *dev, int intr_status); +static void set_rx_mode(struct net_device *dev); +static int __set_mac_addr(struct net_device *dev); +static int sundance_set_mac_addr(struct net_device *dev, void *data); +static struct net_device_stats *get_stats(struct net_device *dev); +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int netdev_close(struct net_device *dev); +static const struct ethtool_ops ethtool_ops; + +static void sundance_reset(struct net_device *dev, unsigned long reset_cmd) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base + ASICCtrl; + int countdown; + + /* ST201 documentation states ASICCtrl is a 32bit register */ + iowrite32 (reset_cmd | ioread32 (ioaddr), ioaddr); + /* ST201 documentation states reset can take up to 1 ms */ + countdown = 10 + 1; + while (ioread32 (ioaddr) & (ResetBusy << 16)) { + if (--countdown == 0) { + printk(KERN_WARNING "%s : reset not completed !!\n", dev->name); + break; + } + udelay(100); + } +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void sundance_poll_controller(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + + disable_irq(np->pci_dev->irq); + intr_handler(np->pci_dev->irq, dev); + enable_irq(np->pci_dev->irq); +} +#endif + +static const struct net_device_ops netdev_ops = { + .ndo_open = netdev_open, + .ndo_stop = netdev_close, + .ndo_start_xmit = start_tx, + .ndo_get_stats = get_stats, + .ndo_set_rx_mode = set_rx_mode, + .ndo_eth_ioctl = netdev_ioctl, + .ndo_tx_timeout = tx_timeout, + .ndo_change_mtu = change_mtu, + .ndo_set_mac_address = sundance_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = sundance_poll_controller, +#endif +}; + +static int sundance_probe1(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev; + struct netdev_private *np; + static int card_idx; + int chip_idx = ent->driver_data; + int irq; + int i; + void __iomem *ioaddr; + u16 mii_ctl; + void *ring_space; + dma_addr_t ring_dma; +#ifdef USE_IO_OPS + int bar = 0; +#else + int bar = 1; +#endif + int phy, phy_end, phy_idx = 0; + __le16 addr[ETH_ALEN / 2]; + + if (pci_enable_device(pdev)) + return -EIO; + pci_set_master(pdev); + + irq = pdev->irq; + + dev = alloc_etherdev(sizeof(*np)); + if (!dev) + return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); + + if (pci_request_regions(pdev, DRV_NAME)) + goto err_out_netdev; + + ioaddr = pci_iomap(pdev, bar, netdev_io_size); + if (!ioaddr) + goto err_out_res; + + for (i = 0; i < 3; i++) + addr[i] = + cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET)); + eth_hw_addr_set(dev, (u8 *)addr); + + np = netdev_priv(dev); + np->ndev = dev; + np->base = ioaddr; + np->pci_dev = pdev; + np->chip_id = chip_idx; + np->msg_enable = (1 << debug) - 1; + spin_lock_init(&np->lock); + spin_lock_init(&np->statlock); + tasklet_setup(&np->rx_tasklet, rx_poll); + tasklet_setup(&np->tx_tasklet, tx_poll); + + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_cleardev; + np->tx_ring = (struct netdev_desc *)ring_space; + np->tx_ring_dma = ring_dma; + + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_unmap_tx; + np->rx_ring = (struct netdev_desc *)ring_space; + np->rx_ring_dma = ring_dma; + + np->mii_if.dev = dev; + np->mii_if.mdio_read = mdio_read; + np->mii_if.mdio_write = mdio_write; + np->mii_if.phy_id_mask = 0x1f; + np->mii_if.reg_num_mask = 0x1f; + + /* The chip-specific entries in the device structure. */ + dev->netdev_ops = &netdev_ops; + dev->ethtool_ops = ðtool_ops; + dev->watchdog_timeo = TX_TIMEOUT; + + /* MTU range: 68 - 8191 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 8191; + + pci_set_drvdata(pdev, dev); + + i = register_netdev(dev); + if (i) + goto err_out_unmap_rx; + + printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n", + dev->name, pci_id_tbl[chip_idx].name, ioaddr, + dev->dev_addr, irq); + + np->phys[0] = 1; /* Default setting */ + np->mii_preamble_required++; + + /* + * It seems some phys doesn't deal well with address 0 being accessed + * first + */ + if (sundance_pci_tbl[np->chip_id].device == 0x0200) { + phy = 0; + phy_end = 31; + } else { + phy = 1; + phy_end = 32; /* wraps to zero, due to 'phy & 0x1f' */ + } + for (; phy <= phy_end && phy_idx < MII_CNT; phy++) { + int phyx = phy & 0x1f; + int mii_status = mdio_read(dev, phyx, MII_BMSR); + if (mii_status != 0xffff && mii_status != 0x0000) { + np->phys[phy_idx++] = phyx; + np->mii_if.advertising = mdio_read(dev, phyx, MII_ADVERTISE); + if ((mii_status & 0x0040) == 0) + np->mii_preamble_required++; + printk(KERN_INFO "%s: MII PHY found at address %d, status " + "0x%4.4x advertising %4.4x.\n", + dev->name, phyx, mii_status, np->mii_if.advertising); + } + } + np->mii_preamble_required--; + + if (phy_idx == 0) { + printk(KERN_INFO "%s: No MII transceiver found, aborting. ASIC status %x\n", + dev->name, ioread32(ioaddr + ASICCtrl)); + goto err_out_unregister; + } + + np->mii_if.phy_id = np->phys[0]; + + /* Parse override configuration */ + np->an_enable = 1; + if (card_idx < MAX_UNITS) { + if (media[card_idx] != NULL) { + np->an_enable = 0; + if (strcmp (media[card_idx], "100mbps_fd") == 0 || + strcmp (media[card_idx], "4") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "100mbps_hd") == 0 || + strcmp (media[card_idx], "3") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 0; + } else if (strcmp (media[card_idx], "10mbps_fd") == 0 || + strcmp (media[card_idx], "2") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "10mbps_hd") == 0 || + strcmp (media[card_idx], "1") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 0; + } else { + np->an_enable = 1; + } + } + if (flowctrl == 1) + np->flowctrl = 1; + } + + /* Fibre PHY? */ + if (ioread32 (ioaddr + ASICCtrl) & 0x80) { + /* Default 100Mbps Full */ + if (np->an_enable) { + np->speed = 100; + np->mii_if.full_duplex = 1; + np->an_enable = 0; + } + } + /* Reset PHY */ + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_RESET); + mdelay (300); + /* If flow control enabled, we need to advertise it.*/ + if (np->flowctrl) + mdio_write (dev, np->phys[0], MII_ADVERTISE, np->mii_if.advertising | 0x0400); + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART); + /* Force media type */ + if (!np->an_enable) { + mii_ctl = 0; + mii_ctl |= (np->speed == 100) ? BMCR_SPEED100 : 0; + mii_ctl |= (np->mii_if.full_duplex) ? BMCR_FULLDPLX : 0; + mdio_write (dev, np->phys[0], MII_BMCR, mii_ctl); + printk (KERN_INFO "Override speed=%d, %s duplex\n", + np->speed, np->mii_if.full_duplex ? "Full" : "Half"); + + } + + /* Perhaps move the reset here? */ + /* Reset the chip to erase previous misconfiguration. */ + if (netif_msg_hw(np)) + printk("ASIC Control is %x.\n", ioread32(ioaddr + ASICCtrl)); + sundance_reset(dev, 0x00ff << 16); + if (netif_msg_hw(np)) + printk("ASIC Control is now %x.\n", ioread32(ioaddr + ASICCtrl)); + + card_idx++; + return 0; + +err_out_unregister: + unregister_netdev(dev); +err_out_unmap_rx: + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); +err_out_unmap_tx: + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); +err_out_cleardev: + pci_iounmap(pdev, ioaddr); +err_out_res: + pci_release_regions(pdev); +err_out_netdev: + free_netdev (dev); + return -ENODEV; +} + +static int change_mtu(struct net_device *dev, int new_mtu) +{ + if (netif_running(dev)) + return -EBUSY; + WRITE_ONCE(dev->mtu, new_mtu); + return 0; +} + +#define eeprom_delay(ee_addr) ioread32(ee_addr) +/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. */ +static int eeprom_read(void __iomem *ioaddr, int location) +{ + int boguscnt = 10000; /* Typical 1900 ticks. */ + iowrite16(0x0200 | (location & 0xff), ioaddr + EECtrl); + do { + eeprom_delay(ioaddr + EECtrl); + if (! (ioread16(ioaddr + EECtrl) & 0x8000)) { + return ioread16(ioaddr + EEData); + } + } while (--boguscnt > 0); + return 0; +} + +/* MII transceiver control section. + Read and write the MII registers using software-generated serial + MDIO protocol. See the MII specifications or DP83840A data sheet + for details. + + The maximum data clock rate is 2.5 Mhz. The minimum timing is usually + met by back-to-back 33Mhz PCI cycles. */ +#define mdio_delay() ioread8(mdio_addr) + +enum mii_reg_bits { + MDIO_ShiftClk=0x0001, MDIO_Data=0x0002, MDIO_EnbOutput=0x0004, +}; +#define MDIO_EnbIn (0) +#define MDIO_WRITE0 (MDIO_EnbOutput) +#define MDIO_WRITE1 (MDIO_Data | MDIO_EnbOutput) + +/* Generate the preamble required for initial synchronization and + a few older transceivers. */ +static void mdio_sync(void __iomem *mdio_addr) +{ + int bits = 32; + + /* Establish sync by sending at least 32 logic ones. */ + while (--bits >= 0) { + iowrite8(MDIO_WRITE1, mdio_addr); + mdio_delay(); + iowrite8(MDIO_WRITE1 | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_read(struct net_device *dev, int phy_id, int location) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; + int i, retval = 0; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the read command bits out. */ + for (i = 15; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Read the two transition, 16 data, and wire-idle bits. */ + for (i = 19; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + retval = (retval << 1) | ((ioread8(mdio_addr) & MDIO_Data) ? 1 : 0); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + return (retval>>1) & 0xffff; +} + +static void mdio_write(struct net_device *dev, int phy_id, int location, int value) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location<<18) | value; + int i; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the command bits out. */ + for (i = 31; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Clear out extra bits. */ + for (i = 2; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_wait_link(struct net_device *dev, int wait) +{ + int bmsr; + int phy_id; + struct netdev_private *np; + + np = netdev_priv(dev); + phy_id = np->phys[0]; + + do { + bmsr = mdio_read(dev, phy_id, MII_BMSR); + if (bmsr & 0x0004) + return 0; + mdelay(1); + } while (--wait > 0); + return -1; +} + +static int netdev_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + const int irq = np->pci_dev->irq; + unsigned long flags; + int i; + + sundance_reset(dev, 0x00ff << 16); + + i = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev); + if (i) + return i; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: netdev_open() irq %d\n", dev->name, irq); + + init_ring(dev); + + iowrite32(np->rx_ring_dma, ioaddr + RxListPtr); + /* The Tx list pointer is written as packets are queued. */ + + /* Initialize other registers. */ + __set_mac_addr(dev); +#if IS_ENABLED(CONFIG_VLAN_8021Q) + iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); +#else + iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); +#endif + if (dev->mtu > 2047) + iowrite32(ioread32(ioaddr + ASICCtrl) | 0x0C, ioaddr + ASICCtrl); + + /* Configure the PCI bus bursts and FIFO thresholds. */ + + if (dev->if_port == 0) + dev->if_port = np->default_port; + + spin_lock_init(&np->mcastlock); + + set_rx_mode(dev); + iowrite16(0, ioaddr + IntrEnable); + iowrite16(0, ioaddr + DownCounter); + /* Set the chip to poll every N*320nsec. */ + iowrite8(100, ioaddr + RxDMAPollPeriod); + iowrite8(127, ioaddr + TxDMAPollPeriod); + /* Fix DFE-580TX packet drop issue */ + if (np->pci_dev->revision >= 0x14) + iowrite8(0x01, ioaddr + DebugCtrl1); + netif_start_queue(dev); + + spin_lock_irqsave(&np->lock, flags); + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flags); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + + /* Disable Wol */ + iowrite8(ioread8(ioaddr + WakeEvent) | 0x00, ioaddr + WakeEvent); + np->wol_enabled = 0; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: Done netdev_open(), status: Rx %x Tx %x " + "MAC Control %x, %4.4x %4.4x.\n", + dev->name, ioread32(ioaddr + RxStatus), ioread8(ioaddr + TxStatus), + ioread32(ioaddr + MACCtrl0), + ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); + + /* Set the timer to check for link beat. */ + timer_setup(&np->timer, netdev_timer, 0); + np->timer.expires = jiffies + 3*HZ; + add_timer(&np->timer); + + /* Enable interrupts by setting the interrupt mask. */ + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + + return 0; +} + +static void check_duplex(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + int negotiated = mii_lpa & np->mii_if.advertising; + int duplex; + + /* Force media */ + if (!np->an_enable || mii_lpa == 0xffff) { + if (np->mii_if.full_duplex) + iowrite16 (ioread16 (ioaddr + MACCtrl0) | EnbFullDuplex, + ioaddr + MACCtrl0); + return; + } + + /* Autonegotiation */ + duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040; + if (np->mii_if.full_duplex != duplex) { + np->mii_if.full_duplex = duplex; + if (netif_msg_link(np)) + printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d " + "negotiated capability %4.4x.\n", dev->name, + duplex ? "full" : "half", np->phys[0], negotiated); + iowrite16(ioread16(ioaddr + MACCtrl0) | (duplex ? 0x20 : 0), ioaddr + MACCtrl0); + } +} + +static void netdev_timer(struct timer_list *t) +{ + struct netdev_private *np = timer_container_of(np, t, timer); + struct net_device *dev = np->mii_if.dev; + void __iomem *ioaddr = np->base; + int next_tick = 10*HZ; + + if (netif_msg_timer(np)) { + printk(KERN_DEBUG "%s: Media selection timer tick, intr status %4.4x, " + "Tx %x Rx %x.\n", + dev->name, ioread16(ioaddr + IntrEnable), + ioread8(ioaddr + TxStatus), ioread32(ioaddr + RxStatus)); + } + check_duplex(dev); + np->timer.expires = jiffies + next_tick; + add_timer(&np->timer); +} + +static void tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flag; + + netif_stop_queue(dev); + tasklet_disable_in_atomic(&np->tx_tasklet); + iowrite16(0, ioaddr + IntrEnable); + printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " + "TxFrameId %2.2x," + " resetting...\n", dev->name, ioread8(ioaddr + TxStatus), + ioread8(ioaddr + TxFrameId)); + + { + int i; + for (i=0; itx_ring_dma + i*sizeof(*np->tx_ring)), + le32_to_cpu(np->tx_ring[i].next_desc), + le32_to_cpu(np->tx_ring[i].status), + (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); + } + printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", + ioread32(np->base + TxListPtr), + netif_queue_stopped(dev)); + printk(KERN_DEBUG "cur_tx=%d(%02x) dirty_tx=%d(%02x)\n", + np->cur_tx, np->cur_tx % TX_RING_SIZE, + np->dirty_tx, np->dirty_tx % TX_RING_SIZE); + printk(KERN_DEBUG "cur_rx=%d dirty_rx=%d\n", np->cur_rx, np->dirty_rx); + printk(KERN_DEBUG "cur_task=%d\n", np->cur_task); + } + spin_lock_irqsave(&np->lock, flag); + + /* Stop and restart the chip's Tx processes . */ + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flag); + + dev->if_port = 0; + + netif_trans_update(dev); /* prevent tx timeout */ + dev->stats.tx_errors++; + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + netif_wake_queue(dev); + } + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + tasklet_enable(&np->tx_tasklet); +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void init_ring(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + np->cur_rx = np->cur_tx = 0; + np->dirty_rx = np->dirty_tx = 0; + np->cur_task = 0; + + np->rx_buf_sz = (dev->mtu <= 1520 ? PKT_BUF_SZ : dev->mtu + 16); + + /* Initialize all Rx descriptors. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); + np->rx_ring[i].status = 0; + np->rx_ring[i].frag.length = 0; + np->rx_skbuff[i] = NULL; + } + + /* Fill in the Rx buffers. Handle allocation failure gracefully. */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = + netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[i] = skb; + if (skb == NULL) + break; + skb_reserve(skb, 2); /* 16 byte align the IP header. */ + np->rx_ring[i].frag.addr = cpu_to_le32( + dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + np->rx_ring[i].frag.addr)) { + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + break; + } + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); + } + np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_skbuff[i] = NULL; + np->tx_ring[i].status = 0; + } +} + +static void tx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, tx_tasklet); + unsigned head = np->cur_task % TX_RING_SIZE; + struct netdev_desc *txdesc = + &np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE]; + + /* Chain the next pointer */ + for (; np->cur_tx - np->cur_task > 0; np->cur_task++) { + int entry = np->cur_task % TX_RING_SIZE; + txdesc = &np->tx_ring[entry]; + if (np->last_tx) { + np->last_tx->next_desc = cpu_to_le32(np->tx_ring_dma + + entry*sizeof(struct netdev_desc)); + } + np->last_tx = txdesc; + } + /* Indicate the latest descriptor of tx ring */ + txdesc->status |= cpu_to_le32(DescIntrOnTx); + + if (ioread32 (np->base + TxListPtr) == 0) + iowrite32 (np->tx_ring_dma + head * sizeof(struct netdev_desc), + np->base + TxListPtr); +} + +static netdev_tx_t +start_tx (struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct netdev_desc *txdesc; + unsigned entry; + + /* Calculate the next Tx descriptor entry. */ + entry = np->cur_tx % TX_RING_SIZE; + np->tx_skbuff[entry] = skb; + txdesc = &np->tx_ring[entry]; + + txdesc->next_desc = 0; + txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); + txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, + skb->data, skb->len, DMA_TO_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + txdesc->frag.addr)) + goto drop_frame; + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); + + /* Increment cur_tx before tasklet_schedule() */ + np->cur_tx++; + mb(); + /* Schedule a tx_poll() task */ + tasklet_schedule(&np->tx_tasklet); + + /* On some architectures: explicitly flush cache lines here. */ + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1 && + !netif_queue_stopped(dev)) { + /* do nothing */ + } else { + netif_stop_queue (dev); + } + if (netif_msg_tx_queued(np)) { + printk (KERN_DEBUG + "%s: Transmit frame #%d queued in slot %d.\n", + dev->name, np->cur_tx, entry); + } + return NETDEV_TX_OK; + +drop_frame: + dev_kfree_skb_any(skb); + np->tx_skbuff[entry] = NULL; + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +/* Reset hardware tx and free all of tx buffers */ +static int +reset_tx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Reset tx logic, TxListPtr will be cleaned */ + iowrite16 (TxDisable, ioaddr + MACCtrl1); + sundance_reset(dev, (NetworkReset|FIFOReset|DMAReset|TxReset) << 16); + + /* free all tx skbuff */ + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + np->tx_skbuff[i] = NULL; + dev->stats.tx_dropped++; + } + } + np->cur_tx = np->dirty_tx = 0; + np->cur_task = 0; + + np->last_tx = NULL; + iowrite8(127, ioaddr + TxDMAPollPeriod); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + return 0; +} + +/* The interrupt handler cleans up after the Tx thread, + and schedule a Rx thread work */ +static irqreturn_t intr_handler(int irq, void *dev_instance) +{ + struct net_device *dev = (struct net_device *)dev_instance; + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int hw_frame_id; + int tx_cnt; + int tx_status; + int handled = 0; + int i; + + do { + int intr_status = ioread16(ioaddr + IntrStatus); + iowrite16(intr_status, ioaddr + IntrStatus); + + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", + dev->name, intr_status); + + if (!(intr_status & DEFAULT_INTR)) + break; + + handled = 1; + + if (intr_status & (IntrRxDMADone)) { + iowrite16(DEFAULT_INTR & ~(IntrRxDone|IntrRxDMADone), + ioaddr + IntrEnable); + if (np->budget < 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); + } + if (intr_status & (IntrTxDone | IntrDrvRqst)) { + tx_status = ioread16 (ioaddr + TxStatus); + for (tx_cnt=32; tx_status & 0x80; --tx_cnt) { + if (netif_msg_tx_done(np)) + printk + ("%s: Transmit status is %2.2x.\n", + dev->name, tx_status); + if (tx_status & 0x1e) { + if (netif_msg_tx_err(np)) + printk("%s: Transmit error status %4.4x.\n", + dev->name, tx_status); + dev->stats.tx_errors++; + if (tx_status & 0x10) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x08) + dev->stats.collisions++; + if (tx_status & 0x04) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x02) + dev->stats.tx_window_errors++; + + /* + ** This reset has been verified on + ** DFE-580TX boards ! phdm@macqel.be. + */ + if (tx_status & 0x10) { /* TxUnderrun */ + /* Restart Tx FIFO and transmitter */ + sundance_reset(dev, (NetworkReset|FIFOReset|TxReset) << 16); + /* No need to reset the Tx pointer here */ + } + /* Restart the Tx. Need to make sure tx enabled */ + i = 10; + do { + iowrite16(ioread16(ioaddr + MACCtrl1) | TxEnable, ioaddr + MACCtrl1); + if (ioread16(ioaddr + MACCtrl1) & TxEnabled) + break; + mdelay(1); + } while (--i); + } + /* Yup, this is a documentation bug. It cost me *hours*. */ + iowrite16 (0, ioaddr + TxStatus); + if (tx_cnt < 0) { + iowrite32(5000, ioaddr + DownCounter); + break; + } + tx_status = ioread16 (ioaddr + TxStatus); + } + hw_frame_id = (tx_status >> 8) & 0xff; + } else { + hw_frame_id = ioread8(ioaddr + TxFrameId); + } + + if (np->pci_dev->revision >= 0x14) { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + int sw_frame_id; + sw_frame_id = (le32_to_cpu( + np->tx_ring[entry].status) >> 2) & 0xff; + if (sw_frame_id == hw_frame_id && + !(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + if (sw_frame_id == (hw_frame_id + 1) % + TX_RING_SIZE) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } else { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + if (!(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } + + if (netif_queue_stopped(dev) && + np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + /* The ring is no longer full, clear busy flag. */ + netif_wake_queue (dev); + } + /* Abnormal error summary/uncommon events handlers. */ + if (intr_status & (IntrPCIErr | LinkChange | StatsMax)) + netdev_error(dev, intr_status); + } while (0); + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", + dev->name, ioread16(ioaddr + IntrStatus)); + return IRQ_RETVAL(handled); +} + +static void rx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, rx_tasklet); + struct net_device *dev = np->ndev; + int entry = np->cur_rx % RX_RING_SIZE; + int boguscnt = np->budget; + void __iomem *ioaddr = np->base; + int received = 0; + + /* If EOP is set on the next entry, it's a new packet. Send it up. */ + while (1) { + struct netdev_desc *desc = &(np->rx_ring[entry]); + u32 frame_status = le32_to_cpu(desc->status); + int pkt_len; + + if (--boguscnt < 0) { + goto not_done; + } + if (!(frame_status & DescOwn)) + break; + pkt_len = frame_status & 0x1fff; /* Chip omits the CRC. */ + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n", + frame_status); + if (frame_status & 0x001f4000) { + /* There was a error. */ + if (netif_msg_rx_err(np)) + printk(KERN_DEBUG " netdev_rx() Rx error was %8.8x.\n", + frame_status); + dev->stats.rx_errors++; + if (frame_status & 0x00100000) + dev->stats.rx_length_errors++; + if (frame_status & 0x00010000) + dev->stats.rx_fifo_errors++; + if (frame_status & 0x00060000) + dev->stats.rx_frame_errors++; + if (frame_status & 0x00080000) + dev->stats.rx_crc_errors++; + if (frame_status & 0x00100000) { + printk(KERN_WARNING "%s: Oversized Ethernet frame," + " status %8.8x.\n", + dev->name, frame_status); + } + } else { + struct sk_buff *skb; +#ifndef final_version + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d" + ", bogus_cnt %d.\n", + pkt_len, boguscnt); +#endif + /* Check if the packet is long enough to accept without copying + to a minimally-sized skbuff. */ + if (pkt_len < rx_copybreak && + (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* 16 byte align the IP header */ + dma_sync_single_for_cpu(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); + dma_sync_single_for_device(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb, pkt_len); + } else { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb = np->rx_skbuff[entry], pkt_len); + np->rx_skbuff[entry] = NULL; + } + skb->protocol = eth_type_trans(skb, dev); + /* Note: checksum -> skb->ip_summed = CHECKSUM_UNNECESSARY; */ + netif_rx(skb); + } + entry = (entry + 1) % RX_RING_SIZE; + received++; + } + np->cur_rx = entry; + refill_rx (dev); + np->budget -= received; + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + return; + +not_done: + np->cur_rx = entry; + refill_rx (dev); + if (!received) + received = 1; + np->budget -= received; + if (np->budget <= 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); +} + +static void refill_rx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int entry; + + /* Refill the Rx ring buffers. */ + for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0; + np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) { + struct sk_buff *skb; + entry = np->dirty_rx % RX_RING_SIZE; + if (np->rx_skbuff[entry] == NULL) { + skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[entry] = skb; + if (skb == NULL) + break; /* Better luck next round. */ + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + np->rx_ring[entry].frag.addr = cpu_to_le32( + dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE)); + if (dma_mapping_error(&np->pci_dev->dev, + np->rx_ring[entry].frag.addr)) { + dev_kfree_skb_irq(skb); + np->rx_skbuff[entry] = NULL; + break; + } + } + /* Perhaps we need not reset this field. */ + np->rx_ring[entry].frag.length = + cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[entry].status = 0; + } +} +static void netdev_error(struct net_device *dev, int intr_status) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mii_ctl, mii_advertise, mii_lpa; + int speed; + + if (intr_status & LinkChange) { + if (mdio_wait_link(dev, 10) == 0) { + printk(KERN_INFO "%s: Link up\n", dev->name); + if (np->an_enable) { + mii_advertise = mdio_read(dev, np->phys[0], + MII_ADVERTISE); + mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + mii_advertise &= mii_lpa; + printk(KERN_INFO "%s: Link changed: ", + dev->name); + if (mii_advertise & ADVERTISE_100FULL) { + np->speed = 100; + printk("100Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_100HALF) { + np->speed = 100; + printk("100Mbps, half duplex\n"); + } else if (mii_advertise & ADVERTISE_10FULL) { + np->speed = 10; + printk("10Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_10HALF) { + np->speed = 10; + printk("10Mbps, half duplex\n"); + } else + printk("\n"); + + } else { + mii_ctl = mdio_read(dev, np->phys[0], MII_BMCR); + speed = (mii_ctl & BMCR_SPEED100) ? 100 : 10; + np->speed = speed; + printk(KERN_INFO "%s: Link changed: %dMbps ,", + dev->name, speed); + printk("%s duplex.\n", + (mii_ctl & BMCR_FULLDPLX) ? + "full" : "half"); + } + check_duplex(dev); + if (np->flowctrl && np->mii_if.full_duplex) { + iowrite16(ioread16(ioaddr + MulticastFilter1+2) | 0x0200, + ioaddr + MulticastFilter1+2); + iowrite16(ioread16(ioaddr + MACCtrl0) | EnbFlowCtrl, + ioaddr + MACCtrl0); + } + netif_carrier_on(dev); + } else { + printk(KERN_INFO "%s: Link down\n", dev->name); + netif_carrier_off(dev); + } + } + if (intr_status & StatsMax) { + get_stats(dev); + } + if (intr_status & IntrPCIErr) { + printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n", + dev->name, intr_status); + /* We must do a global reset of DMA to continue. */ + } +} + +static struct net_device_stats *get_stats(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flags; + u8 late_coll, single_coll, mult_coll; + + spin_lock_irqsave(&np->statlock, flags); + /* The chip only need report frame silently dropped. */ + dev->stats.rx_missed_errors += ioread8(ioaddr + RxMissed); + dev->stats.tx_packets += ioread16(ioaddr + TxFramesOK); + dev->stats.rx_packets += ioread16(ioaddr + RxFramesOK); + dev->stats.tx_carrier_errors += ioread8(ioaddr + StatsCarrierError); + + mult_coll = ioread8(ioaddr + StatsMultiColl); + np->xstats.tx_multiple_collisions += mult_coll; + single_coll = ioread8(ioaddr + StatsOneColl); + np->xstats.tx_single_collisions += single_coll; + late_coll = ioread8(ioaddr + StatsLateColl); + np->xstats.tx_late_collisions += late_coll; + dev->stats.collisions += mult_coll + + single_coll + + late_coll; + + np->xstats.tx_deferred += ioread8(ioaddr + StatsTxDefer); + np->xstats.tx_deferred_excessive += ioread8(ioaddr + StatsTxXSDefer); + np->xstats.tx_aborted += ioread8(ioaddr + StatsTxAbort); + np->xstats.tx_bcasts += ioread8(ioaddr + StatsBcastTx); + np->xstats.rx_bcasts += ioread8(ioaddr + StatsBcastRx); + np->xstats.tx_mcasts += ioread8(ioaddr + StatsMcastTx); + np->xstats.rx_mcasts += ioread8(ioaddr + StatsMcastRx); + + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsLow); + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsHigh) << 16; + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsLow); + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsHigh) << 16; + + spin_unlock_irqrestore(&np->statlock, flags); + + return &dev->stats; +} + +static void set_rx_mode(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mc_filter[4]; /* Multicast hash filter */ + u32 rx_mode; + int i; + + if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptAll | AcceptMyPhys; + } else if ((netdev_mc_count(dev) > multicast_filter_limit) || + (dev->flags & IFF_ALLMULTI)) { + /* Too many to match, or accept all multicasts. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + } else if (!netdev_mc_empty(dev)) { + struct netdev_hw_addr *ha; + int bit; + int index; + int crc; + memset (mc_filter, 0, sizeof (mc_filter)); + netdev_for_each_mc_addr(ha, dev) { + crc = ether_crc_le(ETH_ALEN, ha->addr); + for (index=0, bit=0; bit < 6; bit++, crc <<= 1) + if (crc & 0x80000000) index |= 1 << bit; + mc_filter[index/16] |= (1 << (index % 16)); + } + rx_mode = AcceptBroadcast | AcceptMultiHash | AcceptMyPhys; + } else { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + return; + } + if (np->mii_if.full_duplex && np->flowctrl) + mc_filter[3] |= 0x0200; + + for (i = 0; i < 4; i++) + iowrite16(mc_filter[i], ioaddr + MulticastFilter0 + i*2); + iowrite8(rx_mode, ioaddr + RxMode); +} + +static int __set_mac_addr(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + u16 addr16; + + addr16 = (dev->dev_addr[0] | (dev->dev_addr[1] << 8)); + iowrite16(addr16, np->base + StationAddr); + addr16 = (dev->dev_addr[2] | (dev->dev_addr[3] << 8)); + iowrite16(addr16, np->base + StationAddr+2); + addr16 = (dev->dev_addr[4] | (dev->dev_addr[5] << 8)); + iowrite16(addr16, np->base + StationAddr+4); + return 0; +} + +/* Invoked with rtnl_lock held */ +static int sundance_set_mac_addr(struct net_device *dev, void *data) +{ + const struct sockaddr *addr = data; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + eth_hw_addr_set(dev, addr->sa_data); + __set_mac_addr(dev); + + return 0; +} + +static const struct { + const char name[ETH_GSTRING_LEN]; +} sundance_stats[] = { + { "tx_multiple_collisions" }, + { "tx_single_collisions" }, + { "tx_late_collisions" }, + { "tx_deferred" }, + { "tx_deferred_excessive" }, + { "tx_aborted" }, + { "tx_bcasts" }, + { "rx_bcasts" }, + { "tx_mcasts" }, + { "rx_mcasts" }, +}; + +static int check_if_running(struct net_device *dev) +{ + if (!netif_running(dev)) + return -EINVAL; + return 0; +} + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct netdev_private *np = netdev_priv(dev); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); +} + +static int get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + spin_lock_irq(&np->lock); + mii_ethtool_get_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return 0; +} + +static int set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int res; + spin_lock_irq(&np->lock); + res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return res; +} + +static int nway_reset(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_nway_restart(&np->mii_if); +} + +static u32 get_link(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_link_ok(&np->mii_if); +} + +static u32 get_msglevel(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return np->msg_enable; +} + +static void set_msglevel(struct net_device *dev, u32 val) +{ + struct netdev_private *np = netdev_priv(dev); + np->msg_enable = val; +} + +static void get_strings(struct net_device *dev, u32 stringset, + u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, sundance_stats, sizeof(sundance_stats)); +} + +static int get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(sundance_stats); + default: + return -EOPNOTSUPP; + } +} + +static void get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct netdev_private *np = netdev_priv(dev); + int i = 0; + + get_stats(dev); + data[i++] = np->xstats.tx_multiple_collisions; + data[i++] = np->xstats.tx_single_collisions; + data[i++] = np->xstats.tx_late_collisions; + data[i++] = np->xstats.tx_deferred; + data[i++] = np->xstats.tx_deferred_excessive; + data[i++] = np->xstats.tx_aborted; + data[i++] = np->xstats.tx_bcasts; + data[i++] = np->xstats.rx_bcasts; + data[i++] = np->xstats.tx_mcasts; + data[i++] = np->xstats.rx_mcasts; +} + +#ifdef CONFIG_PM + +static void sundance_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + wol->wolopts = 0; + + wol->supported = (WAKE_PHY | WAKE_MAGIC); + if (!np->wol_enabled) + return; + + wol_bits = ioread8(ioaddr + WakeEvent); + if (wol_bits & MagicPktEnable) + wol->wolopts |= WAKE_MAGIC; + if (wol_bits & LinkEventEnable) + wol->wolopts |= WAKE_PHY; +} + +static int sundance_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + if (!device_can_wakeup(&np->pci_dev->dev)) + return -EOPNOTSUPP; + + np->wol_enabled = !!(wol->wolopts); + wol_bits = ioread8(ioaddr + WakeEvent); + wol_bits &= ~(WakePktEnable | MagicPktEnable | + LinkEventEnable | WolEnable); + + if (np->wol_enabled) { + if (wol->wolopts & WAKE_MAGIC) + wol_bits |= (MagicPktEnable | WolEnable); + if (wol->wolopts & WAKE_PHY) + wol_bits |= (LinkEventEnable | WolEnable); + } + iowrite8(wol_bits, ioaddr + WakeEvent); + + device_set_wakeup_enable(&np->pci_dev->dev, np->wol_enabled); + + return 0; +} +#else +#define sundance_get_wol NULL +#define sundance_set_wol NULL +#endif /* CONFIG_PM */ + +static const struct ethtool_ops ethtool_ops = { + .begin = check_if_running, + .get_drvinfo = get_drvinfo, + .nway_reset = nway_reset, + .get_link = get_link, + .get_wol = sundance_get_wol, + .set_wol = sundance_set_wol, + .get_msglevel = get_msglevel, + .set_msglevel = set_msglevel, + .get_strings = get_strings, + .get_sset_count = get_sset_count, + .get_ethtool_stats = get_ethtool_stats, + .get_link_ksettings = get_link_ksettings, + .set_link_ksettings = set_link_ksettings, +}; + +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int rc; + + if (!netif_running(dev)) + return -EINVAL; + + spin_lock_irq(&np->lock); + rc = generic_mii_ioctl(&np->mii_if, if_mii(rq), cmd, NULL); + spin_unlock_irq(&np->lock); + + return rc; +} + +static int netdev_close(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Wait and kill tasklet */ + tasklet_kill(&np->rx_tasklet); + tasklet_kill(&np->tx_tasklet); + np->cur_tx = 0; + np->dirty_tx = 0; + np->cur_task = 0; + np->last_tx = NULL; + + netif_stop_queue(dev); + + if (netif_msg_ifdown(np)) { + printk(KERN_DEBUG "%s: Shutting down ethercard, status was Tx %2.2x " + "Rx %4.4x Int %2.2x.\n", + dev->name, ioread8(ioaddr + TxStatus), + ioread32(ioaddr + RxStatus), ioread16(ioaddr + IntrStatus)); + printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n", + dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx); + } + + /* Disable interrupts by clearing the interrupt mask. */ + iowrite16(0x0000, ioaddr + IntrEnable); + + /* Disable Rx and Tx DMA for safely release resource */ + iowrite32(0x500, ioaddr + DMACtrl); + + /* Stop the chip's Tx and Rx processes. */ + iowrite16(TxDisable | RxDisable | StatsDisable, ioaddr + MACCtrl1); + + for (i = 2000; i > 0; i--) { + if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0) + break; + mdelay(1); + } + + iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset, + ioaddr + ASIC_HI_WORD(ASICCtrl)); + + for (i = 2000; i > 0; i--) { + if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0) + break; + mdelay(1); + } + +#ifdef __i386__ + if (netif_msg_hw(np)) { + printk(KERN_DEBUG " Tx ring at %8.8x:\n", + (int)(np->tx_ring_dma)); + for (i = 0; i < TX_RING_SIZE; i++) + printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); + printk(KERN_DEBUG " Rx ring %8.8x:\n", + (int)(np->rx_ring_dma)); + for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { + printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); + } + } +#endif /* __i386__ debugging only */ + + free_irq(np->pci_dev->irq, dev); + + timer_delete_sync(&np->timer); + + /* Free all the skbuffs in the Rx queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].status = 0; + skb = np->rx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->rx_ring[i].frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + } + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ + } + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb(skb); + np->tx_skbuff[i] = NULL; + } + } + + return 0; +} + +static void sundance_remove1(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev) { + struct netdev_private *np = netdev_priv(dev); + unregister_netdev(dev); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); + pci_iounmap(pdev, np->base); + pci_release_regions(pdev); + free_netdev(dev); + } +} + +static int __maybe_unused sundance_suspend(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + + if (!netif_running(dev)) + return 0; + + netdev_close(dev); + netif_device_detach(dev); + + if (np->wol_enabled) { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + iowrite16(RxEnable, ioaddr + MACCtrl1); + } + + device_set_wakeup_enable(dev_d, np->wol_enabled); + + return 0; +} + +static int __maybe_unused sundance_resume(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + int err = 0; + + if (!netif_running(dev)) + return 0; + + err = netdev_open(dev); + if (err) { + printk(KERN_ERR "%s: Can't resume interface!\n", + dev->name); + goto out; + } + + netif_device_attach(dev); + +out: + return err; +} + +static SIMPLE_DEV_PM_OPS(sundance_pm_ops, sundance_suspend, sundance_resume); + +static struct pci_driver sundance_driver = { + .name = DRV_NAME, + .id_table = sundance_pci_tbl, + .probe = sundance_probe1, + .remove = sundance_remove1, + .driver.pm = &sundance_pm_ops, +}; + +module_pci_driver(sundance_driver); From d2644cbc736f737142a7595fa9346f63e6fc9b33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Sep 2025 14:08:18 -0700 Subject: [PATCH 1076/1307] eth: sundance: fix endian issues Fix sparse warnings about endianness. Store DMA addr to a variable of correct type and then only convert it when writing to the descriptor. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901210818.1025316-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/sundance.c | 35 +++++++++++++++------------ 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 29d59c42dfa3..277c50ef773f 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -1033,21 +1033,22 @@ static void init_ring(struct net_device *dev) /* Fill in the Rx buffers. Handle allocation failure gracefully. */ for (i = 0; i < RX_RING_SIZE; i++) { + dma_addr_t addr; + struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); np->rx_skbuff[i] = skb; if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - np->rx_ring[i].frag.addr = cpu_to_le32( - dma_map_single(&np->pci_dev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[i].frag.addr)) { + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; break; } + np->rx_ring[i].frag.addr = cpu_to_le32(addr); np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); } np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -1088,6 +1089,7 @@ start_tx (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); struct netdev_desc *txdesc; + dma_addr_t addr; unsigned entry; /* Calculate the next Tx descriptor entry. */ @@ -1095,13 +1097,14 @@ start_tx (struct sk_buff *skb, struct net_device *dev) np->tx_skbuff[entry] = skb; txdesc = &np->tx_ring[entry]; + addr = dma_map_single(&np->pci_dev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) + goto drop_frame; + txdesc->next_desc = 0; txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); - txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, - skb->data, skb->len, DMA_TO_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - txdesc->frag.addr)) - goto drop_frame; + txdesc->frag.addr = cpu_to_le32(addr); txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); /* Increment cur_tx before tasklet_schedule() */ @@ -1419,6 +1422,8 @@ static void refill_rx (struct net_device *dev) for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0; np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) { struct sk_buff *skb; + dma_addr_t addr; + entry = np->dirty_rx % RX_RING_SIZE; if (np->rx_skbuff[entry] == NULL) { skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); @@ -1426,15 +1431,15 @@ static void refill_rx (struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - np->rx_ring[entry].frag.addr = cpu_to_le32( - dma_map_single(&np->pci_dev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); - if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[entry].frag.addr)) { + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { dev_kfree_skb_irq(skb); np->rx_skbuff[entry] = NULL; break; } + + np->rx_ring[entry].frag.addr = cpu_to_le32(addr); } /* Perhaps we need not reset this field. */ np->rx_ring[entry].frag.length = From fa390321aba0a54d0f7ae95ee4ecde1358bb9234 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sat, 30 Aug 2025 15:55:38 -0700 Subject: [PATCH 1077/1307] net/tcp: Fix socket memory leak in TCP-AO failure handling for IPv6 When tcp_ao_copy_all_matching() fails in tcp_v6_syn_recv_sock() it just exits the function. This ends up causing a memory-leak: unreferenced object 0xffff0000281a8200 (size 2496): comm "softirq", pid 0, jiffies 4295174684 hex dump (first 32 bytes): 7f 00 00 06 7f 00 00 06 00 00 00 00 cb a8 88 13 ................ 0a 00 03 61 00 00 00 00 00 00 00 00 00 00 00 00 ...a............ backtrace (crc 5ebdbe15): kmemleak_alloc+0x44/0xe0 kmem_cache_alloc_noprof+0x248/0x470 sk_prot_alloc+0x48/0x120 sk_clone_lock+0x38/0x3b0 inet_csk_clone_lock+0x34/0x150 tcp_create_openreq_child+0x3c/0x4a8 tcp_v6_syn_recv_sock+0x1c0/0x620 tcp_check_req+0x588/0x790 tcp_v6_rcv+0x5d0/0xc18 ip6_protocol_deliver_rcu+0x2d8/0x4c0 ip6_input_finish+0x74/0x148 ip6_input+0x50/0x118 ip6_sublist_rcv+0x2fc/0x3b0 ipv6_list_rcv+0x114/0x170 __netif_receive_skb_list_core+0x16c/0x200 netif_receive_skb_list_internal+0x1f0/0x2d0 This is because in tcp_v6_syn_recv_sock (and the IPv4 counterpart), when exiting upon error, inet_csk_prepare_forced_close() and tcp_done() need to be called. They make sure the newsk will end up being correctly free'd. tcp_v4_syn_recv_sock() makes this very clear by having the put_and_exit label that takes care of things. So, this patch here makes sure tcp_v4_syn_recv_sock and tcp_v6_syn_recv_sock have similar error-handling and thus fixes the leak for TCP-AO. Fixes: 06b22ef29591 ("net/tcp: Wire TCP-AO to request sockets") Signed-off-by: Christoph Paasch Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20250830-tcpao_leak-v1-1-e5878c2c3173@openai.com Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7577e7eb2c97..e885629312a4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1431,17 +1431,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) - goto out_overflow; + goto exit_overflow; if (!dst) { dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) - goto out; + goto exit; } newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) - goto out_nonewsk; + goto exit_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks @@ -1525,25 +1525,19 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const union tcp_md5_addr *addr; addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; - } + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) + goto put_and_exit; } } #endif #ifdef CONFIG_TCP_AO /* Copy over tcp_ao_info if any */ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) - goto out; /* OOM */ + goto put_and_exit; /* OOM */ #endif - if (__inet_inherit_port(sk, newsk) < 0) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; - } + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), &found_dup_sk); if (*own_req) { @@ -1570,13 +1564,17 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * return newsk; -out_overflow: +exit_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); -out_nonewsk: +exit_nonewsk: dst_release(dst); -out: +exit: tcp_listendrop(sk); return NULL; +put_and_exit: + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto exit; } INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, From 4beb44a2d62dddfe450f310aa1a950901731cb3a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:33 +0100 Subject: [PATCH 1078/1307] net: phy: add phy_interface_weight() Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslwn-00000001SOx-0a7H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..bb45787d8684 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline unsigned int phy_interface_weight(const unsigned long *intf) +{ + return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); +} + static inline void phy_interface_and(unsigned long *dst, const unsigned long *a, const unsigned long *b) { From 1bd905dfea9897eafef532000702e63a66849f54 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:38 +0100 Subject: [PATCH 1079/1307] net: phylink: provide phylink_get_inband_type() Provide a function to get the type of the inband signalling used for a PHY interface type. This will be used in the subsequent patch to address problems with 10G optical modules. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslws-00000001SP5-1R2R@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 79 ++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c7f867b361dd..8283416ccf5d 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1016,6 +1016,42 @@ static void phylink_pcs_an_restart(struct phylink *pl) pl->pcs->ops->pcs_an_restart(pl->pcs); } +enum inband_type { + INBAND_NONE, + INBAND_CISCO_SGMII, + INBAND_BASEX, +}; + +static enum inband_type phylink_get_inband_type(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: + /* These protocols are designed for use with a PHY which + * communicates its negotiation result back to the MAC via + * inband communication. Note: there exist PHYs that run + * with SGMII but do not send the inband data. + */ + return INBAND_CISCO_SGMII; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + /* 1000base-X is designed for use media-side for Fibre + * connections, and thus the Autoneg bit needs to be + * taken into account. We also do this for 2500base-X + * as well, but drivers may not support this, so may + * need to override this. + */ + return INBAND_BASEX; + + default: + return INBAND_NONE; + } +} + /** * phylink_pcs_neg_mode() - helper to determine PCS inband mode * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -1043,46 +1079,19 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, unsigned int pcs_ib_caps = 0; unsigned int phy_ib_caps = 0; unsigned int neg_mode, mode; - enum { - INBAND_CISCO_SGMII, - INBAND_BASEX, - } type; + enum inband_type type; + + type = phylink_get_inband_type(interface); + if (type == INBAND_NONE) { + pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE; + pl->act_link_an_mode = pl->req_link_an_mode; + return; + } mode = pl->req_link_an_mode; pl->phy_ib_mode = 0; - switch (interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_USXGMII: - case PHY_INTERFACE_MODE_10G_QXGMII: - /* These protocols are designed for use with a PHY which - * communicates its negotiation result back to the MAC via - * inband communication. Note: there exist PHYs that run - * with SGMII but do not send the inband data. - */ - type = INBAND_CISCO_SGMII; - break; - - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* 1000base-X is designed for use media-side for Fibre - * connections, and thus the Autoneg bit needs to be - * taken into account. We also do this for 2500base-X - * as well, but drivers may not support this, so may - * need to override this. - */ - type = INBAND_BASEX; - break; - - default: - pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE; - pl->act_link_an_mode = mode; - return; - } - if (pcs) pcs_ib_caps = phylink_pcs_inband_caps(pcs, interface); From a21202743f9ce4063e86b99cccaef48ef9813379 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:43 +0100 Subject: [PATCH 1080/1307] net: phylink: disable autoneg for interfaces that have no inband Mathew reports that as a result of commit 6561f0e547be ("net: pcs: pcs-lynx: implement pcs_inband_caps() method"), 10G SFP modules no longer work with the Lynx PCS. This problem is not specific to the Lynx PCS, but is caused by commit df874f9e52c3 ("net: phylink: add pcs_inband_caps() method") which added validation of the autoneg state to the optical SFP configuration path. Fix this by handling interface modes that fundamentally have no inband negotiation more correctly - if we only have a single interface mode, clear the Autoneg support bit and the advertising mask. If the module can operate with several different interface modes, autoneg may be supported for other modes, so leave the support mask alone and just clear the Autoneg bit in the advertising mask. This restores 10G optical module functionality with PCS that supply their inband support, and makes ethtool output look sane. Reported-by: Mathew McBride Closes: https://lore.kernel.org/r/025c0ebe-5537-4fa3-b05a-8b835e5ad317@app.fastmail.com Fixes: df874f9e52c3 ("net: phylink: add pcs_inband_caps() method") Signed-off-by: Russell King (Oracle) Tested-by: Vladimir Oltean Link: https://patch.msgid.link/E1uslwx-00000001SPB-2kiM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 8283416ccf5d..f1b57e3fdf30 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3634,6 +3634,7 @@ static int phylink_sfp_config_optical(struct phylink *pl) { __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct phylink_link_state config; + enum inband_type inband_type; phy_interface_t interface; int ret; @@ -3680,6 +3681,23 @@ static int phylink_sfp_config_optical(struct phylink *pl) phylink_dbg(pl, "optical SFP: chosen %s interface\n", phy_modes(interface)); + inband_type = phylink_get_inband_type(interface); + if (inband_type == INBAND_NONE) { + /* If this is the sole interface, and there is no inband + * support, clear the advertising mask and Autoneg bit in + * the support mask. Otherwise, just clear the Autoneg bit + * in the advertising mask. + */ + if (phy_interface_weight(pl->sfp_interfaces) == 1) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + pl->sfp_support); + linkmode_zero(config.advertising); + } else { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + config.advertising); + } + } + if (!phylink_validate_pcs_inband_autoneg(pl, interface, config.advertising)) { phylink_err(pl, "autoneg setting not compatible with PCS"); From d4736737110ffa83d29f1c5d17b26113864205f6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Aug 2025 20:20:07 +0200 Subject: [PATCH 1081/1307] net: ethernet: mtk_eth_soc: fix tx vlan tag for llc packets When sending llc packets with vlan tx offload, the hardware fails to actually add the tag. Deal with this by fixing it up in software. Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet") Reported-by: Thibaut VARENE Signed-off-by: Felix Fietkau Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250831182007.51619-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a5fcde76dc0..e68997a29191 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1761,6 +1761,13 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) bool gso = false; int tx_num; + if (skb_vlan_tag_present(skb) && + !eth_proto_is_802_3(eth_hdr(skb)->h_proto)) { + skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + goto dropped; + } + /* normally we can rely on the stack not calling this more than once, * however we have 2 queues running on the same ring so we need to lock * the ring access @@ -1806,8 +1813,9 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: spin_unlock(ð->page_lock); - stats->tx_dropped++; dev_kfree_skb_any(skb); +dropped: + stats->tx_dropped++; return NETDEV_TX_OK; } From a7195a3d67dace056af7ca65144a11874df79562 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 1 Sep 2025 12:20:19 +0100 Subject: [PATCH 1082/1307] net: pcs: rzn1-miic: Correct MODCTRL register offset Correct the Mode Control Register (MODCTRL) offset for RZ/N MIIC. According to the R-IN Engine and Ethernet Peripherals Manual (Rev.1.30) [0], Table 10.1 "Ethernet Accessory Register List", MODCTRL is at offset 0x8, not 0x20 as previously defined. Offset 0x20 actually maps to the Port Trigger Control Register (PTCTRL), which controls PTP_MODE[3:0] and RGMII_CLKSEL[4]. Using this incorrect definition prevented the driver from configuring the SW_MODE[4:0] bits in MODCTRL, which control the internal connection of Ethernet ports. As a result, the MIIC could not be switched into the correct mode, leading to link setup failures and non-functional Ethernet ports on affected systems. [0] https://www.renesas.com/en/document/mah/rzn1d-group-rzn1s-group-rzn1l-group-users-manual-r-engine-and-ethernet-peripherals?r=1054571 Fixes: 7dc54d3b8d91 ("net: pcs: add Renesas MII converter driver") Cc: stable@kernel.org Signed-off-by: Lad Prabhakar Reviewed-by: Wolfram Sang Reviewed-by: Russell King (Oracle) Reviewed-by: Geert Uytterhoeven Tested-by: Wolfram Sang Link: https://patch.msgid.link/20250901112019.16278-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-rzn1-miic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index d79bb9b06cd2..ce73d9474d5b 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -19,7 +19,7 @@ #define MIIC_PRCMD 0x0 #define MIIC_ESID_CODE 0x4 -#define MIIC_MODCTRL 0x20 +#define MIIC_MODCTRL 0x8 #define MIIC_MODCTRL_SW_MODE GENMASK(4, 0) #define MIIC_CONVCTRL(port) (0x100 + (port) * 4) From 6ead38147ebb813f08be6ea8ef547a0e4c09559a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:33 +0300 Subject: [PATCH 1083/1307] vxlan: Fix NPD when refreshing an FDB entry with a nexthop object VXLAN FDB entries can point to either a remote destination or an FDB nexthop group. The latter is usually used in EVPN deployments where learning is disabled. However, when learning is enabled, an incoming packet might try to refresh an FDB entry that points to an FDB nexthop group and therefore does not have a remote. Such packets should be dropped, but they are only dropped after dereferencing the non-existent remote, resulting in a NPD [1] which can be reproduced using [2]. Fix by dropping such packets earlier. Remove the misleading comment from first_remote_rcu(). [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 13 UID: 0 PID: 361 Comm: mausezahn Not tainted 6.17.0-rc1-virtme-g9f6b606b6b37 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014 RIP: 0010:vxlan_snoop+0x98/0x1e0 [...] Call Trace: vxlan_encap_bypass+0x209/0x240 encap_bypass_if_local+0xb1/0x100 vxlan_xmit_one+0x1375/0x17e0 vxlan_xmit+0x6b4/0x15f0 dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 packet_sendmsg+0x113a/0x1850 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [2] #!/bin/bash ip address add 192.0.2.1/32 dev lo ip address add 192.0.2.2/32 dev lo ip nexthop add id 1 via 192.0.2.3 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass ip link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning bridge fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020 bridge fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Reported-by: Marlin Cremers Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 8 ++++---- drivers/net/vxlan/vxlan_private.h | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index f32be2e301f2..0f6a7c89a669 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, if (READ_ONCE(f->updated) != now) WRITE_ONCE(f->updated, now); + /* Don't override an fdb with nexthop with a learnt entry */ + if (rcu_access_pointer(f->nh)) + return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; + if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) return SKB_NOT_DROPPED_YET; @@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, if (f->state & (NUD_PERMANENT | NUD_NOARP)) return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; - /* Don't override an fdb with nexthop with a learnt entry */ - if (rcu_access_pointer(f->nh)) - return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS; - if (net_ratelimit()) netdev_info(dev, "%pM migrated from %pIS to %pIS\n", diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index 6c625fb29c6c..99fe772ad679 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port) return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; } -/* First remote destination for a forwarding entry. - * Guaranteed to be non-NULL because remotes are never deleted. - */ +/* First remote destination for a forwarding entry. */ static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb) { if (rcu_access_pointer(fdb->nh)) From 1f5d2fd1ca04a23c18b1bde9a43ce2fa2ffa1bce Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:34 +0300 Subject: [PATCH 1084/1307] vxlan: Fix NPD in {arp,neigh}_reduce() when using nexthop objects When the "proxy" option is enabled on a VXLAN device, the device will suppress ARP requests and IPv6 Neighbor Solicitation messages if it is able to reply on behalf of the remote host. That is, if a matching and valid neighbor entry is configured on the VXLAN device whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The code currently assumes that the FDB entry for the neighbor's MAC address points to a valid remote destination, but this is incorrect if the entry is associated with an FDB nexthop group. This can result in a NPD [1][3] which can be reproduced using [2][4]. Fix by checking that the remote destination exists before dereferencing it. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 4 UID: 0 PID: 365 Comm: arping Not tainted 6.17.0-rc2-virtme-g2a89cb21162c #2 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014 RIP: 0010:vxlan_xmit+0xb58/0x15f0 [...] Call Trace: dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 packet_sendmsg+0x113a/0x1850 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [2] #!/bin/bash ip address add 192.0.2.1/32 dev lo ip nexthop add id 1 via 192.0.2.2 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 4789 proxy ip neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm dev vx0 bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10 arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3 [3] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] CPU: 13 UID: 0 PID: 372 Comm: ndisc6 Not tainted 6.17.0-rc2-virtmne-g6ee90cb26014 #3 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1v996), BIOS 1.17.0-4.fc41 04/01/2x014 RIP: 0010:vxlan_xmit+0x803/0x1600 [...] Call Trace: dev_hard_start_xmit+0x5d/0x1c0 __dev_queue_xmit+0x246/0xfd0 ip6_finish_output2+0x210/0x6c0 ip6_finish_output+0x1af/0x2b0 ip6_mr_output+0x92/0x3e0 ip6_send_skb+0x30/0x90 rawv6_sendmsg+0xe6e/0x12e0 __sock_sendmsg+0x38/0x70 __sys_sendto+0x126/0x180 __x64_sys_sendto+0x24/0x30 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f383422ec77 [4] #!/bin/bash ip address add 2001:db8:1::1/128 dev lo ip nexthop add id 1 via 2001:db8:1::1 fdb ip nexthop add id 10 group 1 fdb ip link add name vx0 up type vxlan id 10010 local 2001:db8:1::1 dstport 4789 proxy ip neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud perm dev vx0 bridge fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10 ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0 Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 0f6a7c89a669..dab864bc733c 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) n = neigh_lookup(&arp_tbl, &tip, dev); if (n) { + struct vxlan_rdst *rdst = NULL; struct vxlan_fdb *f; struct sk_buff *reply; @@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) rcu_read_lock(); f = vxlan_find_mac_tx(vxlan, n->ha, vni); - if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { + if (f) + rdst = first_remote_rcu(f); + if (rdst && vxlan_addr_any(&rdst->remote_ip)) { /* bridge-local neighbor */ neigh_release(n); rcu_read_unlock(); @@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev); if (n) { + struct vxlan_rdst *rdst = NULL; struct vxlan_fdb *f; struct sk_buff *reply; @@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) } f = vxlan_find_mac_tx(vxlan, n->ha, vni); - if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { + if (f) + rdst = first_remote_rcu(f); + if (rdst && vxlan_addr_any(&rdst->remote_ip)) { /* bridge-local neighbor */ neigh_release(n); goto out; From 2c9fb925c2ccc6ee475134840cff6c6b73851730 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Sep 2025 09:50:35 +0300 Subject: [PATCH 1085/1307] selftests: net: Add a selftest for VXLAN with FDB nexthop groups Add test cases for VXLAN with FDB nexthop groups, testing both IPv4 and IPv6. Test basic Tx functionality as well as some corner cases. Example output: # ./test_vxlan_nh.sh TEST: VXLAN FDB nexthop: IPv4 basic Tx [ OK ] TEST: VXLAN FDB nexthop: IPv6 basic Tx [ OK ] TEST: VXLAN FDB nexthop: learning [ OK ] TEST: VXLAN FDB nexthop: IPv4 proxy [ OK ] TEST: VXLAN FDB nexthop: IPv6 proxy [ OK ] Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250901065035.159644-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/test_vxlan_nh.sh | 223 +++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100755 tools/testing/selftests/net/test_vxlan_nh.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..c7e03e1d6f63 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nh.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += test_neigh.sh diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done From 3a5f55500f3e93cf4d62351c753452279b088b4b Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 1 Sep 2025 20:37:25 +0800 Subject: [PATCH 1086/1307] ipv6: annotate data-races around devconf->rpl_seg_enabled devconf->rpl_seg_enabled can be changed concurrently from /proc/sys/net/ipv6/conf, annotate lockless reads on it. Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20250901123726.1972881-2-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index d1ef9644f826..a23eb8734e15 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -494,10 +494,8 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_rpl_seg = net->ipv6.devconf_all->rpl_seg_enabled; - if (accept_rpl_seg > idev->cnf.rpl_seg_enabled) - accept_rpl_seg = idev->cnf.rpl_seg_enabled; - + accept_rpl_seg = min(READ_ONCE(net->ipv6.devconf_all->rpl_seg_enabled), + READ_ONCE(idev->cnf.rpl_seg_enabled)); if (!accept_rpl_seg) { kfree_skb(skb); return -1; From f63e7c8a83892781f6ceb55566f9497639c44555 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 1 Sep 2025 15:32:23 +0800 Subject: [PATCH 1087/1307] net: dsa: mv88e6xxx: Fix fwnode reference leaks in mv88e6xxx_port_setup_leds Fix multiple fwnode reference leaks: 1. The function calls fwnode_get_named_child_node() to get the "leds" node, but never calls fwnode_handle_put(leds) to release this reference. 2. Within the fwnode_for_each_child_node() loop, the early return paths that don't properly release the "led" fwnode reference. This fix follows the same pattern as commit d029edefed39 ("net dsa: qca8k: fix usages of device_get_named_child_node()") Fixes: 94a2a84f5e9e ("net: dsa: mv88e6xxx: Support LED control") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250901073224.2273103-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/leds.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/leds.c b/drivers/net/dsa/mv88e6xxx/leds.c index 1c88bfaea46b..ab3bc645da56 100644 --- a/drivers/net/dsa/mv88e6xxx/leds.c +++ b/drivers/net/dsa/mv88e6xxx/leds.c @@ -779,7 +779,8 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port) continue; if (led_num > 1) { dev_err(dev, "invalid LED specified port %d\n", port); - return -EINVAL; + ret = -EINVAL; + goto err_put_led; } if (led_num == 0) @@ -823,17 +824,25 @@ int mv88e6xxx_port_setup_leds(struct mv88e6xxx_chip *chip, int port) init_data.devname_mandatory = true; init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d:0%d", chip->info->name, port, led_num); - if (!init_data.devicename) - return -ENOMEM; + if (!init_data.devicename) { + ret = -ENOMEM; + goto err_put_led; + } ret = devm_led_classdev_register_ext(dev, l, &init_data); kfree(init_data.devicename); if (ret) { dev_err(dev, "Failed to init LED %d for port %d", led_num, port); - return ret; + goto err_put_led; } } + fwnode_handle_put(leds); return 0; + +err_put_led: + fwnode_handle_put(led); + fwnode_handle_put(leds); + return ret; } From 0c3813d855b2006f021f7b5055f231977a58c784 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Tue, 2 Sep 2025 00:02:24 +0530 Subject: [PATCH 1088/1307] smb: client: fix spellings in comments correct spellings in comments Signed-off-by: Bharath SM Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/reparse.c | 2 +- fs/smb/client/smb1ops.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 7869cec58f52..10c84c095fe7 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -278,7 +278,7 @@ static int detect_directory_symlink_target(struct cifs_sb_info *cifs_sb, } /* - * For absolute symlinks it is not possible to determinate + * For absolute symlinks it is not possible to determine * if it should point to directory or file. */ if (symname[0] == '/') { diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 893a1ea8c000..a02d41d1ce4a 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1005,7 +1005,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, rc = -EOPNOTSUPP; } - /* Fallback to SMB_COM_SETATTR command when absolutelty needed. */ + /* Fallback to SMB_COM_SETATTR command when absolutely needed. */ if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); rc = SMBSetInformation(xid, tcon, full_path, @@ -1039,7 +1039,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, cifsFileInfo_put(open_file); /* - * Setting the read-only bit is not honered on non-NT servers when done + * Setting the read-only bit is not honored on non-NT servers when done * via open-semantics. So for setting it, use SMB_COM_SETATTR command. * This command works only after the file is closed, so use it only when * operation was called without the filehandle. From 72595cb6da1841b355644fe8882d60e725205c32 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Tue, 2 Sep 2025 20:10:25 +0530 Subject: [PATCH 1089/1307] smb: client: add new tracepoint to trace lease break notification Add smb3_lease_break_enter to trace lease break notifications, recording lease state, flags, epoch, and lease key. Align smb3_lease_not_found to use the same payload and print format. Signed-off-by: Bharath SM Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2misc.c | 19 +++++++++++---- fs/smb/client/smb2pdu.c | 4 ++-- fs/smb/client/trace.h | 52 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index cddf273c14ae..89d933b4a8bc 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -614,6 +614,15 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) struct cifs_tcon *tcon; struct cifs_pending_open *open; + /* Trace receipt of lease break request from server */ + trace_smb3_lease_break_enter(le32_to_cpu(rsp->CurrentLeaseState), + le32_to_cpu(rsp->Flags), + le16_to_cpu(rsp->Epoch), + le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), + le64_to_cpu(rsp->hdr.SessionId), + *((u64 *)rsp->LeaseKey), + *((u64 *)&rsp->LeaseKey[8])); + cifs_dbg(FYI, "Checking for lease break\n"); /* If server is a channel, select the primary channel */ @@ -660,10 +669,12 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "Can not process lease break - no lease matched\n"); trace_smb3_lease_not_found(le32_to_cpu(rsp->CurrentLeaseState), - le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), - le64_to_cpu(rsp->hdr.SessionId), - *((u64 *)rsp->LeaseKey), - *((u64 *)&rsp->LeaseKey[8])); + le32_to_cpu(rsp->Flags), + le16_to_cpu(rsp->Epoch), + le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), + le64_to_cpu(rsp->hdr.SessionId), + *((u64 *)rsp->LeaseKey), + *((u64 *)&rsp->LeaseKey[8])); return false; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2df93a75e3b8..c3b9d3f6210f 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -6192,11 +6192,11 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, please_key_high = (__u64 *)(lease_key+8); if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); - trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, + trace_smb3_lease_ack_err(le32_to_cpu(lease_state), tcon->tid, ses->Suid, *please_key_low, *please_key_high, rc); cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc); } else - trace_smb3_lease_done(le32_to_cpu(lease_state), tcon->tid, + trace_smb3_lease_ack_done(le32_to_cpu(lease_state), tcon->tid, ses->Suid, *please_key_low, *please_key_high); return rc; diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 93e5b2bb9f28..fe0e075bc63c 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -1171,8 +1171,54 @@ DEFINE_EVENT(smb3_lease_done_class, smb3_##name, \ __u64 lease_key_high), \ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high)) -DEFINE_SMB3_LEASE_DONE_EVENT(lease_done); -DEFINE_SMB3_LEASE_DONE_EVENT(lease_not_found); +DEFINE_SMB3_LEASE_DONE_EVENT(lease_ack_done); +/* Tracepoint when a lease break request is received/entered (includes epoch and flags) */ +DECLARE_EVENT_CLASS(smb3_lease_enter_class, + TP_PROTO(__u32 lease_state, + __u32 flags, + __u16 epoch, + __u32 tid, + __u64 sesid, + __u64 lease_key_low, + __u64 lease_key_high), + TP_ARGS(lease_state, flags, epoch, tid, sesid, lease_key_low, lease_key_high), + TP_STRUCT__entry( + __field(__u32, lease_state) + __field(__u32, flags) + __field(__u16, epoch) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, lease_key_low) + __field(__u64, lease_key_high) + ), + TP_fast_assign( + __entry->lease_state = lease_state; + __entry->flags = flags; + __entry->epoch = epoch; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->lease_key_low = lease_key_low; + __entry->lease_key_high = lease_key_high; + ), + TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x flags=0x%x epoch=%u", + __entry->sesid, __entry->tid, __entry->lease_key_high, + __entry->lease_key_low, __entry->lease_state, __entry->flags, __entry->epoch) +) + +#define DEFINE_SMB3_LEASE_ENTER_EVENT(name) \ +DEFINE_EVENT(smb3_lease_enter_class, smb3_##name, \ + TP_PROTO(__u32 lease_state, \ + __u32 flags, \ + __u16 epoch, \ + __u32 tid, \ + __u64 sesid, \ + __u64 lease_key_low, \ + __u64 lease_key_high), \ + TP_ARGS(lease_state, flags, epoch, tid, sesid, lease_key_low, lease_key_high)) + +DEFINE_SMB3_LEASE_ENTER_EVENT(lease_break_enter); +/* Lease not found: reuse lease_enter payload (includes epoch and flags) */ +DEFINE_SMB3_LEASE_ENTER_EVENT(lease_not_found); DECLARE_EVENT_CLASS(smb3_lease_err_class, TP_PROTO(__u32 lease_state, @@ -1213,7 +1259,7 @@ DEFINE_EVENT(smb3_lease_err_class, smb3_##name, \ int rc), \ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc)) -DEFINE_SMB3_LEASE_ERR_EVENT(lease_err); +DEFINE_SMB3_LEASE_ERR_EVENT(lease_ack_err); DECLARE_EVENT_CLASS(smb3_connect_class, TP_PROTO(char *hostname, From 91be128b496c0de60a7dceb70d34935a29d38bbd Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Tue, 2 Sep 2025 15:28:57 +0530 Subject: [PATCH 1090/1307] smb: client: show negotiated cipher in DebugData Print the negotiated encryption cipher type in DebugData Signed-off-by: Bharath SM Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index edb2e7f7fc23..2337cf795db3 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -347,6 +347,22 @@ static __always_inline const char *compression_alg_str(__le16 alg) } } +static __always_inline const char *cipher_alg_str(__le16 cipher) +{ + switch (cipher) { + case SMB2_ENCRYPTION_AES128_CCM: + return "AES128-CCM"; + case SMB2_ENCRYPTION_AES128_GCM: + return "AES128-GCM"; + case SMB2_ENCRYPTION_AES256_CCM: + return "AES256-CCM"; + case SMB2_ENCRYPTION_AES256_GCM: + return "AES256-GCM"; + default: + return "UNKNOWN"; + } +} + static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct mid_q_entry *mid_entry; @@ -539,6 +555,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) else seq_puts(m, "disabled (not supported by this server)"); + /* Show negotiated encryption cipher, even if not required */ + seq_puts(m, "\nEncryption: "); + if (server->cipher_type) + seq_printf(m, "Negotiated cipher (%s)", cipher_alg_str(server->cipher_type)); + seq_printf(m, "\n\n\tSessions: "); i = 0; list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { @@ -576,12 +597,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) /* dump session id helpful for use with network trace */ seq_printf(m, " SessionId: 0x%llx", ses->Suid); - if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) { + if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) seq_puts(m, " encrypted"); - /* can help in debugging to show encryption type */ - if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM) - seq_puts(m, "(gcm256)"); - } if (ses->sign) seq_puts(m, " signed"); From 827733acbe4c3a0e117b6ebde5ed269fb7686427 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Sep 2025 09:50:11 -0700 Subject: [PATCH 1091/1307] crypto: sha1 - Implement export_core() and import_core() Since commit 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API"), the recently-added export_core() and import_core() methods in struct shash_alg have effectively become mandatory (even though it is not tested or enforced), since legacy drivers that need a fallback depend on them. Make crypto/sha1.c compatible with these legacy drivers by adding export_core() and import_core() methods to it. Reported-by: Giovanni Cabiddu Reported-by: Ovidiu Panait Closes: https://lore.kernel.org/r/aLSnCc9Ws5L9y+8X@gcabiddu-mobl.ger.corp.intel.com Fixes: b10a74abcfc5 ("crypto: sha1 - Use same state format as legacy drivers") Tested-by: Giovanni Cabiddu Tested-by: Ovidiu Panait Link: https://lore.kernel.org/r/20250901165013.48649-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/sha1.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/crypto/sha1.c b/crypto/sha1.c index ecef4bf2d9c0..4fbf61cf0370 100644 --- a/crypto/sha1.c +++ b/crypto/sha1.c @@ -49,6 +49,18 @@ static int __crypto_sha1_import(struct sha1_ctx *ctx, const void *in) return 0; } +static int __crypto_sha1_export_core(const struct sha1_ctx *ctx, void *out) +{ + memcpy(out, ctx, offsetof(struct sha1_ctx, buf)); + return 0; +} + +static int __crypto_sha1_import_core(struct sha1_ctx *ctx, const void *in) +{ + memcpy(ctx, in, offsetof(struct sha1_ctx, buf)); + return 0; +} + const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, @@ -94,6 +106,16 @@ static int crypto_sha1_import(struct shash_desc *desc, const void *in) return __crypto_sha1_import(SHA1_CTX(desc), in); } +static int crypto_sha1_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha1_export_core(SHA1_CTX(desc), out); +} + +static int crypto_sha1_import_core(struct shash_desc *desc, const void *in) +{ + return __crypto_sha1_import_core(SHA1_CTX(desc), in); +} + #define HMAC_SHA1_KEY(tfm) ((struct hmac_sha1_key *)crypto_shash_ctx(tfm)) #define HMAC_SHA1_CTX(desc) ((struct hmac_sha1_ctx *)shash_desc_ctx(desc)) @@ -143,6 +165,19 @@ static int crypto_hmac_sha1_import(struct shash_desc *desc, const void *in) return __crypto_sha1_import(&ctx->sha_ctx, in); } +static int crypto_hmac_sha1_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha1_export_core(&HMAC_SHA1_CTX(desc)->sha_ctx, out); +} + +static int crypto_hmac_sha1_import_core(struct shash_desc *desc, const void *in) +{ + struct hmac_sha1_ctx *ctx = HMAC_SHA1_CTX(desc); + + ctx->ostate = HMAC_SHA1_KEY(desc->tfm)->ostate; + return __crypto_sha1_import_core(&ctx->sha_ctx, in); +} + static struct shash_alg algs[] = { { .base.cra_name = "sha1", @@ -157,6 +192,8 @@ static struct shash_alg algs[] = { .digest = crypto_sha1_digest, .export = crypto_sha1_export, .import = crypto_sha1_import, + .export_core = crypto_sha1_export_core, + .import_core = crypto_sha1_import_core, .descsize = sizeof(struct sha1_ctx), .statesize = SHA1_SHASH_STATE_SIZE, }, @@ -175,6 +212,8 @@ static struct shash_alg algs[] = { .digest = crypto_hmac_sha1_digest, .export = crypto_hmac_sha1_export, .import = crypto_hmac_sha1_import, + .export_core = crypto_hmac_sha1_export_core, + .import_core = crypto_hmac_sha1_import_core, .descsize = sizeof(struct hmac_sha1_ctx), .statesize = SHA1_SHASH_STATE_SIZE, }, From 30b2a8c4f2f3833f4f813d3e0d003f7c50cdf275 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Sep 2025 09:50:12 -0700 Subject: [PATCH 1092/1307] crypto: sha256 - Implement export_core() and import_core() Since commit 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API"), the recently-added export_core() and import_core() methods in struct shash_alg have effectively become mandatory (even though it is not tested or enforced), since legacy drivers that need a fallback depend on them. Make crypto/sha256.c compatible with these legacy drivers by adding export_core() and import_core() methods to it. Reported-by: Giovanni Cabiddu Reported-by: Ovidiu Panait Closes: https://lore.kernel.org/r/aLSnCc9Ws5L9y+8X@gcabiddu-mobl.ger.corp.intel.com Fixes: 07f090959bba ("crypto: sha256 - Use same state format as legacy drivers") Tested-by: Giovanni Cabiddu Tested-by: Ovidiu Panait Link: https://lore.kernel.org/r/20250901165013.48649-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/sha256.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/crypto/sha256.c b/crypto/sha256.c index 052806559f06..fb81defe084c 100644 --- a/crypto/sha256.c +++ b/crypto/sha256.c @@ -50,6 +50,19 @@ static int __crypto_sha256_import(struct __sha256_ctx *ctx, const void *in) return 0; } +static int __crypto_sha256_export_core(const struct __sha256_ctx *ctx, + void *out) +{ + memcpy(out, ctx, offsetof(struct __sha256_ctx, buf)); + return 0; +} + +static int __crypto_sha256_import_core(struct __sha256_ctx *ctx, const void *in) +{ + memcpy(ctx, in, offsetof(struct __sha256_ctx, buf)); + return 0; +} + /* SHA-224 */ const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE] = { @@ -98,6 +111,16 @@ static int crypto_sha224_import(struct shash_desc *desc, const void *in) return __crypto_sha256_import(&SHA224_CTX(desc)->ctx, in); } +static int crypto_sha224_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha256_export_core(&SHA224_CTX(desc)->ctx, out); +} + +static int crypto_sha224_import_core(struct shash_desc *desc, const void *in) +{ + return __crypto_sha256_import_core(&SHA224_CTX(desc)->ctx, in); +} + /* SHA-256 */ const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE] = { @@ -146,6 +169,16 @@ static int crypto_sha256_import(struct shash_desc *desc, const void *in) return __crypto_sha256_import(&SHA256_CTX(desc)->ctx, in); } +static int crypto_sha256_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha256_export_core(&SHA256_CTX(desc)->ctx, out); +} + +static int crypto_sha256_import_core(struct shash_desc *desc, const void *in) +{ + return __crypto_sha256_import_core(&SHA256_CTX(desc)->ctx, in); +} + /* HMAC-SHA224 */ #define HMAC_SHA224_KEY(tfm) ((struct hmac_sha224_key *)crypto_shash_ctx(tfm)) @@ -198,6 +231,21 @@ static int crypto_hmac_sha224_import(struct shash_desc *desc, const void *in) return __crypto_sha256_import(&ctx->ctx.sha_ctx, in); } +static int crypto_hmac_sha224_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha256_export_core(&HMAC_SHA224_CTX(desc)->ctx.sha_ctx, + out); +} + +static int crypto_hmac_sha224_import_core(struct shash_desc *desc, + const void *in) +{ + struct hmac_sha224_ctx *ctx = HMAC_SHA224_CTX(desc); + + ctx->ctx.ostate = HMAC_SHA224_KEY(desc->tfm)->key.ostate; + return __crypto_sha256_import_core(&ctx->ctx.sha_ctx, in); +} + /* HMAC-SHA256 */ #define HMAC_SHA256_KEY(tfm) ((struct hmac_sha256_key *)crypto_shash_ctx(tfm)) @@ -250,6 +298,21 @@ static int crypto_hmac_sha256_import(struct shash_desc *desc, const void *in) return __crypto_sha256_import(&ctx->ctx.sha_ctx, in); } +static int crypto_hmac_sha256_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha256_export_core(&HMAC_SHA256_CTX(desc)->ctx.sha_ctx, + out); +} + +static int crypto_hmac_sha256_import_core(struct shash_desc *desc, + const void *in) +{ + struct hmac_sha256_ctx *ctx = HMAC_SHA256_CTX(desc); + + ctx->ctx.ostate = HMAC_SHA256_KEY(desc->tfm)->key.ostate; + return __crypto_sha256_import_core(&ctx->ctx.sha_ctx, in); +} + /* Algorithm definitions */ static struct shash_alg algs[] = { @@ -266,6 +329,8 @@ static struct shash_alg algs[] = { .digest = crypto_sha224_digest, .export = crypto_sha224_export, .import = crypto_sha224_import, + .export_core = crypto_sha224_export_core, + .import_core = crypto_sha224_import_core, .descsize = sizeof(struct sha224_ctx), .statesize = SHA256_SHASH_STATE_SIZE, }, @@ -282,6 +347,8 @@ static struct shash_alg algs[] = { .digest = crypto_sha256_digest, .export = crypto_sha256_export, .import = crypto_sha256_import, + .export_core = crypto_sha256_export_core, + .import_core = crypto_sha256_import_core, .descsize = sizeof(struct sha256_ctx), .statesize = SHA256_SHASH_STATE_SIZE, }, @@ -300,6 +367,8 @@ static struct shash_alg algs[] = { .digest = crypto_hmac_sha224_digest, .export = crypto_hmac_sha224_export, .import = crypto_hmac_sha224_import, + .export_core = crypto_hmac_sha224_export_core, + .import_core = crypto_hmac_sha224_import_core, .descsize = sizeof(struct hmac_sha224_ctx), .statesize = SHA256_SHASH_STATE_SIZE, }, @@ -318,6 +387,8 @@ static struct shash_alg algs[] = { .digest = crypto_hmac_sha256_digest, .export = crypto_hmac_sha256_export, .import = crypto_hmac_sha256_import, + .export_core = crypto_hmac_sha256_export_core, + .import_core = crypto_hmac_sha256_import_core, .descsize = sizeof(struct hmac_sha256_ctx), .statesize = SHA256_SHASH_STATE_SIZE, }, From cdb03b6d1896c2d23f9c47dc779edba0a9241115 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Sep 2025 09:50:13 -0700 Subject: [PATCH 1093/1307] crypto: sha512 - Implement export_core() and import_core() Since commit 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API"), the recently-added export_core() and import_core() methods in struct shash_alg have effectively become mandatory (even though it is not tested or enforced), since legacy drivers that need a fallback depend on them. Make crypto/sha512.c compatible with these legacy drivers by adding export_core() and import_core() methods to it. Reported-by: Giovanni Cabiddu Reported-by: Ovidiu Panait Closes: https://lore.kernel.org/r/aLSnCc9Ws5L9y+8X@gcabiddu-mobl.ger.corp.intel.com Fixes: 4bc7f7b687a2 ("crypto: sha512 - Use same state format as legacy drivers") Tested-by: Giovanni Cabiddu Tested-by: Ovidiu Panait Link: https://lore.kernel.org/r/20250901165013.48649-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/sha512.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/crypto/sha512.c b/crypto/sha512.c index fb1c520978ef..d320fe53913f 100644 --- a/crypto/sha512.c +++ b/crypto/sha512.c @@ -50,6 +50,19 @@ static int __crypto_sha512_import(struct __sha512_ctx *ctx, const void *in) return 0; } +static int __crypto_sha512_export_core(const struct __sha512_ctx *ctx, + void *out) +{ + memcpy(out, ctx, offsetof(struct __sha512_ctx, buf)); + return 0; +} + +static int __crypto_sha512_import_core(struct __sha512_ctx *ctx, const void *in) +{ + memcpy(ctx, in, offsetof(struct __sha512_ctx, buf)); + return 0; +} + /* SHA-384 */ const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE] = { @@ -100,6 +113,16 @@ static int crypto_sha384_import(struct shash_desc *desc, const void *in) return __crypto_sha512_import(&SHA384_CTX(desc)->ctx, in); } +static int crypto_sha384_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha512_export_core(&SHA384_CTX(desc)->ctx, out); +} + +static int crypto_sha384_import_core(struct shash_desc *desc, const void *in) +{ + return __crypto_sha512_import_core(&SHA384_CTX(desc)->ctx, in); +} + /* SHA-512 */ const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE] = { @@ -152,6 +175,16 @@ static int crypto_sha512_import(struct shash_desc *desc, const void *in) return __crypto_sha512_import(&SHA512_CTX(desc)->ctx, in); } +static int crypto_sha512_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha512_export_core(&SHA512_CTX(desc)->ctx, out); +} + +static int crypto_sha512_import_core(struct shash_desc *desc, const void *in) +{ + return __crypto_sha512_import_core(&SHA512_CTX(desc)->ctx, in); +} + /* HMAC-SHA384 */ #define HMAC_SHA384_KEY(tfm) ((struct hmac_sha384_key *)crypto_shash_ctx(tfm)) @@ -204,6 +237,21 @@ static int crypto_hmac_sha384_import(struct shash_desc *desc, const void *in) return __crypto_sha512_import(&ctx->ctx.sha_ctx, in); } +static int crypto_hmac_sha384_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha512_export_core(&HMAC_SHA384_CTX(desc)->ctx.sha_ctx, + out); +} + +static int crypto_hmac_sha384_import_core(struct shash_desc *desc, + const void *in) +{ + struct hmac_sha384_ctx *ctx = HMAC_SHA384_CTX(desc); + + ctx->ctx.ostate = HMAC_SHA384_KEY(desc->tfm)->key.ostate; + return __crypto_sha512_import_core(&ctx->ctx.sha_ctx, in); +} + /* HMAC-SHA512 */ #define HMAC_SHA512_KEY(tfm) ((struct hmac_sha512_key *)crypto_shash_ctx(tfm)) @@ -256,6 +304,21 @@ static int crypto_hmac_sha512_import(struct shash_desc *desc, const void *in) return __crypto_sha512_import(&ctx->ctx.sha_ctx, in); } +static int crypto_hmac_sha512_export_core(struct shash_desc *desc, void *out) +{ + return __crypto_sha512_export_core(&HMAC_SHA512_CTX(desc)->ctx.sha_ctx, + out); +} + +static int crypto_hmac_sha512_import_core(struct shash_desc *desc, + const void *in) +{ + struct hmac_sha512_ctx *ctx = HMAC_SHA512_CTX(desc); + + ctx->ctx.ostate = HMAC_SHA512_KEY(desc->tfm)->key.ostate; + return __crypto_sha512_import_core(&ctx->ctx.sha_ctx, in); +} + /* Algorithm definitions */ static struct shash_alg algs[] = { @@ -272,6 +335,8 @@ static struct shash_alg algs[] = { .digest = crypto_sha384_digest, .export = crypto_sha384_export, .import = crypto_sha384_import, + .export_core = crypto_sha384_export_core, + .import_core = crypto_sha384_import_core, .descsize = sizeof(struct sha384_ctx), .statesize = SHA512_SHASH_STATE_SIZE, }, @@ -288,6 +353,8 @@ static struct shash_alg algs[] = { .digest = crypto_sha512_digest, .export = crypto_sha512_export, .import = crypto_sha512_import, + .export_core = crypto_sha512_export_core, + .import_core = crypto_sha512_import_core, .descsize = sizeof(struct sha512_ctx), .statesize = SHA512_SHASH_STATE_SIZE, }, @@ -306,6 +373,8 @@ static struct shash_alg algs[] = { .digest = crypto_hmac_sha384_digest, .export = crypto_hmac_sha384_export, .import = crypto_hmac_sha384_import, + .export_core = crypto_hmac_sha384_export_core, + .import_core = crypto_hmac_sha384_import_core, .descsize = sizeof(struct hmac_sha384_ctx), .statesize = SHA512_SHASH_STATE_SIZE, }, @@ -324,6 +393,8 @@ static struct shash_alg algs[] = { .digest = crypto_hmac_sha512_digest, .export = crypto_hmac_sha512_export, .import = crypto_hmac_sha512_import, + .export_core = crypto_hmac_sha512_export_core, + .import_core = crypto_hmac_sha512_import_core, .descsize = sizeof(struct hmac_sha512_ctx), .statesize = SHA512_SHASH_STATE_SIZE, }, From f8f15f6742b8874e59c9c715d0af3474608310ad Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:28 +0300 Subject: [PATCH 1094/1307] wifi: cw1200: cap SSID length in cw1200_do_join() If the ssidie[1] length is more that 32 it leads to memory corruption. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/e91fb43fcedc4893b604dfb973131661510901a7.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- drivers/net/wireless/st/cw1200/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index b1dd76e8aecb..5d8eaa700779 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -1291,7 +1291,7 @@ static void cw1200_do_join(struct cw1200_common *priv) rcu_read_lock(); ssidie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID); if (ssidie) { - join.ssid_len = ssidie[1]; + join.ssid_len = min(ssidie[1], IEEE80211_MAX_SSID_LEN); memcpy(join.ssid, &ssidie[2], join.ssid_len); } rcu_read_unlock(); From c786794bd27b0d7a5fd9063695df83206009be59 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:35 +0300 Subject: [PATCH 1095/1307] wifi: libertas: cap SSID len in lbs_associate() If the ssid_eid[1] length is more that 32 it leads to memory corruption. Fixes: a910e4a94f69 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/2a40f5ec7617144aef412034c12919a4927d90ad.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/cfg.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 94dd488becaf..caba7491cd5a 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1151,10 +1151,13 @@ static int lbs_associate(struct lbs_private *priv, /* add SSID TLV */ rcu_read_lock(); ssid_eid = ieee80211_bss_get_ie(bss, WLAN_EID_SSID); - if (ssid_eid) - pos += lbs_add_ssid_tlv(pos, ssid_eid + 2, ssid_eid[1]); - else + if (ssid_eid) { + u32 ssid_len = min(ssid_eid[1], IEEE80211_MAX_SSID_LEN); + + pos += lbs_add_ssid_tlv(pos, ssid_eid + 2, ssid_len); + } else { lbs_deb_assoc("no SSID\n"); + } rcu_read_unlock(); /* add DS param TLV */ From 62b635dcd69c4fde7ce1de4992d71420a37e51e3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Aug 2025 15:48:45 +0300 Subject: [PATCH 1096/1307] wifi: cfg80211: sme: cap SSID length in __cfg80211_connect_result() If the ssid->datalen is more than IEEE80211_MAX_SSID_LEN (32) it would lead to memory corruption so add some bounds checking. Fixes: c38c70185101 ("wifi: cfg80211: Set SSID if it is not already set") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/0aaaae4a3ed37c6252363c34ae4904b1604e8e32.1756456951.git.dan.carpenter@linaro.org Signed-off-by: Johannes Berg --- net/wireless/sme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 826ec0a6355f..3a028ff287fb 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -900,13 +900,16 @@ void __cfg80211_connect_result(struct net_device *dev, if (!wdev->u.client.ssid_len) { rcu_read_lock(); for_each_valid_link(cr, link) { + u32 ssid_len; + ssid = ieee80211_bss_get_elem(cr->links[link].bss, WLAN_EID_SSID); if (!ssid || !ssid->datalen) continue; - memcpy(wdev->u.client.ssid, ssid->data, ssid->datalen); + ssid_len = min(ssid->datalen, IEEE80211_MAX_SSID_LEN); + memcpy(wdev->u.client.ssid, ssid->data, ssid_len); wdev->u.client.ssid_len = ssid->datalen; break; } From fe9e4d0c39311d0f97b024147a0d155333f388b5 Mon Sep 17 00:00:00 2001 From: "Ajay.Kathat@microchip.com" Date: Fri, 29 Aug 2025 22:58:43 +0000 Subject: [PATCH 1097/1307] wifi: wilc1000: avoid buffer overflow in WID string configuration Fix the following copy overflow warning identified by Smatch checker. drivers/net/wireless/microchip/wilc1000/wlan_cfg.c:184 wilc_wlan_parse_response_frame() error: '__memcpy()' 'cfg->s[i]->str' copy overflow (512 vs 65537) This patch introduces size check before accessing the memory buffer. The checks are base on the WID type of received data from the firmware. For WID string configuration, the size limit is determined by individual element size in 'struct wilc_cfg_str_vals' that is maintained in 'len' field of 'struct wilc_cfg_str'. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aLFbr9Yu9j_TQTey@stanley.mountain Suggested-by: Dan Carpenter Signed-off-by: Ajay Singh Link: https://patch.msgid.link/20250829225829.5423-1-ajay.kathat@microchip.com Signed-off-by: Johannes Berg --- .../wireless/microchip/wilc1000/wlan_cfg.c | 39 +++++++++++++------ .../wireless/microchip/wilc1000/wlan_cfg.h | 5 ++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c index 131388886acb..cfabd5aebb54 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.c @@ -41,10 +41,10 @@ static const struct wilc_cfg_word g_cfg_word[] = { }; static const struct wilc_cfg_str g_cfg_str[] = { - {WID_FIRMWARE_VERSION, NULL}, - {WID_MAC_ADDR, NULL}, - {WID_ASSOC_RES_INFO, NULL}, - {WID_NIL, NULL} + {WID_FIRMWARE_VERSION, 0, NULL}, + {WID_MAC_ADDR, 0, NULL}, + {WID_ASSOC_RES_INFO, 0, NULL}, + {WID_NIL, 0, NULL} }; #define WILC_RESP_MSG_TYPE_CONFIG_REPLY 'R' @@ -147,44 +147,58 @@ static void wilc_wlan_parse_response_frame(struct wilc *wl, u8 *info, int size) switch (FIELD_GET(WILC_WID_TYPE, wid)) { case WID_CHAR: + len = 3; + if (len + 2 > size) + return; + while (cfg->b[i].id != WID_NIL && cfg->b[i].id != wid) i++; if (cfg->b[i].id == wid) cfg->b[i].val = info[4]; - len = 3; break; case WID_SHORT: + len = 4; + if (len + 2 > size) + return; + while (cfg->hw[i].id != WID_NIL && cfg->hw[i].id != wid) i++; if (cfg->hw[i].id == wid) cfg->hw[i].val = get_unaligned_le16(&info[4]); - len = 4; break; case WID_INT: + len = 6; + if (len + 2 > size) + return; + while (cfg->w[i].id != WID_NIL && cfg->w[i].id != wid) i++; if (cfg->w[i].id == wid) cfg->w[i].val = get_unaligned_le32(&info[4]); - len = 6; break; case WID_STR: + len = 2 + get_unaligned_le16(&info[2]); + while (cfg->s[i].id != WID_NIL && cfg->s[i].id != wid) i++; - if (cfg->s[i].id == wid) - memcpy(cfg->s[i].str, &info[2], - get_unaligned_le16(&info[2]) + 2); + if (cfg->s[i].id == wid) { + if (len > cfg->s[i].len || (len + 2 > size)) + return; + + memcpy(cfg->s[i].str, &info[2], + len); + } - len = 2 + get_unaligned_le16(&info[2]); break; default: @@ -384,12 +398,15 @@ int wilc_wlan_cfg_init(struct wilc *wl) /* store the string cfg parameters */ wl->cfg.s[i].id = WID_FIRMWARE_VERSION; wl->cfg.s[i].str = str_vals->firmware_version; + wl->cfg.s[i].len = sizeof(str_vals->firmware_version); i++; wl->cfg.s[i].id = WID_MAC_ADDR; wl->cfg.s[i].str = str_vals->mac_address; + wl->cfg.s[i].len = sizeof(str_vals->mac_address); i++; wl->cfg.s[i].id = WID_ASSOC_RES_INFO; wl->cfg.s[i].str = str_vals->assoc_rsp; + wl->cfg.s[i].len = sizeof(str_vals->assoc_rsp); i++; wl->cfg.s[i].id = WID_NIL; wl->cfg.s[i].str = NULL; diff --git a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h index 7038b74f8e8f..5ae74bced7d7 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h +++ b/drivers/net/wireless/microchip/wilc1000/wlan_cfg.h @@ -24,12 +24,13 @@ struct wilc_cfg_word { struct wilc_cfg_str { u16 id; + u16 len; u8 *str; }; struct wilc_cfg_str_vals { - u8 mac_address[7]; - u8 firmware_version[129]; + u8 mac_address[8]; + u8 firmware_version[130]; u8 assoc_rsp[WILC_MAX_ASSOC_RESP_FRAME_SIZE]; }; From 18dbcbfabfffc4a5d3ea10290c5ad27f22b0d240 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Aug 2025 11:26:44 -0700 Subject: [PATCH 1098/1307] perf: Fix the POLL_HUP delivery breakage The event_limit can be set by the PERF_EVENT_IOC_REFRESH to limit the number of events. When the event_limit reaches 0, the POLL_HUP signal should be sent. But it's missed. The corresponding counter should be stopped when the event_limit reaches 0. It was implemented in the ARCH-specific code. However, since the commit 9734e25fbf5a ("perf: Fix the throttle logic for a group"), all the ARCH-specific code has been moved to the generic code. The code to handle the event_limit was lost. Add the event->pmu->stop(event, 0); back. Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group") Closes: https://lore.kernel.org/lkml/aICYAqM5EQUlTqtX@li-2b55cdcc-350b-11b2-a85c-a78bff51fc11.ibm.com/ Reported-by: Sumanth Korikkar Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Sumanth Korikkar Link: https://lkml.kernel.org/r/20250811182644.1305952-1-kan.liang@linux.intel.com --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 872122e074e5..820127536e62 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10330,6 +10330,7 @@ static int __perf_event_overflow(struct perf_event *event, ret = 1; event->pending_kill = POLL_HUP; perf_event_disable_inatomic(event); + event->pmu->stop(event, 0); } if (event->attr.sigtrap) { From 762af5a2aa0ad18da1316666dae30d369268d44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 25 Aug 2025 15:26:35 +0200 Subject: [PATCH 1099/1307] vdso/vsyscall: Avoid slow division loop in auxiliary clock update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to __iter_div_u64_rem() in vdso_time_update_aux() is a wrapper around subtraction. It cannot be used to divide large numbers, as that introduces long, computationally expensive delays. A regular u64 division is also not possible in the timekeeper update path as it can be too slow. Instead of splitting the ktime_t offset into into second and subsecond components during the timekeeper update fast-path, do it together with the adjustment of tk->offs_aux in the slow-path. Equivalent to the handling of offs_boot and monotonic_to_boot. Reuse the storage of monotonic_to_boot for the new field, as it is not used by auxiliary timekeepers. Fixes: 380b84e168e5 ("vdso/vsyscall: Update auxiliary clock data in the datapage") Reported-by: Miroslav Lichvar Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250825-vdso-auxclock-division-v1-1-a1d32a16a313@linutronix.de Closes: https://lore.kernel.org/lkml/aKwsNNWsHJg8IKzj@localhost/ --- include/linux/timekeeper_internal.h | 9 ++++++++- kernel/time/timekeeping.c | 10 ++++++++-- kernel/time/vsyscall.c | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index c27aac67cb3f..b8ae89ea28ab 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -76,6 +76,7 @@ struct tk_read_base { * @cs_was_changed_seq: The sequence number of clocksource change events * @clock_valid: Indicator for valid clock * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset + * @monotonic_to_aux: CLOCK_MONOTONIC to CLOCK_AUX offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -117,6 +118,9 @@ struct tk_read_base { * @offs_aux is used by the auxiliary timekeepers which do not utilize any * of the regular timekeeper offset fields. * + * @monotonic_to_aux is a timespec64 representation of @offs_aux to + * accelerate the VDSO update for CLOCK_AUX. + * * The cacheline ordering of the structure is optimized for in kernel usage of * the ktime_get() and ktime_get_ts64() family of time accessors. Struct * timekeeper is prepended in the core timekeeping code with a sequence count, @@ -159,7 +163,10 @@ struct timekeeper { u8 cs_was_changed_seq; u8 clock_valid; - struct timespec64 monotonic_to_boot; + union { + struct timespec64 monotonic_to_boot; + struct timespec64 monotonic_to_aux; + }; u64 cycle_interval; u64 xtime_interval; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 059fa8b79be6..b6974fce800c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -83,6 +83,12 @@ static inline bool tk_is_aux(const struct timekeeper *tk) } #endif +static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs) +{ + tk->offs_aux = offs; + tk->monotonic_to_aux = ktime_to_timespec64(offs); +} + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -1506,7 +1512,7 @@ static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespe timekeeping_restore_shadow(tkd); return -EINVAL; } - tks->offs_aux = offs; + tk_update_aux_offs(tks, offs); } timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); @@ -2937,7 +2943,7 @@ static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) * xtime ("realtime") is not applicable for auxiliary clocks and * kept in sync with "monotonic". */ - aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); + tk_update_aux_offs(aux_tks, ktime_sub(timespec64_to_ktime(*tnew), tnow)); timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); return 0; diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 8ba8b0d8a387..aa59919b8f2c 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -159,10 +159,10 @@ void vdso_time_update_aux(struct timekeeper *tk) if (clock_mode != VDSO_CLOCKMODE_NONE) { fill_clock_configuration(vc, &tk->tkr_mono); - vdso_ts->sec = tk->xtime_sec; + vdso_ts->sec = tk->xtime_sec + tk->monotonic_to_aux.tv_sec; nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->offs_aux; + nsec += tk->monotonic_to_aux.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); nsec = nsec << tk->tkr_mono.shift; vdso_ts->nsec = nsec; From 9a6d3ff10f7f538835cae4799562004ee46922c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 09:56:44 +0200 Subject: [PATCH 1100/1307] arm64: uapi: Provide correct __BITS_PER_LONG for the compat vDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic vDSO library uses the UAPI headers. On arm64 __BITS_PER_LONG is always '64' even when used from the compat vDSO. In that case __GENMASK() does an illegal bitshift, invoking undefined behaviour. Change __BITS_PER_LONG to also work when used from the comapt vDSO. To not confuse real userspace, only do this when building the kernel. Reported-by: John Stultz Closes: https://lore.kernel.org/lkml/CANDhNCqvKOc9JgphQwr0eDyJiyG4oLFS9R8rSFvU0fpurrJFDg@mail.gmail.com/ Fixes: cd3557a7618b ("vdso/gettimeofday: Add support for auxiliary clocks") Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Tested-by: John Stultz Link: https://lore.kernel.org/r/20250821-vdso-arm64-compat-bitsperlong-v1-1-700bcabe7732@linutronix.de Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/bitsperlong.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h index 485d60bee26c..d59730975f30 100644 --- a/arch/arm64/include/uapi/asm/bitsperlong.h +++ b/arch/arm64/include/uapi/asm/bitsperlong.h @@ -17,7 +17,12 @@ #ifndef __ASM_BITSPERLONG_H #define __ASM_BITSPERLONG_H +#if defined(__KERNEL__) && !defined(__aarch64__) +/* Used by the compat vDSO */ +#define __BITS_PER_LONG 32 +#else #define __BITS_PER_LONG 64 +#endif #include From 1991a458528588ff34e98b6365362560d208710f Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 3 Sep 2025 13:56:24 +0200 Subject: [PATCH 1101/1307] spi: spi-qpic-snand: unregister ECC engine on probe error and device remove The on-host hardware ECC engine remains registered both when the spi_register_controller() function returns with an error and also on device removal. Change the qcom_spi_probe() function to unregister the engine on the error path, and add the missing unregistering call to qcom_spi_remove() to avoid possible use-after-free issues. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Message-ID: <20250903-qpic-snand-unregister-ecceng-v1-1-ef5387b0abdc@gmail.com> Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 0ceaad7dba3c..780abb967822 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1615,11 +1615,13 @@ static int qcom_spi_probe(struct platform_device *pdev) ret = spi_register_controller(ctlr); if (ret) { dev_err(&pdev->dev, "spi_register_controller failed.\n"); - goto err_spi_init; + goto err_register_controller; } return 0; +err_register_controller: + nand_ecc_unregister_on_host_hw_engine(&snandc->qspi->ecc_eng); err_spi_init: qcom_nandc_unalloc(snandc); err_snand_alloc: @@ -1641,7 +1643,7 @@ static void qcom_spi_remove(struct platform_device *pdev) struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); spi_unregister_controller(ctlr); - + nand_ecc_unregister_on_host_hw_engine(&snandc->qspi->ecc_eng); qcom_nandc_unalloc(snandc); clk_disable_unprepare(snandc->aon_clk); From ba3319e5905710abe495b11a1aaf03ebb51d62e2 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Tue, 26 Aug 2025 00:27:47 -0500 Subject: [PATCH 1102/1307] cpufreq/amd-pstate: Fix a regression leading to EPP 0 after resume During the suspend sequence the cached CPPC request is destroyed with the expectation that it's restored during resume. This assumption broke when the separate cache EPP variable was removed, and then it was broken again by commit 608a76b65288 ("cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option") which explicitly set it to zero during suspend. Remove the invalidation and set the value during the suspend call to update limits so that the cached variable can be used to restore on resume. Fixes: 608a76b65288 ("cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option") Fixes: b7a41156588a ("cpufreq/amd-pstate: Invalidate cppc_req_cached during suspend") Reported-by: goldens Closes: https://community.frame.work/t/increased-power-usage-after-resuming-from-suspend-on-ryzen-7040-kernel-6-15-regression/ Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2391221 Tested-by: goldens Tested-by: Willian Wang Reported-by: Vincent Mauirn Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219981 Tested-by: Alex De Lorenzo Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250826052747.2240670-1-superm1@kernel.org Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5cd91489fcbe..b4c79fde1979 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1628,13 +1628,14 @@ static int amd_pstate_suspend(struct cpufreq_policy *policy) * min_perf value across kexec reboots. If this CPU is just resumed back without kexec, * the limits, epp and desired perf will get reset to the cached values in cpudata struct */ - ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + ret = amd_pstate_update_perf(policy, perf.bios_min_perf, + FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), + false); if (ret) return ret; - /* invalidate to ensure it's rewritten during resume */ - cpudata->cppc_req_cached = 0; - /* set this flag to avoid setting core offline*/ cpudata->suspended = true; From 5ebf512f335053a42482ebff91e46c6dc156bf8c Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Wed, 3 Sep 2025 16:48:32 +0100 Subject: [PATCH 1103/1307] sched: Fix sched_numa_find_nth_cpu() if mask offline sched_numa_find_nth_cpu() uses a bsearch to look for the 'closest' CPU in sched_domains_numa_masks and given cpus mask. However they might not intersect if all CPUs in the cpus mask are offline. bsearch will return NULL in that case, bail out instead of dereferencing a bogus pointer. The previous behaviour lead to this bug when using maxcpus=4 on an rk3399 (LLLLbb) (i.e. booting with all big CPUs offline): [ 1.422922] Unable to handle kernel paging request at virtual address ffffff8000000000 [ 1.423635] Mem abort info: [ 1.423889] ESR = 0x0000000096000006 [ 1.424227] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.424715] SET = 0, FnV = 0 [ 1.424995] EA = 0, S1PTW = 0 [ 1.425279] FSC = 0x06: level 2 translation fault [ 1.425735] Data abort info: [ 1.425998] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 [ 1.426499] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 1.426952] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1.427428] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000004a9f000 [ 1.428038] [ffffff8000000000] pgd=18000000f7fff403, p4d=18000000f7fff403, pud=18000000f7fff403, pmd=0000000000000000 [ 1.429014] Internal error: Oops: 0000000096000006 [#1] SMP [ 1.429525] Modules linked in: [ 1.429813] CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-rc4-dirty #343 PREEMPT [ 1.430559] Hardware name: Pine64 RockPro64 v2.1 (DT) [ 1.431012] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1.431634] pc : sched_numa_find_nth_cpu+0x2a0/0x488 [ 1.432094] lr : sched_numa_find_nth_cpu+0x284/0x488 [ 1.432543] sp : ffffffc084e1b960 [ 1.432843] x29: ffffffc084e1b960 x28: ffffff80078a8800 x27: ffffffc0846eb1d0 [ 1.433495] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 [ 1.434144] x23: 0000000000000000 x22: fffffffffff7f093 x21: ffffffc081de6378 [ 1.434792] x20: 0000000000000000 x19: 0000000ffff7f093 x18: 00000000ffffffff [ 1.435441] x17: 3030303866666666 x16: 66663d736b73616d x15: ffffffc104e1b5b7 [ 1.436091] x14: 0000000000000000 x13: ffffffc084712860 x12: 0000000000000372 [ 1.436739] x11: 0000000000000126 x10: ffffffc08476a860 x9 : ffffffc084712860 [ 1.437389] x8 : 00000000ffffefff x7 : ffffffc08476a860 x6 : 0000000000000000 [ 1.438036] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000 [ 1.438683] x2 : 0000000000000000 x1 : ffffffc0846eb000 x0 : ffffff8000407b68 [ 1.439332] Call trace: [ 1.439559] sched_numa_find_nth_cpu+0x2a0/0x488 (P) [ 1.440016] smp_call_function_any+0xc8/0xd0 [ 1.440416] armv8_pmu_init+0x58/0x27c [ 1.440770] armv8_cortex_a72_pmu_init+0x20/0x2c [ 1.441199] arm_pmu_device_probe+0x1e4/0x5e8 [ 1.441603] armv8_pmu_device_probe+0x1c/0x28 [ 1.442007] platform_probe+0x5c/0xac [ 1.442347] really_probe+0xbc/0x298 [ 1.442683] __driver_probe_device+0x78/0x12c [ 1.443087] driver_probe_device+0xdc/0x160 [ 1.443475] __driver_attach+0x94/0x19c [ 1.443833] bus_for_each_dev+0x74/0xd4 [ 1.444190] driver_attach+0x24/0x30 [ 1.444525] bus_add_driver+0xe4/0x208 [ 1.444874] driver_register+0x60/0x128 [ 1.445233] __platform_driver_register+0x24/0x30 [ 1.445662] armv8_pmu_driver_init+0x28/0x4c [ 1.446059] do_one_initcall+0x44/0x25c [ 1.446416] kernel_init_freeable+0x1dc/0x3bc [ 1.446820] kernel_init+0x20/0x1d8 [ 1.447151] ret_from_fork+0x10/0x20 [ 1.447493] Code: 90022e21 f000e5f5 910de2b5 2a1703e2 (f8767803) [ 1.448040] ---[ end trace 0000000000000000 ]--- [ 1.448483] note: swapper/0[1] exited with preempt_count 1 [ 1.449047] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b [ 1.449741] SMP: stopping secondary CPUs [ 1.450105] Kernel Offset: disabled [ 1.450419] CPU features: 0x000000,00080000,20002001,0400421b [ 1.450935] Memory Limit: none [ 1.451217] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- Yury: with the fix, the function returns cpu == nr_cpu_ids, and later in smp_call_function_any -> smp_call_function_single -> generic_exec_single we test the cpu for '>= nr_cpu_ids' and return -ENXIO. So everything is handled correctly. Fixes: cd7f55359c90 ("sched: add sched_numa_find_nth_cpu()") Cc: stable@vger.kernel.org Signed-off-by: Christian Loehle Signed-off-by: Yury Norov (NVIDIA) --- kernel/sched/topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 977e133bb8a4..6e2f54169e66 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2201,6 +2201,8 @@ int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) goto unlock; hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp); + if (!hop_masks) + goto unlock; hop = hop_masks - k.masks; ret = hop ? From d302effafae5a6a632bf581ced97ef91c95a2ac6 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Wed, 13 Aug 2025 19:44:38 +0200 Subject: [PATCH 1104/1307] ARM: at91: select ARCH_MICROCHIP Like with the ARM64 Microchip platforms, lets add a generic ARCH_MICROCHIP symbol and select it so that drivers that are reused for multiple product generation or lines, can just depend on it instead of adding each SoC symbol as their dependencies. Signed-off-by: Robert Marko Acked-by: Nicolas Ferre Acked-by: Daniel Machon Link: https://lore.kernel.org/r/20250813174720.540015-3-robert.marko@sartura.hr Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre --- arch/arm/mach-at91/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 04bd91c72521..c5ef27e3cd8f 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only +config ARCH_MICROCHIP + bool + menuconfig ARCH_AT91 bool "AT91/Microchip SoCs" depends on (CPU_LITTLE_ENDIAN && (ARCH_MULTI_V4T || ARCH_MULTI_V5)) || \ @@ -8,6 +11,7 @@ menuconfig ARCH_AT91 select GPIOLIB select PINCTRL select SOC_BUS + select ARCH_MICROCHIP if ARCH_AT91 config SOC_SAMV7 From 217efb440933bf97a78ef328b211d8a39f4ff171 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Tue, 19 Aug 2025 10:05:24 -0700 Subject: [PATCH 1105/1307] ARM: dts: microchip: sama7d65: Force SDMMC Legacy mode The SDMMC in this IP currently only supports legacy mode due to a hardware quirk, setting the flags to reflect the limitation. Fixes: deaa14ab6b06 ("ARM: dts: microchip: add support for sama7d65_curiosity board") Signed-off-by: Ryan Wanner Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20250819170528.126010-1-Ryan.Wanner@microchip.com Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/microchip/at91-sama7d65_curiosity.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/microchip/at91-sama7d65_curiosity.dts b/arch/arm/boot/dts/microchip/at91-sama7d65_curiosity.dts index 7eaf6ca233ec..d086437f5e6f 100644 --- a/arch/arm/boot/dts/microchip/at91-sama7d65_curiosity.dts +++ b/arch/arm/boot/dts/microchip/at91-sama7d65_curiosity.dts @@ -387,6 +387,8 @@ &rtt { &sdmmc1 { bus-width = <4>; + no-1-8-v; + sdhci-caps-mask = <0x0 0x00200000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sdmmc1_default>; status = "okay"; From 666d2206f1ee8a4f21ffbec438381a524a62815b Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 3 Sep 2025 16:15:26 +0100 Subject: [PATCH 1106/1307] perf tests: Fix "PE file support" test build filename__read_build_id() now takes a blocking/non-blocking argument. The original behavior of filename__read_build_id() was blocking so add block=true to fix the build. Fixes: 2c369d91d093 ("perf symbol: Add blocking argument to filename__read_build_id") Signed-off-by: James Clark Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20250903-james-perf-read-build-id-fix-v1-1-6a694d0a980f@linaro.org Signed-off-by: Namhyung Kim --- tools/perf/tests/pe-file-parsing.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index 30c7da79e109..8b31d1d05f90 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -37,7 +37,7 @@ static int run_dir(const char *d) size_t idx; scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); - ret = filename__read_build_id(filename, &bid); + ret = filename__read_build_id(filename, &bid, /*block=*/true); TEST_ASSERT_VAL("Failed to read build_id", ret == sizeof(expect_build_id)); TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, @@ -49,7 +49,7 @@ static int run_dir(const char *d) !strcmp(debuglink, expect_debuglink)); scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); - ret = filename__read_build_id(debugfile, &bid); + ret = filename__read_build_id(debugfile, &bid, /*block=*/true); TEST_ASSERT_VAL("Failed to read debug file build_id", ret == sizeof(expect_build_id)); TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, From 467e00b30dfe75c4cfc2197ceef1fddca06adc25 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 2 Sep 2025 13:40:50 +0100 Subject: [PATCH 1107/1307] drm/amd/amdgpu: Fix missing error return on kzalloc failure Currently the kzalloc failure check just sets reports the failure and sets the variable ret to -ENOMEM, which is not checked later for this specific error. Fix this by just returning -ENOMEM rather than setting ret. Fixes: 4fb930715468 ("drm/amd/amdgpu: remove redundant host to psp cmd buf allocations") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher (cherry picked from commit 1ee9d1a0962c13ba5ab7e47d33a80e3b8dc4b52e) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 23484317a5fa..693357caa9a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -448,7 +448,7 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); - ret = -ENOMEM; + return -ENOMEM; } adev->psp.xgmi_context.supports_extended_data = From 4540f1d23e7f387880ce46d11b5cd3f27248bf8d Mon Sep 17 00:00:00 2001 From: Stanislav Fort Date: Tue, 2 Sep 2025 14:00:49 +0300 Subject: [PATCH 1108/1307] audit: fix out-of-bounds read in audit_compare_dname_path() When a watch on dir=/ is combined with an fsnotify event for a single-character name directly under / (e.g., creating /a), an out-of-bounds read can occur in audit_compare_dname_path(). The helper parent_len() returns 1 for "/". In audit_compare_dname_path(), when parentlen equals the full path length (1), the code sets p = path + 1 and pathlen = 1 - 1 = 0. The subsequent loop then dereferences p[pathlen - 1] (i.e., p[-1]), causing an out-of-bounds read. Fix this by adding a pathlen > 0 check to the while loop condition to prevent the out-of-bounds access. Cc: stable@vger.kernel.org Fixes: e92eebb0d611 ("audit: fix suffixed '/' filename matching") Reported-by: Stanislav Fort Suggested-by: Linus Torvalds Signed-off-by: Stanislav Fort [PM: subject tweak, sign-off email fixes] Signed-off-by: Paul Moore --- kernel/auditfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index e3f42018ed46..f7708fe2c457 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1326,7 +1326,7 @@ int audit_compare_dname_path(const struct qstr *dname, const char *path, int par /* handle trailing slashes */ pathlen -= parentlen; - while (p[pathlen - 1] == '/') + while (pathlen > 0 && p[pathlen - 1] == '/') pathlen--; if (pathlen != dlen) From d5067034725b1a0b2c785cea9cfce68776a94042 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Sep 2025 10:31:08 +0200 Subject: [PATCH 1109/1307] Revert "drm/nouveau: Remove waitque for sched teardown" This reverts: commit bead88002227 ("drm/nouveau: Remove waitque for sched teardown") commit 5f46f5c7af8c ("drm/nouveau: Add new callback for scheduler teardown") from the drm/sched teardown leak fix series: https://lore.kernel.org/dri-devel/20250710125412.128476-2-phasta@kernel.org/ The aforementioned series removed a blocking waitqueue from nouveau_sched_fini(). It was mistakenly assumed that this waitqueue only prevents jobs from leaking, which the series fixed. The waitqueue, however, also guarantees that all VM_BIND related jobs are finished in order, cleaning up mappings in the GPU's MMU. These jobs must be executed sequentially. Without the waitqueue, this is no longer guaranteed, because entity and scheduler teardown can race with each other. Revert all patches related to the waitqueue removal. Fixes: bead88002227 ("drm/nouveau: Remove waitque for sched teardown") Suggested-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250901083107.10206-2-phasta@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 15 ----------- drivers/gpu/drm/nouveau/nouveau_fence.h | 1 - drivers/gpu/drm/nouveau/nouveau_sched.c | 35 ++++++++++--------------- drivers/gpu/drm/nouveau/nouveau_sched.h | 9 ++++--- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 8 +++--- 5 files changed, 24 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9f345a008717..869d4335c0f4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -240,21 +240,6 @@ nouveau_fence_emit(struct nouveau_fence *fence) return ret; } -void -nouveau_fence_cancel(struct nouveau_fence *fence) -{ - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - unsigned long flags; - - spin_lock_irqsave(&fctx->lock, flags); - if (!dma_fence_is_signaled_locked(&fence->base)) { - dma_fence_set_error(&fence->base, -ECANCELED); - if (nouveau_fence_signal(fence)) - nvif_event_block(&fctx->event); - } - spin_unlock_irqrestore(&fctx->lock, flags); -} - bool nouveau_fence_done(struct nouveau_fence *fence) { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 9957a919bd38..183dd43ecfff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -29,7 +29,6 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -void nouveau_fence_cancel(struct nouveau_fence *fence); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 0cc0bc9f9952..e60f7892f5ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -11,7 +11,6 @@ #include "nouveau_exec.h" #include "nouveau_abi16.h" #include "nouveau_sched.h" -#include "nouveau_chan.h" #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 @@ -122,9 +121,11 @@ nouveau_job_done(struct nouveau_job *job) { struct nouveau_sched *sched = job->sched; - spin_lock(&sched->job_list.lock); + spin_lock(&sched->job.list.lock); list_del(&job->entry); - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); } void @@ -305,9 +306,9 @@ nouveau_job_submit(struct nouveau_job *job) } /* Submit was successful; add the job to the schedulers job list. */ - spin_lock(&sched->job_list.lock); - list_add(&job->entry, &sched->job_list.head); - spin_unlock(&sched->job_list.lock); + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); @@ -392,23 +393,10 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -static void -nouveau_sched_cancel_job(struct drm_sched_job *sched_job) -{ - struct nouveau_fence *fence; - struct nouveau_job *job; - - job = to_nouveau_job(sched_job); - fence = to_nouveau_fence(job->done_fence); - - nouveau_fence_cancel(fence); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, - .cancel_job = nouveau_sched_cancel_job, }; static int @@ -458,8 +446,9 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, goto fail_sched; mutex_init(&sched->mutex); - spin_lock_init(&sched->job_list.lock); - INIT_LIST_HEAD(&sched->job_list.head); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); return 0; @@ -493,12 +482,16 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, return 0; } + static void nouveau_sched_fini(struct nouveau_sched *sched) { struct drm_gpu_scheduler *drm_sched = &sched->base; struct drm_sched_entity *entity = &sched->entity; + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + drm_sched_entity_fini(entity); drm_sched_fini(drm_sched); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index b98c3f0bef30..20cd1da8db73 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -103,9 +103,12 @@ struct nouveau_sched { struct mutex mutex; struct { - struct list_head head; - spinlock_t lock; - } job_list; + struct { + struct list_head head; + spinlock_t lock; + } list; + struct wait_queue_head wq; + } job; }; int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index ddfc46bc1b3e..48f105239f42 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) u64 end = addr + range; again: - spin_lock(&sched->job_list.lock); - list_for_each_entry(__job, &sched->job_list.head, entry) { + spin_lock(&sched->job.list.lock); + list_for_each_entry(__job, &sched->job.list.head, entry) { struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); list_for_each_op(op, &bind_job->ops) { @@ -1030,7 +1030,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) if (!(end <= op_addr || addr >= op_end)) { nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1038,7 +1038,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) } } } - spin_unlock(&sched->job_list.lock); + spin_unlock(&sched->job.list.lock); } static int From b4ada0618eed0fbd1b1630f73deb048c592b06a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Tue, 2 Sep 2025 15:59:59 +0000 Subject: [PATCH 1110/1307] tools: ynl-gen: fix nested array counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The blamed commit introduced the concept of split attribute counting, and later allocating an array to hold them, however TypeArrayNest wasn't updated to use the new counting variable. Abbreviated example from tools/net/ynl/generated/nl80211-user.c: nl80211_if_combination_attributes_parse(...): unsigned int n_limits = 0; [...] ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len) if (type == NL80211_IFACE_COMB_LIMITS) ynl_attr_for_each_nested(attr2, attr) dst->_count.limits++; if (n_limits) { dst->_count.limits = n_limits; /* allocate and parse attributes */ } In the above example n_limits is guaranteed to always be 0, hence the conditional is unsatisfiable and is optimized out. This patch changes the attribute counting to use n_limits++ in the attribute counting loop in the above example. Fixes: 58da455b31ba ("tools: ynl-gen: improve unwind on parsing errors") Signed-off-by: Asbjørn Sloth Tønnesen Link: https://patch.msgid.link/20250902160001.760953-1-ast@fiberby.net Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index ef032e17fec4..eb295756c3bf 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -830,7 +830,7 @@ class TypeArrayNest(Type): 'ynl_attr_for_each_nested(attr2, attr) {', '\tif (ynl_attr_validate(yarg, attr2))', '\t\treturn YNL_PARSE_CB_ERROR;', - f'\t{var}->_count.{self.c_name}++;', + f'\tn_{self.c_name}++;', '}'] return get_lines, None, local_vars From cd6c956fbc13156bcbcca084b46a8380caebc2a8 Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Sun, 31 Aug 2025 10:04:46 +0000 Subject: [PATCH 1111/1307] i2c: rtl9300: fix channel number bound check Fix the current check for number of channels (child nodes in the device tree). Before, this was: if (device_get_child_node_count(dev) >= RTL9300_I2C_MUX_NCHAN) RTL9300_I2C_MUX_NCHAN gives the maximum number of channels so checking with '>=' isn't correct because it doesn't allow the last channel number. Thus, fix it to: if (device_get_child_node_count(dev) > RTL9300_I2C_MUX_NCHAN) Issue occured on a TP-Link TL-ST1008F v2.0 device (8 SFP+ ports) and fix is tested there. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Jonas Jelonek Tested-by: Sven Eckelmann Reviewed-by: Chris Packham Tested-by: Chris Packham # On RTL9302C based board Tested-by: Markus Stockhausen Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250831100457.3114-2-jelonek.jonas@gmail.com --- drivers/i2c/busses/i2c-rtl9300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index cfafe089102a..1a63790f1957 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -353,7 +353,7 @@ static int rtl9300_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c); - if (device_get_child_node_count(dev) >= RTL9300_I2C_MUX_NCHAN) + if (device_get_child_node_count(dev) > RTL9300_I2C_MUX_NCHAN) return dev_err_probe(dev, -EINVAL, "Too many channels\n"); device_for_each_child_node(dev, child) { From 06418cb5a1a542a003fdb4ad8e76ea542d57cfba Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Sun, 31 Aug 2025 10:04:47 +0000 Subject: [PATCH 1112/1307] i2c: rtl9300: ensure data length is within supported range Add an explicit check for the xfer length to 'rtl9300_i2c_config_xfer' to ensure the data length isn't within the supported range. In particular a data length of 0 is not supported by the hardware and causes unintended or destructive behaviour. This limitation becomes obvious when looking at the register documentation [1]. 4 bits are reserved for DATA_WIDTH and the value of these 4 bits is used as N + 1, allowing a data length range of 1 <= len <= 16. Affected by this is the SMBus Quick Operation which works with a data length of 0. Passing 0 as the length causes an underflow of the value due to: (len - 1) & 0xf and effectively specifying a transfer length of 16 via the registers. This causes a 16-byte write operation instead of a Quick Write. For example, on SFP modules without write-protected EEPROM this soft-bricks them by overwriting some initial bytes. For completeness, also add a quirk for the zero length. [1] https://svanheule.net/realtek/longan/register/i2c_mst1_ctrl2 Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Jonas Jelonek Tested-by: Sven Eckelmann Reviewed-by: Chris Packham Tested-by: Chris Packham # On RTL9302C based board Tested-by: Markus Stockhausen Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250831100457.3114-3-jelonek.jonas@gmail.com --- drivers/i2c/busses/i2c-rtl9300.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 1a63790f1957..2b3e80aa1bdf 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -99,6 +99,9 @@ static int rtl9300_i2c_config_xfer(struct rtl9300_i2c *i2c, struct rtl9300_i2c_c { u32 val, mask; + if (len < 1 || len > 16) + return -EINVAL; + val = chan->bus_freq << RTL9300_I2C_MST_CTRL2_SCL_FREQ_OFS; mask = RTL9300_I2C_MST_CTRL2_SCL_FREQ_MASK; @@ -323,7 +326,7 @@ static const struct i2c_algorithm rtl9300_i2c_algo = { }; static struct i2c_adapter_quirks rtl9300_i2c_quirks = { - .flags = I2C_AQ_NO_CLK_STRETCH, + .flags = I2C_AQ_NO_CLK_STRETCH | I2C_AQ_NO_ZERO_LEN, .max_read_len = 16, .max_write_len = 16, }; From ede965fd555ac2536cf651893a998dbfd8e57b86 Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Sun, 31 Aug 2025 10:04:48 +0000 Subject: [PATCH 1113/1307] i2c: rtl9300: remove broken SMBus Quick operation support Remove the SMBus Quick operation from this driver because it is not natively supported by the hardware and is wrongly implemented in the driver. The I2C controllers in Realtek RTL9300 and RTL9310 are SMBus-compliant but there doesn't seem to be native support for the SMBus Quick operation. It is not explicitly mentioned in the documentation but looking at the registers which configure an SMBus transaction, one can see that the data length cannot be set to 0. This suggests that the hardware doesn't allow any SMBus message without data bytes (except for those it does on it's own, see SMBus Block Read). The current implementation of SMBus Quick operation passes a length of 0 (which is actually invalid). Before the fix of a bug in a previous commit, this led to a read operation of 16 bytes from any register (the one of a former transaction or any other value. This caused issues like soft-bricked SFP modules after a simple probe with i2cdetect which uses Quick by default. Running this with SFP modules whose EEPROM isn't write-protected, some of the initial bytes are overwritten because a 16-byte write operation is executed instead of a Quick Write. (This temporarily soft-bricked one of my DAC cables.) Because SMBus Quick operation is obviously not supported on these controllers (because a length of 0 cannot be set, even when no register address is set), remove that instead of claiming there is support. There also shouldn't be any kind of emulated 'Quick' which just does another kind of operation in the background. Otherwise, specific issues occur in case of a 'Quick' Write which actually writes unknown data to an unknown register. Fixes: c366be720235 ("i2c: Add driver for the RTL9300 I2C controller") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Jonas Jelonek Tested-by: Sven Eckelmann Reviewed-by: Chris Packham Tested-by: Chris Packham # On RTL9302C based board Tested-by: Markus Stockhausen Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250831100457.3114-4-jelonek.jonas@gmail.com --- drivers/i2c/busses/i2c-rtl9300.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-rtl9300.c b/drivers/i2c/busses/i2c-rtl9300.c index 2b3e80aa1bdf..9e1f71fed0fe 100644 --- a/drivers/i2c/busses/i2c-rtl9300.c +++ b/drivers/i2c/busses/i2c-rtl9300.c @@ -225,15 +225,6 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s } switch (size) { - case I2C_SMBUS_QUICK: - ret = rtl9300_i2c_config_xfer(i2c, chan, addr, 0); - if (ret) - goto out_unlock; - ret = rtl9300_i2c_reg_addr_set(i2c, 0, 0); - if (ret) - goto out_unlock; - break; - case I2C_SMBUS_BYTE: if (read_write == I2C_SMBUS_WRITE) { ret = rtl9300_i2c_config_xfer(i2c, chan, addr, 0); @@ -315,9 +306,9 @@ static int rtl9300_i2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned s static u32 rtl9300_i2c_func(struct i2c_adapter *a) { - return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | - I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | - I2C_FUNC_SMBUS_BLOCK_DATA; + return I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BLOCK_DATA | + I2C_FUNC_SMBUS_I2C_BLOCK; } static const struct i2c_algorithm rtl9300_i2c_algo = { From 5d6b58c932ec451a5c41482790eb5b1ecf165a94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Sep 2025 18:36:03 +0000 Subject: [PATCH 1114/1307] net: lockless sock_i_ino() Followup of commit c51da3f7a161 ("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). Prior art: commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 +++++++++++++---- net/core/sock.c | 22 ---------------------- net/mptcp/protocol.c | 1 - net/netlink/diag.c | 2 +- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..fb13322a11fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -285,6 +285,7 @@ struct sk_filter; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner + * @sk_ino: inode number (zero if orphaned) * @sk_prefer_busy_poll: prefer busypolling over softirq processing * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting @@ -518,6 +519,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; @@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk->sk_socket = sock; + if (sock) { + WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); + WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); + } } static inline wait_queue_head_t *sk_sleep(struct sock *sk) @@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk) sk_set_socket(sk, NULL); sk->sk_wq = NULL; /* Note: sk_uid is unchanged. */ + WRITE_ONCE(sk->sk_ino, 0); write_unlock_bh(&sk->sk_callback_lock); } @@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } +static inline unsigned long sock_i_ino(const struct sock *sk) +{ + /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ + return READ_ONCE(sk->sk_ino); +} + static inline kuid_t sk_uid(const struct sock *sk) { /* Paired with WRITE_ONCE() in sockfs_setattr() */ return READ_ONCE(sk->sk_uid); } -unsigned long __sock_i_ino(struct sock *sk); -unsigned long sock_i_ino(struct sock *sk); - static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 7c26ec8dce63..158bddd23134 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb) EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ -unsigned long __sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - read_lock(&sk->sk_callback_lock); - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); - return ino; -} -EXPORT_SYMBOL(__sock_i_ino); - -unsigned long sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - local_bh_disable(); - ino = __sock_i_ino(sk); - local_bh_enable(); - return ino; -} -EXPORT_SYMBOL(sock_i_ino); - /* * Allocate a skb from the socket's send buffer. */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9a287b75c1b3..e6fd97b21e9e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 61981e01fd6f..b8e58132e8af 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - __sock_i_ino(sk)) < 0) { + sock_i_ino(sk)) < 0) { ret = 1; break; } From 3bc32fd9db47a20f38b0783364fdb2f2f1c97220 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 1 Sep 2025 12:52:56 +0100 Subject: [PATCH 1115/1307] net: phylink: move PHY interrupt request to non-fail path The blamed commit added code which could return an error after we requested the PHY interrupt. When we return an error, the caller will call phy_detach() which fails to free the interrupt. Rearrange the code such that failing operations happen before the interrupt is requested, thereby allowing phy_detach() to be used. Note that replacing phy_detach() with phy_disconnect() in these paths could lead to freeing an interrupt which was never requested. Fixes: 1942b1c6f687 ("net: phylink: make configuring clock-stop dependent on MAC support") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1ut35k-00000001UEl-0iq6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index f1b57e3fdf30..c7cb95aa8007 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2141,9 +2141,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising); - if (phy_interrupt_is_valid(phy)) - phy_request_interrupt(phy); - if (pl->config->mac_managed_pm) phy->mac_managed_pm = true; @@ -2160,6 +2157,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, ret = 0; } + if (ret == 0 && phy_interrupt_is_valid(phy)) + phy_request_interrupt(phy); + return ret; } From 9d28f94912589f04ab51fbccaef287d4f40e0d1f Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 1 Sep 2025 14:30:18 -0700 Subject: [PATCH 1116/1307] net: thunder_bgx: add a missing of_node_put phy_np needs to get freed, just like the other child nodes. Fixes: 5fc7cf179449 ("net: thunderx: Cleanup PHY probing code.") Signed-off-by: Rosen Penev Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901213018.47392-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 21495b5dce25..0f913db4814e 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1493,13 +1493,17 @@ static int bgx_init_of_phy(struct bgx *bgx) * this cortina phy, for which there is no driver * support, ignore it. */ - if (phy_np && - !of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { - /* Wait until the phy drivers are available */ - pd = of_phy_find_device(phy_np); - if (!pd) - goto defer; - bgx->lmac[lmac].phydev = pd; + if (phy_np) { + if (!of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { + /* Wait until the phy drivers are available */ + pd = of_phy_find_device(phy_np); + if (!pd) { + of_node_put(phy_np); + goto defer; + } + bgx->lmac[lmac].phydev = pd; + } + of_node_put(phy_np); } lmac++; From 9e3d71a92e561ccc77025689dab25d201fee7a3e Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 1 Sep 2025 14:33:14 -0700 Subject: [PATCH 1117/1307] net: thunder_bgx: decrement cleanup index before use All paths in probe that call goto defer do so before assigning phydev and thus it makes sense to cleanup the prior index. It also fixes a bug where index 0 does not get cleaned up. Fixes: b7d3e3d3d21a ("net: thunderx: Don't leak phy device references on -EPROBE_DEFER condition.") Signed-off-by: Rosen Penev Reviewed-by: Vadim Fedorenko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901213314.48599-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 0f913db4814e..9efb60842ad1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1519,11 +1519,11 @@ static int bgx_init_of_phy(struct bgx *bgx) * for phy devices we may have already found. */ while (lmac) { + lmac--; if (bgx->lmac[lmac].phydev) { put_device(&bgx->lmac[lmac].phydev->mdio.dev); bgx->lmac[lmac].phydev = NULL; } - lmac--; } of_node_put(node); return -EPROBE_DEFER; From a51160f8da850a65afbf165f5bbac7ffb388bf74 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Sep 2025 09:36:08 +0300 Subject: [PATCH 1118/1307] ipv4: Fix NULL vs error pointer check in inet_blackhole_dev_init() The inetdev_init() function never returns NULL. Check for error pointers instead. Fixes: 22600596b675 ("ipv4: give an IPv4 dev to blackhole_netdev") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/aLaQWL9NguWmeM1i@stanley.mountain Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c47d3828d4f6..942a887bf089 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -340,14 +340,13 @@ static void inetdev_destroy(struct in_device *in_dev) static int __init inet_blackhole_dev_init(void) { - int err = 0; + struct in_device *in_dev; rtnl_lock(); - if (!inetdev_init(blackhole_netdev)) - err = -ENOMEM; + in_dev = inetdev_init(blackhole_netdev); rtnl_unlock(); - return err; + return PTR_ERR_OR_ZERO(in_dev); } late_initcall(inet_blackhole_dev_init); From cc282f73bc0cbdf3ee7af2f2d3a2ef4e6b19242d Mon Sep 17 00:00:00 2001 From: Mahanta Jambigi Date: Tue, 2 Sep 2025 10:20:41 +0200 Subject: [PATCH 1119/1307] net/smc: Remove validation of reserved bits in CLC Decline message Currently SMC code is validating the reserved bits while parsing the incoming CLC decline message & when this validation fails, its treated as a protocol error. As a result, the SMC connection is terminated instead of falling back to TCP. As per RFC7609[1] specs we shouldn't be validating the reserved bits that is part of CLC message. This patch fixes this issue. CLC Decline message format can viewed here[2]. [1] https://datatracker.ietf.org/doc/html/rfc7609#page-92 [2] https://datatracker.ietf.org/doc/html/rfc7609#page-105 Fixes: 8ade200c269f ("net/smc: add v2 format of CLC decline message") Signed-off-by: Mahanta Jambigi Reviewed-by: Sidraya Jayagond Reviewed-by: Alexandra Winter Reviewed-by: Dust Li Link: https://patch.msgid.link/20250902082041.98996-1-mjambigi@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 5a4db151fe95..08be56dfb3f2 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -426,8 +426,6 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) { struct smc_clc_msg_hdr *hdr = &dclc->hdr; - if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) - return false; if (hdr->version == SMC_V1) { if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) return false; From a125c8fb9ddbcb0602103a50727a476fd30dec01 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 2 Sep 2025 03:20:55 -0700 Subject: [PATCH 1120/1307] mctp: return -ENOPROTOOPT for unknown getsockopt options In mctp_getsockopt(), unrecognized options currently return -EINVAL. In contrast, mctp_setsockopt() returns -ENOPROTOOPT for unknown options. Update mctp_getsockopt() to also return -ENOPROTOOPT for unknown options. This aligns the behavior of getsockopt() and setsockopt(), and matches the standard kernel socket API convention for handling unsupported options. Fixes: 99ce45d5e7db ("mctp: Implement extended addressing") Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20250902102059.1370008-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index df4e8cf33899..685524800d70 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -425,7 +425,7 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname, return 0; } - return -EINVAL; + return -ENOPROTOOPT; } /* helpers for reading/writing the tag ioc, handling compatibility across the From 8156210d36a43e76372312c87eb5ea3dbb405a85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Sep 2025 12:46:42 +0000 Subject: [PATCH 1121/1307] ax25: properly unshare skbs in ax25_kiss_rcv() Bernard Pidoux reported a regression apparently caused by commit c353e8983e0d ("net: introduce per netns packet chains"). skb->dev becomes NULL and we crash in __netif_receive_skb_core(). Before above commit, different kind of bugs or corruptions could happen without a major crash. But the root cause is that ax25_kiss_rcv() can queue/mangle input skb without checking if this skb is shared or not. Many thanks to Bernard Pidoux for his help, diagnosis and tests. We had a similar issue years ago fixed with commit 7aaed57c5c28 ("phonet: properly unshare skbs in phonet_rcv()"). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Bernard Pidoux Closes: https://lore.kernel.org/netdev/1713f383-c538-4918-bc64-13b3288cd542@free.fr/ Tested-by: Bernard Pidoux Signed-off-by: Eric Dumazet Cc: Joerg Reuter Cc: David Ranch Cc: Folkert van Heusden Reviewed-by: Dan Cross Link: https://patch.msgid.link/20250902124642.212705-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ax25/ax25_in.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 1cac25aca637..f2d66af86359 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -433,6 +433,10 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + skb_orphan(skb); if (!net_eq(dev_net(dev), &init_net)) { From 394bfac1c7f7b701c2c93834c5761b9c9ceeebcf Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 22 Aug 2025 06:33:18 +0000 Subject: [PATCH 1122/1307] mm/khugepaged: fix the address passed to notifier on testing young Commit 8ee53820edfd ("thp: mmu_notifier_test_young") introduced mmu_notifier_test_young(), but we are passing the wrong address. In xxx_scan_pmd(), the actual iteration address is "_address" not "address". We seem to misuse the variable on the very beginning. Change it to the right one. [akpm@linux-foundation.org fix whitespace, per everyone] Link: https://lkml.kernel.org/r/20250822063318.11644-1-richard.weiyang@gmail.com Fixes: 8ee53820edfd ("thp: mmu_notifier_test_young") Signed-off-by: Wei Yang Reviewed-by: Dev Jain Reviewed-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Baolin Wang Cc: Liam R. Howlett Cc: Nico Pache Cc: Ryan Roberts Cc: Barry Song Cc: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 6b40bdfd224c..b486c1d19b2d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1417,8 +1417,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, */ if (cc->is_khugepaged && (pte_young(pteval) || folio_test_young(folio) || - folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, - address))) + folio_test_referenced(folio) || + mmu_notifier_test_young(vma->vm_mm, _address))) referenced++; } if (!writable) { From 397f6d14f9c370e4910e6885294c340f39dedbf5 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Fri, 27 Jun 2025 20:57:47 +0800 Subject: [PATCH 1123/1307] mm/memory_hotplug: fix hwpoisoned large folio handling in do_migrate_range() In do_migrate_range(), the hwpoisoned folio may be large folio, which can't be handled by unmap_poisoned_folio(). I can reproduce this issue in qemu after adding delay in memory_failure() BUG: kernel NULL pointer dereference, address: 0000000000000000 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:try_to_unmap_one+0x16a/0xfc0 rmap_walk_anon+0xda/0x1f0 try_to_unmap+0x78/0x80 ? __pfx_try_to_unmap_one+0x10/0x10 ? __pfx_folio_not_mapped+0x10/0x10 ? __pfx_folio_lock_anon_vma_read+0x10/0x10 unmap_poisoned_folio+0x60/0x140 do_migrate_range+0x4d1/0x600 ? slab_memory_callback+0x6a/0x190 ? notifier_call_chain+0x56/0xb0 offline_pages+0x3e6/0x460 memory_subsys_offline+0x130/0x1f0 device_offline+0xba/0x110 acpi_bus_offline+0xb7/0x130 acpi_scan_hot_remove+0x77/0x290 acpi_device_hotplug+0x1e0/0x240 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x186/0x340 Besides, do_migrate_range() may be called between memory_failure set hwpoison flag and isolate the folio from lru, so remove WARN_ON(). In other places, unmap_poisoned_folio() is called when the folio is isolated, obey it in do_migrate_range() too. [david@redhat.com: don't abort offlining, fixed typo, add comment] Link: https://lkml.kernel.org/r/3c214dff-9649-4015-840f-10de0e03ebe4@redhat.com Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined") Signed-off-by: Jinjiang Tu Signed-off-by: David Hildenbrand Acked-by: Zi Yan Reviewed-by: Miaohe Lin Cc: Kefeng Wang Cc: Luis Chamberalin Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Oscar Salvador Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1f15af712bc3..74318c787715 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1815,8 +1815,14 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; if (folio_contain_hwpoisoned_page(folio)) { - if (WARN_ON(folio_test_lru(folio))) - folio_isolate_lru(folio); + /* + * unmap_poisoned_folio() cannot handle large folios + * in all cases yet. + */ + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) + goto put_folio; + if (folio_test_lru(folio) && !folio_isolate_lru(folio)) + goto put_folio; if (folio_mapped(folio)) { folio_lock(folio); unmap_poisoned_folio(folio, pfn, false); From 669602b5b7386e4fa00fc67b045ca3fd816e685d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 24 Aug 2025 16:07:59 +0300 Subject: [PATCH 1124/1307] init/main.c: fix boot time tracing crash Steven Rostedt reported a crash with "ftrace=function" kernel command line: [ 0.159269] BUG: kernel NULL pointer dereference, address: 000000000000001c [ 0.160254] #PF: supervisor read access in kernel mode [ 0.160975] #PF: error_code(0x0000) - not-present page [ 0.161697] PGD 0 P4D 0 [ 0.162055] Oops: Oops: 0000 [#1] SMP PTI [ 0.162619] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.17.0-rc2-test-00006-g48d06e78b7cb-dirty #9 PREEMPT(undef) [ 0.164141] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 0.165439] RIP: 0010:kmem_cache_alloc_noprof (mm/slub.c:4237) [ 0.166186] Code: 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 49 89 fc 53 48 83 e4 f0 48 83 ec 20 8b 05 c9 b6 7e 01 <44> 8b 77 1c 65 4c 8b 2d b5 ea 20 02 4c 89 6c 24 18 41 89 f5 21 f0 [ 0.168811] RSP: 0000:ffffffffb2e03b30 EFLAGS: 00010086 [ 0.169545] RAX: 0000000001fff33f RBX: 0000000000000000 RCX: 0000000000000000 [ 0.170544] RDX: 0000000000002800 RSI: 0000000000002800 RDI: 0000000000000000 [ 0.171554] RBP: ffffffffb2e03b80 R08: 0000000000000004 R09: ffffffffb2e03c90 [ 0.172549] R10: ffffffffb2e03c90 R11: 0000000000000000 R12: 0000000000000000 [ 0.173544] R13: ffffffffb2e03c90 R14: ffffffffb2e03c90 R15: 0000000000000001 [ 0.174542] FS: 0000000000000000(0000) GS:ffff9d2808114000(0000) knlGS:0000000000000000 [ 0.175684] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.176486] CR2: 000000000000001c CR3: 000000007264c001 CR4: 00000000000200b0 [ 0.177483] Call Trace: [ 0.177828] [ 0.178123] mas_alloc_nodes (lib/maple_tree.c:176 (discriminator 2) lib/maple_tree.c:1255 (discriminator 2)) [ 0.178692] mas_store_gfp (lib/maple_tree.c:5468) [ 0.179223] execmem_cache_add_locked (mm/execmem.c:207) [ 0.179870] execmem_alloc (mm/execmem.c:213 mm/execmem.c:313 mm/execmem.c:335 mm/execmem.c:475) [ 0.180397] ? ftrace_caller (arch/x86/kernel/ftrace_64.S:169) [ 0.180922] ? __pfx_ftrace_caller (arch/x86/kernel/ftrace_64.S:158) [ 0.181517] execmem_alloc_rw (mm/execmem.c:487) [ 0.182052] arch_ftrace_update_trampoline (arch/x86/kernel/ftrace.c:266 arch/x86/kernel/ftrace.c:344 arch/x86/kernel/ftrace.c:474) [ 0.182778] ? ftrace_caller_op_ptr (arch/x86/kernel/ftrace_64.S:182) [ 0.183388] ftrace_update_trampoline (kernel/trace/ftrace.c:7947) [ 0.184024] __register_ftrace_function (kernel/trace/ftrace.c:368) [ 0.184682] ftrace_startup (kernel/trace/ftrace.c:3048) [ 0.185205] ? __pfx_function_trace_call (kernel/trace/trace_functions.c:210) [ 0.185877] register_ftrace_function_nolock (kernel/trace/ftrace.c:8717) [ 0.186595] register_ftrace_function (kernel/trace/ftrace.c:8745) [ 0.187254] ? __pfx_function_trace_call (kernel/trace/trace_functions.c:210) [ 0.187924] function_trace_init (kernel/trace/trace_functions.c:170) [ 0.188499] tracing_set_tracer (kernel/trace/trace.c:5916 kernel/trace/trace.c:6349) [ 0.189088] register_tracer (kernel/trace/trace.c:2391) [ 0.189642] early_trace_init (kernel/trace/trace.c:11075 kernel/trace/trace.c:11149) [ 0.190204] start_kernel (init/main.c:970) [ 0.190732] x86_64_start_reservations (arch/x86/kernel/head64.c:307) [ 0.191381] x86_64_start_kernel (??:?) [ 0.191955] common_startup_64 (arch/x86/kernel/head_64.S:419) [ 0.192534] [ 0.192839] Modules linked in: [ 0.193267] CR2: 000000000000001c [ 0.193730] ---[ end trace 0000000000000000 ]--- The crash happens because on x86 ftrace allocations from execmem require maple tree to be initialized. Move maple tree initialization that depends only on slab availability earlier in boot so that it will happen right after mm_core_init(). Link: https://lkml.kernel.org/r/20250824130759.1732736-1-rppt@kernel.org Fixes: 5d79c2be5081 ("x86/ftrace: enable EXECMEM_ROX_CACHE for ftrace allocations") Signed-off-by: Mike Rapoport (Microsoft) Reported-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Closes: https://lore.kernel.org/all/20250820184743.0302a8b5@gandalf.local.home/ Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Liam R. Howlett Cc: Borislav Betkov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 0ee0ee7b7c2c..5753e9539ae6 100644 --- a/init/main.c +++ b/init/main.c @@ -956,6 +956,7 @@ void start_kernel(void) sort_main_extable(); trap_init(); mm_core_init(); + maple_tree_init(); poking_init(); ftrace_init(); @@ -973,7 +974,6 @@ void start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); radix_tree_init(); - maple_tree_init(); /* * Set up housekeeping before setting up workqueues to allow the unbound From 21cc2b5c5062a256ae9064442d37ebbc23f5aef7 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Sun, 24 Aug 2025 03:21:15 +0900 Subject: [PATCH 1125/1307] mm/hugetlb: add missing hugetlb_lock in __unmap_hugepage_range() When restoring a reservation for an anonymous page, we need to check to freeing a surplus. However, __unmap_hugepage_range() causes data race because it reads h->surplus_huge_pages without the protection of hugetlb_lock. And adjust_reservation is a boolean variable that indicates whether reservations for anonymous pages in each folio should be restored. Therefore, it should be initialized to false for each round of the loop. However, this variable is not initialized to false except when defining the current adjust_reservation variable. This means that once adjust_reservation is set to true even once within the loop, reservations for anonymous pages will be restored unconditionally in all subsequent rounds, regardless of the folio's state. To fix this, we need to add the missing hugetlb_lock, unlock the page_table_lock earlier so that we don't lock the hugetlb_lock inside the page_table_lock lock, and initialize adjust_reservation to false on each round within the loop. Link: https://lkml.kernel.org/r/20250823182115.1193563-1-aha310510@gmail.com Fixes: df7a6d1f6405 ("mm/hugetlb: restore the reservation if needed") Signed-off-by: Jeongjun Park Reported-by: syzbot+417aeb05fd190f3a6da9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=417aeb05fd190f3a6da9 Reviewed-by: Sidhartha Kumar Cc: Breno Leitao Cc: David Hildenbrand Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 753f99b4c718..eed59cfb5d21 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5851,7 +5851,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, spinlock_t *ptl; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - bool adjust_reservation = false; + bool adjust_reservation; unsigned long last_addr_mask; bool force_flush = false; @@ -5944,6 +5944,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, sz); hugetlb_count_sub(pages_per_huge_page(h), mm); hugetlb_remove_rmap(folio); + spin_unlock(ptl); /* * Restore the reservation for anonymous page, otherwise the @@ -5951,14 +5952,16 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, * If there we are freeing a surplus, do not set the restore * reservation bit. */ + adjust_reservation = false; + + spin_lock_irq(&hugetlb_lock); if (!h->surplus_huge_pages && __vma_private_lock(vma) && folio_test_anon(folio)) { folio_set_hugetlb_restore_reserve(folio); /* Reservation to be adjusted after the spin lock */ adjust_reservation = true; } - - spin_unlock(ptl); + spin_unlock_irq(&hugetlb_lock); /* * Adjust the reservation for the region that will have the From ce652aac9c90a96c6536681d17518efb1f660fb8 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Fri, 22 Aug 2025 11:50:57 +0900 Subject: [PATCH 1126/1307] mm/damon/core: set quota->charged_from to jiffies at first charge window Kernel initializes the "jiffies" timer as 5 minutes below zero, as shown in include/linux/jiffies.h /* * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) And jiffies comparison help functions cast unsigned value to signed to cover wraparound #define time_after_eq(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((a) - (b)) >= 0)) When quota->charged_from is initialized to 0, time_after_eq() can incorrectly return FALSE even after reset_interval has elapsed. This occurs when (jiffies - reset_interval) produces a value with MSB=1, which is interpreted as negative in signed arithmetic. This issue primarily affects 32-bit systems because: On 64-bit systems: MSB=1 values occur after ~292 million years from boot (assuming HZ=1000), almost impossible. On 32-bit systems: MSB=1 values occur during the first 5 minutes after boot, and the second half of every jiffies wraparound cycle, starting from day 25 (assuming HZ=1000) When above unexpected FALSE return from time_after_eq() occurs, the charging window will not reset. The user impact depends on esz value at that time. If esz is 0, scheme ignores configured quotas and runs without any limits. If esz is not 0, scheme stops working once the quota is exhausted. It remains until the charging window finally resets. So, change quota->charged_from to jiffies at damos_adjust_quota() when it is considered as the first charge window. By this change, we can avoid unexpected FALSE return from time_after_eq() Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com Fixes: 2b8a248d5873 ("mm/damon/schemes: implement size quota for schemes application speed control") # 5.16 Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 106ee8b0f2d5..c2e0b469fd43 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2111,6 +2111,10 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) if (!quota->ms && !quota->sz && list_empty("a->goals)) return; + /* First charge window */ + if (!quota->total_charged_sz && !quota->charged_from) + quota->charged_from = jiffies; + /* New charge window starts */ if (time_after_eq(jiffies, quota->charged_from + msecs_to_jiffies(quota->reset_interval))) { From 711f19dfd783ffb37ca4324388b9c4cb87e71363 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Wed, 27 Aug 2025 19:58:57 +0800 Subject: [PATCH 1127/1307] mm/damon/lru_sort: avoid divide-by-zero in damon_lru_sort_apply_parameters() Patch series "mm/damon: avoid divide-by-zero in DAMON module's parameters application". DAMON's RECLAIM and LRU_SORT modules perform no validation on user-configured parameters during application, which may lead to division-by-zero errors. Avoid the divide-by-zero by adding validation checks when DAMON modules attempt to apply the parameters. This patch (of 2): During the calculation of 'hot_thres' and 'cold_thres', either 'sample_interval' or 'aggr_interval' is used as the divisor, which may lead to division-by-zero errors. Fix it by directly returning -EINVAL when such a case occurs. Additionally, since 'aggr_interval' is already required to be set no smaller than 'sample_interval' in damon_set_attrs(), only the case where 'sample_interval' is zero needs to be checked. Link: https://lkml.kernel.org/r/20250827115858.1186261-2-yanquanmin1@huawei.com Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: Quanmin Yan Reviewed-by: SeongJae Park Cc: Kefeng Wang Cc: ze zuo Cc: [6.0+] Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 151a9de5ad8b..b5a5ed16a7a5 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -198,6 +198,11 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + if (!damon_lru_sort_mon_attrs.sample_interval) { + err = -EINVAL; + goto out; + } + err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs); if (err) goto out; From e6b543ca9806d7bced863f43020e016ee996c057 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Wed, 27 Aug 2025 19:58:58 +0800 Subject: [PATCH 1128/1307] mm/damon/reclaim: avoid divide-by-zero in damon_reclaim_apply_parameters() When creating a new scheme of DAMON_RECLAIM, the calculation of 'min_age_region' uses 'aggr_interval' as the divisor, which may lead to division-by-zero errors. Fix it by directly returning -EINVAL when such a case occurs. Link: https://lkml.kernel.org/r/20250827115858.1186261-3-yanquanmin1@huawei.com Fixes: f5a79d7c0c87 ("mm/damon: introduce struct damos_access_pattern") Signed-off-by: Quanmin Yan Reviewed-by: SeongJae Park Cc: Kefeng Wang Cc: ze zuo Cc: [6.1+] Signed-off-by: Andrew Morton --- mm/damon/reclaim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 3c71b4596676..fb7c982a0018 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -194,6 +194,11 @@ static int damon_reclaim_apply_parameters(void) if (err) return err; + if (!damon_reclaim_mon_attrs.aggr_interval) { + err = -EINVAL; + goto out; + } + err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs); if (err) goto out; From 04d3cd43700a2d0fe4bfb1012a8ec7f2e34a3507 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 27 Aug 2025 03:42:21 -0700 Subject: [PATCH 1129/1307] arm64: kexec: initialize kexec_buf struct in load_other_segments() Patch series "kexec: Fix invalid field access". The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data was accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. An initial fix was already landed for arm64[0], and this patchset fixes the problem on the remaining arm64 code and on riscv, as raised by Mark. Discussions about this problem could be found at[1][2]. This patch (of 3): The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data was accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. Link: https://lkml.kernel.org/r/20250827-kbuf_all-v1-0-1df9882bb01a@debian.org Link: https://lkml.kernel.org/r/20250827-kbuf_all-v1-1-1df9882bb01a@debian.org Link: https://lore.kernel.org/all/20250826180742.f2471131255ec1c43683ea07@linux-foundation.org/ [0] Link: https://lore.kernel.org/all/oninomspajhxp4omtdapxnckxydbk2nzmrix7rggmpukpnzadw@c67o7njgdgm3/ [1] Link: https://lore.kernel.org/all/20250826-akpm-v1-1-3c831f0e3799@debian.org/ [2] Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly") Signed-off-by: Breno Leitao Acked-by: Baoquan He Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Coiby Xu Cc: Heiko Carstens Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/arm64/kernel/machine_kexec_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index af1ca875c52c..410060ebd86d 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image, char *initrd, unsigned long initrd_len, char *cmdline) { - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; void *dtb = NULL; unsigned long initrd_load_addr = 0, dtb_len, orig_segments = image->nr_segments; From 8afbd0045922b8146acf1a78ae818693e0468dbd Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 27 Aug 2025 03:42:22 -0700 Subject: [PATCH 1130/1307] riscv: kexec: initialize kexec_buf struct The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data was accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. Link: https://lkml.kernel.org/r/20250827-kbuf_all-v1-2-1df9882bb01a@debian.org Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly") Signed-off-by: Breno Leitao Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Baoquan He Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Coiby Xu Cc: Heiko Carstens Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/riscv/kernel/kexec_elf.c | 4 ++-- arch/riscv/kernel/kexec_image.c | 2 +- arch/riscv/kernel/machine_kexec_file.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index 56444c7bd34e..531d348db84d 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -28,7 +28,7 @@ static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, int i; int ret = 0; size_t size; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; kbuf.image = image; @@ -66,7 +66,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, { int i; int ret; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; unsigned long lowest_paddr = ULONG_MAX; unsigned long lowest_vaddr = ULONG_MAX; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c index 26a81774a78a..8f2eb900910b 100644 --- a/arch/riscv/kernel/kexec_image.c +++ b/arch/riscv/kernel/kexec_image.c @@ -41,7 +41,7 @@ static void *image_load(struct kimage *image, struct riscv_image_header *h; u64 flags; bool be_image, be_kernel; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; int ret; /* Check Image header */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index e36104af2e24..b9eb41b0a975 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -261,7 +261,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, int ret; void *fdt; unsigned long initrd_pbase = 0UL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; char *modified_cmdline = NULL; kbuf.image = image; From e67f0bd05519012eaabaae68618ffc4ed30ab680 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 27 Aug 2025 03:42:23 -0700 Subject: [PATCH 1131/1307] s390: kexec: initialize kexec_buf struct The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data was accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. Link: https://lkml.kernel.org/r/20250827-kbuf_all-v1-3-1df9882bb01a@debian.org Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly") Signed-off-by: Breno Leitao Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Baoquan He Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Coiby Xu Cc: Heiko Carstens Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/s390/kernel/kexec_elf.c | 2 +- arch/s390/kernel/kexec_image.c | 2 +- arch/s390/kernel/machine_kexec_file.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 4d364de43799..143e34a4eca5 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_elf(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; Elf_Addr entry; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index a32ce8bea745..9a439175723c 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_image(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; buf.image = image; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index c2bac14dd668..a36d7311c668 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image, static int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image, static int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, { __u32 *lc_ipl_parmblock_ptr; unsigned int len, ncerts; - struct kexec_buf buf; + struct kexec_buf buf = {}; unsigned long addr; void *ptr, *end; int ret; From 3be306cccdccede13e3cefd0c14e430cc2b7c9c7 Mon Sep 17 00:00:00 2001 From: Kyle Meyer Date: Thu, 28 Aug 2025 13:38:20 -0500 Subject: [PATCH 1132/1307] mm/memory-failure: fix redundant updates for already poisoned pages Duplicate memory errors can be reported by multiple sources. Passing an already poisoned page to action_result() causes issues: * The amount of hardware corrupted memory is incorrectly updated. * Per NUMA node MF stats are incorrectly updated. * Redundant "already poisoned" messages are printed. Avoid those issues by: * Skipping hardware corrupted memory updates for already poisoned pages. * Skipping per NUMA node MF stats updates for already poisoned pages. * Dropping redundant "already poisoned" messages. Make MF_MSG_ALREADY_POISONED consistent with other action_page_types and make calls to action_result() consistent for already poisoned normal pages and huge pages. Link: https://lkml.kernel.org/r/aLCiHMy12Ck3ouwC@hpe.com Fixes: b8b9488d50b7 ("mm/memory-failure: improve memory failure action_result messages") Signed-off-by: Kyle Meyer Reviewed-by: Jiaqi Yan Acked-by: David Hildenbrand Reviewed-by: Jane Chu Acked-by: Miaohe Lin Cc: Borislav Betkov Cc: Kyle Meyer Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: "Luck, Tony" Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Russ Anderson Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fc30ca4804bf..10b3c281c2ae 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -956,7 +956,7 @@ static const char * const action_page_types[] = { [MF_MSG_BUDDY] = "free buddy page", [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", - [MF_MSG_ALREADY_POISONED] = "already poisoned", + [MF_MSG_ALREADY_POISONED] = "already poisoned page", [MF_MSG_UNKNOWN] = "unknown page", }; @@ -1349,9 +1349,10 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type, { trace_memory_failure_event(pfn, type, result); - num_poisoned_pages_inc(pfn); - - update_per_node_mf_stats(pfn, result); + if (type != MF_MSG_ALREADY_POISONED) { + num_poisoned_pages_inc(pfn); + update_per_node_mf_stats(pfn, result); + } pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); @@ -2094,12 +2095,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb *hugetlb = 0; return 0; } else if (res == -EHWPOISON) { - pr_err("%#lx: already hardware poisoned\n", pfn); if (flags & MF_ACTION_REQUIRED) { folio = page_folio(p); res = kill_accessing_process(current, folio_pfn(folio), flags); - action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); } + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); return res; } else if (res == -EBUSY) { if (!(flags & MF_NO_RETRY)) { @@ -2285,7 +2285,6 @@ int memory_failure(unsigned long pfn, int flags) goto unlock_mutex; if (TestSetPageHWPoison(p)) { - pr_err("%#lx: already hardware poisoned\n", pfn); res = -EHWPOISON; if (flags & MF_ACTION_REQUIRED) res = kill_accessing_process(current, pfn, flags); From 661a4f307fe0f80c1d544e09476ccba9037e8e65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Aug 2025 19:17:32 +0200 Subject: [PATCH 1133/1307] selftests: netfilter: fix udpclash tool hang Yi Chen reports that 'udpclash' loops forever depending on compiler (and optimization level used); while (x == 1) gets optimized into for (;;). Add volatile qualifier to avoid that. While at it, also run it under timeout(1) and fix the resize script to not ignore the timeout passed as second parameter to insert_flood. Reported-by: Yi Chen Suggested-by: Yi Chen Fixes: 78a588363587 ("selftests: netfilter: add conntrack clash resolution test case") Signed-off-by: Florian Westphal --- tools/testing/selftests/net/netfilter/conntrack_clash.sh | 2 +- tools/testing/selftests/net/netfilter/conntrack_resize.sh | 5 +++-- tools/testing/selftests/net/netfilter/udpclash.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 606a43a60f73..7fc6c5dbd551 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -99,7 +99,7 @@ run_one_clash_test() local entries local cre - if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then echo "INFO: did not receive expected number of replies for $daddr:$dport" ip netns exec "$ctns" conntrack -S # don't fail: check if clash resolution triggered after all. diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 788cd56ea4a0..615fe3c6f405 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -187,7 +187,7 @@ ct_udpclash() [ -x udpclash ] || return while [ $now -lt $end ]; do - ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 now=$(date +%s) done @@ -277,6 +277,7 @@ check_taint() insert_flood() { local n="$1" + local timeout="$2" local r=0 r=$((RANDOM%$insert_count)) @@ -302,7 +303,7 @@ test_floodresize_all() read tainted_then < /proc/sys/kernel/tainted for n in "$nsclient1" "$nsclient2";do - insert_flood "$n" & + insert_flood "$n" "$timeout" & done # resize table constantly while flood/insert/dump/flushs diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c index 85c7b906ad08..79de163d61ab 100644 --- a/tools/testing/selftests/net/netfilter/udpclash.c +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -29,7 +29,7 @@ struct thread_args { int sockfd; }; -static int wait = 1; +static volatile int wait = 1; static void *thread_main(void *varg) { From 4039ce7ef40474d5ba46f414c50cc7020b9cf8ae Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 7 Aug 2025 15:49:59 +0200 Subject: [PATCH 1134/1307] netfilter: nf_tables: Introduce NFTA_DEVICE_PREFIX This new attribute is supposed to be used instead of NFTA_DEVICE_NAME for simple wildcard interface specs. It holds a NUL-terminated string representing an interface name prefix to match on. While kernel code to distinguish full names from prefixes in NFTA_DEVICE_NAME is simpler than this solution, reusing the existing attribute with different semantics leads to confusion between different versions of kernel and user space though: * With old kernels, wildcards submitted by user space are accepted yet silently treated as regular names. * With old user space, wildcards submitted by kernel may cause crashes since libnftnl expects NUL-termination when there is none. Using a distinct attribute type sanitizes these situations as the receiving part detects and rejects the unexpected attribute nested in *_HOOK_DEVS attributes. Fixes: 6d07a289504a ("netfilter: nf_tables: Support wildcard netdev hook specs") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 42 +++++++++++++++++------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2beb30be2c5f..8e0eb832bc01 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1784,10 +1784,12 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, + NFTA_DEVICE_PREFIX, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 58c5425d61c2..c1082de09656 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1959,6 +1959,18 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) return -ENOSPC; } +static bool hook_is_prefix(struct nft_hook *hook) +{ + return strlen(hook->ifname) >= hook->ifnamelen; +} + +static int nft_nla_put_hook_dev(struct sk_buff *skb, struct nft_hook *hook) +{ + int attr = hook_is_prefix(hook) ? NFTA_DEVICE_PREFIX : NFTA_DEVICE_NAME; + + return nla_put_string(skb, attr, hook->ifname); +} + static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, @@ -1990,16 +2002,15 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!first) first = hook; - if (nla_put(skb, NFTA_DEVICE_NAME, - hook->ifnamelen, hook->ifname)) + if (nft_nla_put_hook_dev(skb, hook)) goto nla_put_failure; n++; } nla_nest_end(skb, nest_devs); if (n == 1 && - nla_put(skb, NFTA_HOOK_DEV, - first->ifnamelen, first->ifname)) + !hook_is_prefix(first) && + nla_put_string(skb, NFTA_HOOK_DEV, first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -2310,7 +2321,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain) } static struct nft_hook *nft_netdev_hook_alloc(struct net *net, - const struct nlattr *attr) + const struct nlattr *attr, + bool prefix) { struct nf_hook_ops *ops; struct net_device *dev; @@ -2327,7 +2339,8 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, if (err < 0) goto err_hook_free; - hook->ifnamelen = nla_len(attr); + /* include the terminating NUL-char when comparing non-prefixes */ + hook->ifnamelen = strlen(hook->ifname) + !prefix; /* nf_tables_netdev_event() is called under rtnl_mutex, this is * indirectly serializing all the other holders of the commit_mutex with @@ -2374,14 +2387,22 @@ static int nf_tables_parse_netdev_hooks(struct net *net, struct nft_hook *hook, *next; const struct nlattr *tmp; int rem, n = 0, err; + bool prefix; nla_for_each_nested(tmp, attr, rem) { - if (nla_type(tmp) != NFTA_DEVICE_NAME) { + switch (nla_type(tmp)) { + case NFTA_DEVICE_NAME: + prefix = false; + break; + case NFTA_DEVICE_PREFIX: + prefix = true; + break; + default: err = -EINVAL; goto err_hook; } - hook = nft_netdev_hook_alloc(net, tmp); + hook = nft_netdev_hook_alloc(net, tmp, prefix); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tmp); err = PTR_ERR(hook); @@ -2427,7 +2448,7 @@ static int nft_chain_parse_netdev(struct net *net, struct nlattr *tb[], int err; if (tb[NFTA_HOOK_DEV]) { - hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]); + hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV], false); if (IS_ERR(hook)) { NL_SET_BAD_ATTR(extack, tb[NFTA_HOOK_DEV]); return PTR_ERR(hook); @@ -9458,8 +9479,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, list_for_each_entry_rcu(hook, hook_list, list, lockdep_commit_lock_is_held(net)) { - if (nla_put(skb, NFTA_DEVICE_NAME, - hook->ifnamelen, hook->ifname)) + if (nft_nla_put_hook_dev(skb, hook)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); From 0a228624bcc00af41f281a2a84c928595a74c17d Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Mon, 1 Sep 2025 14:35:37 +0800 Subject: [PATCH 1135/1307] net: atm: fix memory leak in atm_register_sysfs when device_register fail When device_register() return error in atm_register_sysfs(), which can be triggered by kzalloc fail in device_private_init() or other reasons, kmemleak reports the following memory leaks: unreferenced object 0xffff88810182fb80 (size 8): comm "insmod", pid 504, jiffies 4294852464 hex dump (first 8 bytes): 61 64 75 6d 6d 79 30 00 adummy0. backtrace (crc 14dfadaf): __kmalloc_node_track_caller_noprof+0x335/0x450 kvasprintf+0xb3/0x130 kobject_set_name_vargs+0x45/0x120 dev_set_name+0xa9/0xe0 atm_register_sysfs+0xf3/0x220 atm_dev_register+0x40b/0x780 0xffffffffa000b089 do_one_initcall+0x89/0x300 do_init_module+0x27b/0x7d0 load_module+0x54cd/0x5ff0 init_module_from_file+0xe4/0x150 idempotent_init_module+0x32c/0x610 __x64_sys_finit_module+0xbd/0x120 do_syscall_64+0xa8/0x270 entry_SYSCALL_64_after_hwframe+0x77/0x7f When device_create_file() return error in atm_register_sysfs(), the same issue also can be triggered. Function put_device() should be called to release kobj->name memory and other device resource, instead of kfree(). Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Wang Liang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250901063537.1472221-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- net/atm/resources.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/atm/resources.c b/net/atm/resources.c index b19d851e1f44..7c6fdedbcf4e 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -112,7 +112,9 @@ struct atm_dev *atm_dev_register(const char *type, struct device *parent, if (atm_proc_dev_register(dev) < 0) { pr_err("atm_proc_dev_register failed for dev %s\n", type); - goto out_fail; + mutex_unlock(&atm_dev_mutex); + kfree(dev); + return NULL; } if (atm_register_sysfs(dev, parent) < 0) { @@ -128,7 +130,7 @@ struct atm_dev *atm_dev_register(const char *type, struct device *parent, return dev; out_fail: - kfree(dev); + put_device(&dev->class_dev); dev = NULL; goto out; } From 47ddf62b43eced739b56422cd55e86b9b4bb7dac Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Wed, 3 Sep 2025 18:51:14 +0200 Subject: [PATCH 1136/1307] Input: xpad - add support for Flydigi Apex 5 Add Flydigi Apex 5 to the list of recognized controllers. Reported-by: Brandon Lin Reported-by: Sergey Belozyorcev Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250903165114.2987905-1-lkml@antheas.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 4c94297e17e6..d72e89c25e50 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -422,6 +422,7 @@ static const struct xpad_device { { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x366c, 0x0005, "ByoWave Proteus Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE, FLAG_DELAY_INIT }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, + { 0x37d7, 0x2501, "Flydigi Apex 5", 0, XTYPE_XBOX360 }, { 0x413d, 0x2104, "Black Shark Green Ghost Gamepad", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } @@ -578,6 +579,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x366c), /* ByoWave controllers */ + XPAD_XBOX360_VENDOR(0x37d7), /* Flydigi Controllers */ XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */ { } }; From c9ddc41cdd522f2db5d492eda3df8994d928be34 Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 17 Aug 2025 19:20:22 -0500 Subject: [PATCH 1137/1307] Input: iqs7222 - avoid enabling unused interrupts If a proximity event node is defined so as to specify the wake-up properties of the touch surface, the proximity event interrupt is enabled unconditionally. This may result in unwanted interrupts. Solve this problem by enabling the interrupt only if the event is mapped to a key or switch code. Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/aKJxxgEWpNaNcUaW@nixie71 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs7222.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 6fac31c0d99f..ff23219a582a 100644 --- a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -2427,6 +2427,9 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222, if (error) return error; + if (!iqs7222->kp_type[chan_index][i]) + continue; + if (!dev_desc->event_offset) continue; From 8bbceba7dc5090c00105e006ce28d1292cfda8dd Mon Sep 17 00:00:00 2001 From: Abin Joseph Date: Wed, 3 Sep 2025 08:22:13 +0530 Subject: [PATCH 1138/1307] net: xilinx: axienet: Add error handling for RX metadata pointer retrieval Add proper error checking for dmaengine_desc_get_metadata_ptr() which can return an error pointer and lead to potential crashes or undefined behaviour if the pointer retrieval fails. Properly handle the error by unmapping DMA buffer, freeing the skb and returning early to prevent further processing with invalid data. Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Abin Joseph Reviewed-by: Radhey Shyam Pandey Link: https://patch.msgid.link/20250903025213.3120181-1-abin.joseph@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0d8a05fe541a..ec6d47dc984a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1168,6 +1168,15 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) &meta_max_len); dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, DMA_FROM_DEVICE); + + if (IS_ERR(app_metadata)) { + if (net_ratelimit()) + netdev_err(lp->ndev, "Failed to get RX metadata pointer\n"); + dev_kfree_skb_any(skb); + lp->ndev->stats.rx_dropped++; + goto rx_submit; + } + /* TODO: Derive app word index programmatically */ rx_len = (app_metadata[LEN_APP] & 0xFFFF); skb_put(skb, rx_len); @@ -1180,6 +1189,7 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) u64_stats_add(&lp->rx_bytes, rx_len); u64_stats_update_end(&lp->rx_stat_sync); +rx_submit: for (i = 0; i < CIRC_SPACE(lp->rx_ring_head, lp->rx_ring_tail, RX_BUF_NUM_DEFAULT); i++) axienet_rx_submit_desc(lp->ndev); From 4844123fe0b853a4982c02666cb3fd863d701d50 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Wed, 3 Sep 2025 18:07:26 +0800 Subject: [PATCH 1139/1307] ppp: fix memory leak in pad_compress_skb If alloc_skb() fails in pad_compress_skb(), it returns NULL without releasing the old skb. The caller does: skb = pad_compress_skb(ppp, skb); if (!skb) goto drop; drop: kfree_skb(skb); When pad_compress_skb() returns NULL, the reference to the old skb is lost and kfree_skb(skb) ends up doing nothing, leading to a memory leak. Align pad_compress_skb() semantics with realloc(): only free the old skb if allocation and compression succeed. At the call site, use the new_skb variable so the original skb is not lost when pad_compress_skb() fails. Fixes: b3f9b92a6ec1 ("[PPP]: add PPP MPPE encryption module") Signed-off-by: Qingfang Deng Reviewed-by: Eric Dumazet Reviewed-by: Yue Haibing Link: https://patch.msgid.link/20250903100726.269839-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 824c8dc4120b..702a7f7183ce 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1744,7 +1744,6 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) */ if (net_ratelimit()) netdev_err(ppp->dev, "ppp: compressor dropped pkt\n"); - kfree_skb(skb); consume_skb(new_skb); new_skb = NULL; } @@ -1845,9 +1844,10 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) "down - pkt dropped.\n"); goto drop; } - skb = pad_compress_skb(ppp, skb); - if (!skb) + new_skb = pad_compress_skb(ppp, skb); + if (!new_skb) goto drop; + skb = new_skb; } /* From b1ab3b029ff137f124c547452d0795e9de20ca63 Mon Sep 17 00:00:00 2001 From: Jeroen de Borst Date: Wed, 3 Sep 2025 10:56:49 -0700 Subject: [PATCH 1140/1307] gve: update MAINTAINERS Jeroen is leaving Google and Josh is taking his place as a maintainer. Signed-off-by: Jeroen de Borst Link: https://patch.msgid.link/20250903175649.23246-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1819c477eee3..c6f470250f6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10388,7 +10388,7 @@ S: Maintained F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS -M: Jeroen de Borst +M: Joshua Washington M: Harshitha Ramamurthy L: netdev@vger.kernel.org S: Maintained From 6a989d37302a41a8a8d6231a4c265fdb6b09d6eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 3 Sep 2025 14:20:54 -0700 Subject: [PATCH 1141/1307] MAINTAINERS: add Sabrina to TLS maintainers Sabrina has been very helpful reviewing TLS patches, fixing bugs, and, I believe, the last one to implement any major feature in the TLS code base (rekeying). Add her as a maintainer. Acked-by: Sabrina Dubroca Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250903212054.1885058-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c6f470250f6a..2dd6e8e5c0fa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17850,6 +17850,7 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: John Fastabend M: Jakub Kicinski +M: Sabrina Dubroca L: netdev@vger.kernel.org S: Maintained F: include/net/tls.h From ed42d80f3bae89592fbb2ffaf8b6b2e720d53f6a Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Wed, 3 Sep 2025 14:36:20 +0800 Subject: [PATCH 1142/1307] tools: gpio: remove the include directory on make clean Remove the generated include directory when running make clean. Fixes: 8674cea84dc6 ("tools/gpio: move to tools buildsystem") Signed-off-by: Zhang Jiao Link: https://lore.kernel.org/r/20250903063621.2424-1-zhangjiao2@cmss.chinamobile.com [Bartosz: add Fixes tag, improve the commit message] Signed-off-by: Bartosz Golaszewski --- tools/gpio/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index ed565eb52275..342e056c8c66 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -77,7 +77,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN) clean: rm -f $(ALL_PROGRAMS) - rm -f $(OUTPUT)include/linux/gpio.h + rm -rf $(OUTPUT)include find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) From fd2004d82d8d8faa94879e3de3096c8511728637 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 3 Sep 2025 22:28:51 +0000 Subject: [PATCH 1143/1307] selftest: net: Fix weird setsockopt() in bind_bhash.c. bind_bhash.c passes (SO_REUSEADDR | SO_REUSEPORT) to setsockopt(). In the asm-generic definition, the value happens to match with the bare SO_REUSEPORT, (2 | 15) == 15, but not on some arch. arch/alpha/include/uapi/asm/socket.h:18:#define SO_REUSEADDR 0x0004 arch/alpha/include/uapi/asm/socket.h:24:#define SO_REUSEPORT 0x0200 arch/mips/include/uapi/asm/socket.h:24:#define SO_REUSEADDR 0x0004 /* Allow reuse of local addresses. */ arch/mips/include/uapi/asm/socket.h:33:#define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ arch/parisc/include/uapi/asm/socket.h:12:#define SO_REUSEADDR 0x0004 arch/parisc/include/uapi/asm/socket.h:18:#define SO_REUSEPORT 0x0200 arch/sparc/include/uapi/asm/socket.h:13:#define SO_REUSEADDR 0x0004 arch/sparc/include/uapi/asm/socket.h:20:#define SO_REUSEPORT 0x0200 include/uapi/asm-generic/socket.h:12:#define SO_REUSEADDR 2 include/uapi/asm-generic/socket.h:27:#define SO_REUSEPORT 15 Let's pass SO_REUSEPORT only. Fixes: c35ecb95c448 ("selftests/net: Add test for timing a bind request to a port with a populated bhash entry") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250903222938.2601522-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bind_bhash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; From 1939a9fcb80353dd8b111aa1e79c691afbde08b4 Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Tue, 26 Aug 2025 16:26:06 +0200 Subject: [PATCH 1144/1307] Input: i8042 - add TUXEDO InfinityBook Pro Gen10 AMD to i8042 quirk table Occasionally wakes up from suspend with missing input on the internal keyboard. Setting the quirks appears to fix the issue for this device as well. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250826142646.13516-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 6ed9fc34948c..1caa6c4ca435 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1155,6 +1155,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "XxHP4NAx"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "XxKK4NAx_XxSP4NAx"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues * after suspend fixable with the forcenorestore quirk. From 9b2bfdbf43adb9929c5ddcdd96efedbf1c88cf53 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 2 Sep 2025 14:12:59 +0200 Subject: [PATCH 1145/1307] phy: mscc: Stop taking ts_lock for tx_queue and use its own lock When transmitting a PTP frame which is timestamp using 2 step, the following warning appears if CONFIG_PROVE_LOCKING is enabled: ============================= [ BUG: Invalid wait context ] 6.17.0-rc1-00326-ge6160462704e #427 Not tainted ----------------------------- ptp4l/119 is trying to lock: c2a44ed4 (&vsc8531->ts_lock){+.+.}-{3:3}, at: vsc85xx_txtstamp+0x50/0xac other info that might help us debug this: context-{4:4} 4 locks held by ptp4l/119: #0: c145f068 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x58/0x1440 #1: c29df974 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x5c4/0x1440 #2: c2aaaad0 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x108/0x350 #3: c2aac170 (&lan966x->tx_lock){+.-.}-{2:2}, at: lan966x_port_xmit+0xd0/0x350 stack backtrace: CPU: 0 UID: 0 PID: 119 Comm: ptp4l Not tainted 6.17.0-rc1-00326-ge6160462704e #427 NONE Hardware name: Generic DT based system Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x7c/0xac dump_stack_lvl from __lock_acquire+0x8e8/0x29dc __lock_acquire from lock_acquire+0x108/0x38c lock_acquire from __mutex_lock+0xb0/0xe78 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from vsc85xx_txtstamp+0x50/0xac vsc85xx_txtstamp from lan966x_fdma_xmit+0xd8/0x3a8 lan966x_fdma_xmit from lan966x_port_xmit+0x1bc/0x350 lan966x_port_xmit from dev_hard_start_xmit+0xc8/0x2c0 dev_hard_start_xmit from sch_direct_xmit+0x8c/0x350 sch_direct_xmit from __dev_queue_xmit+0x680/0x1440 __dev_queue_xmit from packet_sendmsg+0xfa4/0x1568 packet_sendmsg from __sys_sendto+0x110/0x19c __sys_sendto from sys_send+0x18/0x20 sys_send from ret_fast_syscall+0x0/0x1c Exception stack(0xf0b05fa8 to 0xf0b05ff0) 5fa0: 00000001 0000000e 0000000e 0004b47a 0000003a 00000000 5fc0: 00000001 0000000e 00000000 00000121 0004af58 00044874 00000000 00000000 5fe0: 00000001 bee9d420 00025a10 b6e75c7c So, instead of using the ts_lock for tx_queue, use the spinlock that skb_buff_head has. Reviewed-by: Vadim Fedorenko Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250902121259.3257536-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_ptp.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 72847320cb65..d692df7d975c 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -456,12 +456,12 @@ static void vsc85xx_dequeue_skb(struct vsc85xx_ptp *ptp) *p++ = (reg >> 24) & 0xff; } - len = skb_queue_len(&ptp->tx_queue); + len = skb_queue_len_lockless(&ptp->tx_queue); if (len < 1) return; while (len--) { - skb = __skb_dequeue(&ptp->tx_queue); + skb = skb_dequeue(&ptp->tx_queue); if (!skb) return; @@ -486,7 +486,7 @@ static void vsc85xx_dequeue_skb(struct vsc85xx_ptp *ptp) * packet in the FIFO right now, reschedule it for later * packets. */ - __skb_queue_tail(&ptp->tx_queue, skb); + skb_queue_tail(&ptp->tx_queue, skb); } } @@ -1068,6 +1068,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_TX_ON: break; case HWTSTAMP_TX_OFF: + skb_queue_purge(&vsc8531->ptp->tx_queue); break; default: return -ERANGE; @@ -1092,9 +1093,6 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, mutex_lock(&vsc8531->ts_lock); - __skb_queue_purge(&vsc8531->ptp->tx_queue); - __skb_queue_head_init(&vsc8531->ptp->tx_queue); - /* Disable predictor while configuring the 1588 block */ val = vsc85xx_ts_read_csr(phydev, PROCESSOR, MSCC_PHY_PTP_INGR_PREDICTOR); @@ -1180,9 +1178,7 @@ static void vsc85xx_txtstamp(struct mii_timestamper *mii_ts, skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - mutex_lock(&vsc8531->ts_lock); - __skb_queue_tail(&vsc8531->ptp->tx_queue, skb); - mutex_unlock(&vsc8531->ts_lock); + skb_queue_tail(&vsc8531->ptp->tx_queue, skb); return; out: @@ -1548,6 +1544,7 @@ void vsc8584_ptp_deinit(struct phy_device *phydev) if (vsc8531->ptp->ptp_clock) { ptp_clock_unregister(vsc8531->ptp->ptp_clock); skb_queue_purge(&vsc8531->rx_skbs_list); + skb_queue_purge(&vsc8531->ptp->tx_queue); } } @@ -1571,7 +1568,7 @@ irqreturn_t vsc8584_handle_ts_interrupt(struct phy_device *phydev) if (rc & VSC85XX_1588_INT_FIFO_ADD) { vsc85xx_get_tx_ts(priv->ptp); } else if (rc & VSC85XX_1588_INT_FIFO_OVERFLOW) { - __skb_queue_purge(&priv->ptp->tx_queue); + skb_queue_purge(&priv->ptp->tx_queue); vsc85xx_ts_reset_fifo(phydev); } @@ -1591,6 +1588,7 @@ int vsc8584_ptp_probe(struct phy_device *phydev) mutex_init(&vsc8531->phc_lock); mutex_init(&vsc8531->ts_lock); skb_queue_head_init(&vsc8531->rx_skbs_list); + skb_queue_head_init(&vsc8531->ptp->tx_queue); /* Retrieve the shared load/save GPIO. Request it as non exclusive as * the same GPIO can be requested by all the PHYs of the same package. From a00f2015acdbd8a4b3d2382eaeebe11db1925fad Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 3 Sep 2025 12:21:33 -0700 Subject: [PATCH 1146/1307] drm/panthor: validate group queue count A panthor group can have at most MAX_CS_PER_CSG panthor queues. Fixes: 4bdca11507928 ("drm/panthor: Add the driver frontend block") Signed-off-by: Chia-I Wu Reviewed-by: Boris Brezillon # v1 Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20250903192133.288477-1-olvaffe@gmail.com --- drivers/gpu/drm/panthor/panthor_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 1116f2d2826e..4d8e9b34702a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1094,7 +1094,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, struct drm_panthor_queue_create *queue_args; int ret; - if (!args->queues.count) + if (!args->queues.count || args->queues.count > MAX_CS_PER_CSG) return -EINVAL; ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); From f3ef7110924b897f4b79db9f7ac75d319ec09c4a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 28 Aug 2025 19:22:43 +0800 Subject: [PATCH 1147/1307] ACPI/IORT: Fix memory leak in iort_rmr_alloc_sids() If krealloc_array() fails in iort_rmr_alloc_sids(), the function returns NULL but does not free the original 'sids' allocation. This results in a memory leak since the caller overwrites the original pointer with the NULL return value. Fixes: 491cf4a6735a ("ACPI/IORT: Add support to retrieve IORT RMR reserved regions") Cc: # 6.0.x Signed-off-by: Miaoqian Lin Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/20250828112243.61460-1-linmq006@gmail.com Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/iort.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 98759d6199d3..65f0f56ad753 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -937,8 +937,10 @@ static u32 *iort_rmr_alloc_sids(u32 *sids, u32 count, u32 id_start, new_sids = krealloc_array(sids, count + new_count, sizeof(*new_sids), GFP_KERNEL); - if (!new_sids) + if (!new_sids) { + kfree(sids); return NULL; + } for (i = count; i < total_count; i++) new_sids[i] = id_start++; From 93dec51e716db88f32d770dc9ab268964fff320b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 3 Sep 2025 09:41:40 +0800 Subject: [PATCH 1148/1307] md/raid1: fix data lost for writemostly rdev If writemostly is enabled, alloc_behind_master_bio() will allocate a new bio for rdev, with bi_opf set to 0. Later, raid1_write_request() will clone from this bio, hence bi_opf is still 0 for the cloned bio. Submit this cloned bio will end up to be read, causing write data lost. Fix this problem by inheriting bi_opf from original bio for behind_mast_bio. Fixes: e879a0d9cb08 ("md/raid1,raid10: don't ignore IO flags") Reported-and-tested-by: Ian Dall Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220507 Link: https://lore.kernel.org/linux-raid/20250903014140.3690499-1-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Reviewed-by: Li Nan --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 408c26398321..bf44878ec640 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1225,7 +1225,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, int i = 0; struct bio *behind_bio = NULL; - behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO, + behind_bio = bio_alloc_bioset(NULL, vcnt, bio->bi_opf, GFP_NOIO, &r1_bio->mddev->bio_set); /* discard op, we don't support writezero/writesame yet */ From 7202082b7b7a256d04ec96131c7f859df0a79f64 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Thu, 4 Sep 2025 15:34:52 +0800 Subject: [PATCH 1149/1307] md: prevent incorrect update of resync/recovery offset In md_do_sync(), when md_sync_action returns ACTION_FROZEN, subsequent call to md_sync_position() will return MaxSector. This causes 'curr_resync' (and later 'recovery_offset') to be set to MaxSector too, which incorrectly signals that recovery/resync has completed, even though disk data has not actually been updated. To fix this issue, skip updating any offset values when the sync action is FROZEN. The same holds true for IDLE. Fixes: 7d9f107a4e94 ("md: use new helpers in md_do_sync()") Signed-off-by: Li Nan Link: https://lore.kernel.org/linux-raid/20250904073452.3408516-1-linan666@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1baaf52c603c..4e033c26fdd4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9125,6 +9125,11 @@ void md_do_sync(struct md_thread *thread) } action = md_sync_action(mddev); + if (action == ACTION_FROZEN || action == ACTION_IDLE) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + goto skip; + } + desc = md_sync_action_name(action); mddev->last_sync_action = action; From 70bccd9855dae56942f2b18a08ba137bb54093a0 Mon Sep 17 00:00:00 2001 From: Makar Semyonov Date: Thu, 4 Sep 2025 15:28:41 +0300 Subject: [PATCH 1150/1307] cifs: prevent NULL pointer dereference in UTF16 conversion There can be a NULL pointer dereference bug here. NULL is passed to __cifs_sfu_make_node without checks, which passes it unchecked to cifs_strndup_to_utf16, which in turn passes it to cifs_local_to_utf16_bytes where '*from' is dereferenced, causing a crash. This patch adds a check for NULL 'src' in cifs_strndup_to_utf16 and returns NULL early to prevent dereferencing NULL pointer. Found by Linux Verification Center (linuxtesting.org) with SVACE Signed-off-by: Makar Semyonov Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifs_unicode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c index 4cc6e0896fad..f8659d36793f 100644 --- a/fs/smb/client/cifs_unicode.c +++ b/fs/smb/client/cifs_unicode.c @@ -629,6 +629,9 @@ cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, int len; __le16 *dst; + if (!src) + return NULL; + len = cifs_local_to_utf16_bytes(src, maxlen, cp); len += 2; /* NULL */ dst = kmalloc(len, GFP_KERNEL); From 157cf360c4a8751f7f511a71cc3a283b5d27f889 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Thu, 4 Sep 2025 10:43:22 +0800 Subject: [PATCH 1151/1307] net: libwx: fix to enable RSS Now when SRIOV is enabled, PF with multiple queues can only receive all packets on queue 0. This is caused by an incorrect flag judgement, which prevents RSS from being enabled. In fact, RSS is supported for the functions when SRIOV is enabled. Remove the flag judgement to fix it. Fixes: c52d4b898901 ("net: libwx: Redesign flow when sriov is enabled") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/A3B7449A08A044D0+20250904024322.87145-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index bcd07a715752..5cb353a97d6d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2078,10 +2078,6 @@ static void wx_setup_mrqc(struct wx *wx) { u32 rss_field = 0; - /* VT, and RSS do not coexist at the same time */ - if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) - return; - /* Disable indicating checksum in descriptor, enables RSS hash */ wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD); From 41f9049cff324b7033e6ed1ded7dfff803cf550a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 10 Jul 2025 13:25:26 -0700 Subject: [PATCH 1152/1307] riscv: Only allow LTO with CMODEL_MEDANY When building with CONFIG_CMODEL_MEDLOW and CONFIG_LTO_CLANG, there is a series of errors due to some files being unconditionally compiled with '-mcmodel=medany', mismatching with the rest of the kernel built with '-mcmodel=medlow': ld.lld: error: Function Import: link error: linking module flags 'Code Model': IDs have conflicting values: 'i32 3' from vmlinux.a(init.o at 899908), and 'i32 1' from vmlinux.a(net-traces.o at 1014628) Only allow LTO to be performed when CONFIG_CMODEL_MEDANY is enabled to ensure there will be no code model mismatch errors. An alternative solution would be disabling LTO for the files with a different code model than the main kernel like some specialized areas of the kernel do but doing that for individual files is not as sustainable than forbidding the combination altogether. Cc: stable@vger.kernel.org Fixes: 021d23428bdb ("RISC-V: build: Allow LTO to be selected") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506290255.KBVM83vZ-lkp@intel.com/ Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250710-riscv-restrict-lto-to-medany-v1-1-b1dac9871ecf@kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4b233a0659e..51dcd8eaa243 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -65,7 +65,7 @@ config RISCV select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 - select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 + select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 && CMODEL_MEDANY select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000 select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU From 5b3706597b90a7b6c9ae148edd07a43531dcd49e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 20:05:59 +0530 Subject: [PATCH 1153/1307] ACPI: RISC-V: Fix FFH_CPPC_CSR error handling The cppc_ffh_csr_read() and cppc_ffh_csr_write() returns Linux error code in "data->ret.error" so cpc_read_ffh() and cpc_write_ffh() must not use sbi_err_map_linux_errno() for FFH_CPPC_CSR. Fixes: 30f3ffbee86b ("ACPI: RISC-V: Add CPPC driver") Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Reviewed-by: Troy Mitchell Reviewed-by: Sunil V L Reviewed-by: Nutty Liu Reviewed-by: Atish Patra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250818143600.894385-2-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- drivers/acpi/riscv/cppc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/riscv/cppc.c b/drivers/acpi/riscv/cppc.c index 440cf9fb91aa..42c1a9052470 100644 --- a/drivers/acpi/riscv/cppc.c +++ b/drivers/acpi/riscv/cppc.c @@ -119,7 +119,7 @@ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) *val = data.ret.value; - return (data.ret.error) ? sbi_err_map_linux_errno(data.ret.error) : 0; + return data.ret.error; } return -EINVAL; @@ -148,7 +148,7 @@ int cpc_write_ffh(int cpu, struct cpc_reg *reg, u64 val) smp_call_function_single(cpu, cppc_ffh_csr_write, &data, 1); - return (data.ret.error) ? sbi_err_map_linux_errno(data.ret.error) : 0; + return data.ret.error; } return -EINVAL; From 744175e972ac7c1f1be53556e186e5976e39f735 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 22 Aug 2025 14:25:39 +0200 Subject: [PATCH 1154/1307] perf test: Checking BPF metadata collection fails on version string commit edf2cadf01e8f ("perf test: add test for BPF metadata collection") fails consistently on the version string check. The perf version string on some of the constant integration test machines contains characters with special meaning in grep's extended regular expression matching algorithm. The output of perf version is: # perf version perf version 6.17.0-20250814.rc1.git20.24ea63ea3877.63.fc42.s390x+git # and the '+' character has special meaning in egrep command. Also the use of egrep is deprecated. Change the perf version string check to fixed character matching and get rid of egrep's warning being deprecated. Use grep -F instead. Output before: # perf test -F 102 Checking BPF metadata collection egrep: warning: egrep is obsolescent; using grep -E Basic BPF metadata test [Failed invalid output] 102: BPF metadata collection test : FAILED! # Output after: # perf test -F 102 Checking BPF metadata collection Basic BPF metadata test [Success] 102: BPF metadata collection test : Ok # Fixes: edf2cadf01e8f ("perf test: add test for BPF metadata collection") Signed-off-by: Thomas Richter Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Acked-by: Sumanth Korikkar Cc: Blake Jones Link: https://lore.kernel.org/r/20250822122540.4104658-1-tmricht@linux.ibm.com Signed-off-by: Namhyung Kim --- tools/perf/tests/shell/test_bpf_metadata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh index 69e3c2055134..be67d56e0f09 100755 --- a/tools/perf/tests/shell/test_bpf_metadata.sh +++ b/tools/perf/tests/shell/test_bpf_metadata.sh @@ -61,7 +61,7 @@ test_bpf_metadata() { /perf_version/ { if (entry) print $NF; } - ' | egrep "$VERS" > /dev/null + ' | grep -qF "$VERS" then echo "Basic BPF metadata test [Failed invalid output]" err=1 From ca81e74dc34734078d34485d4aa123561ba75b15 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 4 Sep 2025 09:17:31 -0700 Subject: [PATCH 1155/1307] perf symbol-elf: Add support for the block argument for libbfd James Clark caught that the BUILD_NONDISTRO=1 build with libbfd was broken due to an update to the read_build_id function adding a blocking argument. Add support for this argument by first opening the file blocking or non-blocking, then switching from bfd_openr to bfd_fdopenr and passing the opened fd. bfd_fdopenr closes the fd on error and when bfd_close are called. Reported-by: James Clark Closes: https://lore.kernel.org/lkml/20250903-james-perf-read-build-id-fix-v1-2-6a694d0a980f@linaro.org/ Fixes: 2c369d91d093 ("perf symbol: Add blocking argument to filename__read_build_id") Signed-off-by: Ian Rogers Reviewed-by: James Clark Link: https://lore.kernel.org/r/20250904161731.1193729-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/symbol-elf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 033c79231a54..1346fd180653 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -873,13 +873,17 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -static int read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid, bool block) { size_t size = sizeof(bid->data); - int err = -1; + int err = -1, fd; bfd *abfd; - abfd = bfd_openr(filename, NULL); + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + if (fd < 0) + return -1; + + abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); if (!abfd) return -1; From 349510052f765b6eb9c2a21d0ffe08ba61fa683c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 1 Sep 2025 22:26:39 +0200 Subject: [PATCH 1156/1307] MAINTAINERS: Add drm-rust tree for Rust DRM drivers and infrastructure Multiple DRM Rust drivers (e.g. nova-core, nova-drm, Tyr, rvkms) are in development, with at least Nova and (soon) Tyr already upstream. Having a shared tree will ease and accelerate development, since all drivers can consume new infrastructure in the same release cycle. This includes infrastructure shared with other subsystem trees (e.g. Rust or driver-core). By consolidating in drm-rust, we avoid adding extra burden to drm-misc maintainers, e.g. dealing with cross-tree topic branches. The drm-misc tree is not a good fit for this stage of development, since its documented scope is small drivers with occasional large series. Rust drivers in development upstream, however, regularly involve large patch series, new infrastructure, and shared topic branches, which may not align well with drm-misc at this stage. The drm-rust tree may not be a permanent solution. Once the core Rust, DRM, and KMS infrastructure have stabilized, drivers and infrastructure changes are expected to transition into drm-misc or standalone driver trees respectively. Until then, drm-rust provides a dedicated place to coordinate development without disrupting existing workflows too much. Cc: David Airlie Cc: Simona Vetter Cc: Maarten Lankhorst Cc: Thomas Zimmermann Signed-off-by: Danilo Krummrich Acked-by: Janne Grunau Acked-by: Maxime Ripard Acked-by: Alexandre Courbot Acked-by: Jani Nikula Acked-by: Daniel Almeida Acked-by: Alice Ryhl Link: https://lore.kernel.org/r/20250901202850.208116-1-dakr@kernel.org Signed-off-by: Alice Ryhl --- MAINTAINERS | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6dcfbd11efef..2b155a12f776 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8079,7 +8079,6 @@ F: Documentation/devicetree/bindings/gpu/ F: Documentation/gpu/ F: drivers/gpu/drm/ F: drivers/gpu/vga/ -F: rust/kernel/drm/ F: include/drm/drm F: include/linux/vga* F: include/uapi/drm/ @@ -8096,6 +8095,16 @@ X: drivers/gpu/drm/radeon/ X: drivers/gpu/drm/tegra/ X: drivers/gpu/drm/xe/ +DRM DRIVERS AND COMMON INFRASTRUCTURE [RUST] +M: Danilo Krummrich +M: Alice Ryhl +S: Supported +W: https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html +T: git https://gitlab.freedesktop.org/drm/rust/kernel.git +F: drivers/gpu/drm/nova/ +F: drivers/gpu/nova-core/ +F: rust/kernel/drm/ + DRM DRIVERS FOR ALLWINNER A10 M: Maxime Ripard M: Chen-Yu Tsai From e1bf212d0604d2cbb5514e47ccec252b656071fb Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Thu, 4 Sep 2025 20:01:19 +0800 Subject: [PATCH 1157/1307] fuse: virtio_fs: fix page fault for DAX page address The commit ced17ee32a99 ("Revert "virtio: reject shm region if length is zero"") exposes the following DAX page fault bug (this fix the failure that getting shm region alway returns false because of zero length): The commit 21aa65bf82a7 ("mm: remove callers of pfn_t functionality") handles the DAX physical page address incorrectly: the removed macro 'phys_to_pfn_t()' should be replaced with 'PHYS_PFN()'. [ 1.390321] BUG: unable to handle page fault for address: ffffd3fb40000008 [ 1.390875] #PF: supervisor read access in kernel mode [ 1.391257] #PF: error_code(0x0000) - not-present page [ 1.391509] PGD 0 P4D 0 [ 1.391626] Oops: Oops: 0000 [#1] SMP NOPTI [ 1.391806] CPU: 6 UID: 1000 PID: 162 Comm: weston Not tainted 6.17.0-rc3-WSL2-STABLE #2 PREEMPT(none) [ 1.392361] RIP: 0010:dax_to_folio+0x14/0x60 [ 1.392653] Code: 52 c9 c3 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 48 c1 ef 05 48 c1 e7 06 48 03 3d 34 b5 31 01 <48> 8b 57 08 48 89 f8 f6 c2 01 75 2b 66 90 c3 cc cc cc cc f7 c7 ff [ 1.393727] RSP: 0000:ffffaf7d04407aa8 EFLAGS: 00010086 [ 1.394003] RAX: 000000a000000000 RBX: ffffaf7d04407bb0 RCX: 0000000000000000 [ 1.394524] RDX: ffffd17b40000008 RSI: 0000000000000083 RDI: ffffd3fb40000000 [ 1.394967] RBP: 0000000000000011 R08: 000000a000000000 R09: 0000000000000000 [ 1.395400] R10: 0000000000001000 R11: ffffaf7d04407c10 R12: 0000000000000000 [ 1.395806] R13: ffffa020557be9c0 R14: 0000014000000001 R15: 0000725970e94000 [ 1.396268] FS: 000072596d6d2ec0(0000) GS:ffffa0222dc59000(0000) knlGS:0000000000000000 [ 1.396715] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.397100] CR2: ffffd3fb40000008 CR3: 000000011579c005 CR4: 0000000000372ef0 [ 1.397518] Call Trace: [ 1.397663] [ 1.397900] dax_insert_entry+0x13b/0x390 [ 1.398179] dax_fault_iter+0x2a5/0x6c0 [ 1.398443] dax_iomap_pte_fault+0x193/0x3c0 [ 1.398750] __fuse_dax_fault+0x8b/0x270 [ 1.398997] ? vm_mmap_pgoff+0x161/0x210 [ 1.399175] __do_fault+0x30/0x180 [ 1.399360] do_fault+0xc4/0x550 [ 1.399547] __handle_mm_fault+0x8e3/0xf50 [ 1.399731] ? do_syscall_64+0x72/0x1e0 [ 1.399958] handle_mm_fault+0x192/0x2f0 [ 1.400204] do_user_addr_fault+0x20e/0x700 [ 1.400418] exc_page_fault+0x66/0x150 [ 1.400602] asm_exc_page_fault+0x26/0x30 [ 1.400831] RIP: 0033:0x72596d1bf703 [ 1.401076] Code: 31 f6 45 31 e4 48 8d 15 b3 73 00 00 e8 06 03 00 00 8b 83 68 01 00 00 e9 8e fa ff ff 0f 1f 00 48 8b 44 24 08 4c 89 ee 48 89 df 00 21 43 34 12 e8 72 09 00 00 e9 6a fa ff ff 0f 1f 44 00 00 e8 [ 1.402172] RSP: 002b:00007ffc350f6dc0 EFLAGS: 00010202 [ 1.402488] RAX: 0000725970e94000 RBX: 00005b7c642c2560 RCX: 0000725970d359a7 [ 1.402898] RDX: 0000000000000003 RSI: 00007ffc350f6dc0 RDI: 00005b7c642c2560 [ 1.403284] RBP: 00007ffc350f6e90 R08: 000000000000000d R09: 0000000000000000 [ 1.403634] R10: 00007ffc350f6dd8 R11: 0000000000000246 R12: 0000000000000001 [ 1.404078] R13: 00007ffc350f6dc0 R14: 0000725970e29ce0 R15: 0000000000000003 [ 1.404450] [ 1.404570] Modules linked in: [ 1.404821] CR2: ffffd3fb40000008 [ 1.405029] ---[ end trace 0000000000000000 ]--- [ 1.405323] RIP: 0010:dax_to_folio+0x14/0x60 [ 1.405556] Code: 52 c9 c3 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 48 c1 ef 05 48 c1 e7 06 48 03 3d 34 b5 31 01 <48> 8b 57 08 48 89 f8 f6 c2 01 75 2b 66 90 c3 cc cc cc cc f7 c7 ff [ 1.406639] RSP: 0000:ffffaf7d04407aa8 EFLAGS: 00010086 [ 1.406910] RAX: 000000a000000000 RBX: ffffaf7d04407bb0 RCX: 0000000000000000 [ 1.407379] RDX: ffffd17b40000008 RSI: 0000000000000083 RDI: ffffd3fb40000000 [ 1.407800] RBP: 0000000000000011 R08: 000000a000000000 R09: 0000000000000000 [ 1.408246] R10: 0000000000001000 R11: ffffaf7d04407c10 R12: 0000000000000000 [ 1.408666] R13: ffffa020557be9c0 R14: 0000014000000001 R15: 0000725970e94000 [ 1.409170] FS: 000072596d6d2ec0(0000) GS:ffffa0222dc59000(0000) knlGS:0000000000000000 [ 1.409608] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.409977] CR2: ffffd3fb40000008 CR3: 000000011579c005 CR4: 0000000000372ef0 [ 1.410437] Kernel panic - not syncing: Fatal exception [ 1.410857] Kernel Offset: 0xc000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Fixes: 21aa65bf82a7 ("mm: remove callers of pfn_t functionality") Signed-off-by: Haiyue Wang Link: https://lore.kernel.org/20250904120339.972-1-haiyuewa@163.com Acked-by: David Hildenbrand Reviewed-by: Miklos Szeredi Signed-off-by: Christian Brauner --- fs/fuse/virtio_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index c826e7ca49f5..76c8fd0bfc75 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1016,7 +1016,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (kaddr) *kaddr = fs->window_kaddr + offset; if (pfn) - *pfn = fs->window_phys_addr + offset; + *pfn = PHYS_PFN(fs->window_phys_addr + offset); return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; } From 4550d33e18112a11a740424c4eec063cd58e918c Mon Sep 17 00:00:00 2001 From: Santhosh Kumar K Date: Thu, 4 Sep 2025 18:47:41 +0530 Subject: [PATCH 1158/1307] mtd: spinand: winbond: Fix oob_layout for W25N01JW Fix the W25N01JW's oob_layout according to the datasheet [1] [1] https://www.winbond.com/hq/product/code-storage-flash-memory/qspinand-flash/?__locale=en&partNo=W25N01JW Fixes: 6a804fb72de5 ("mtd: spinand: winbond: add support for serial NAND flash") Cc: Sridharan S N Cc: stable@vger.kernel.org Signed-off-by: Santhosh Kumar K Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 87053389a1fc..4870b2d5edb2 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -176,6 +176,36 @@ static const struct mtd_ooblayout_ops w25n02kv_ooblayout = { .free = w25n02kv_ooblayout_free, }; +static int w25n01jw_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = (16 * section) + 12; + region->length = 4; + + return 0; +} + +static int w25n01jw_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = (16 * section); + region->length = 12; + + /* Extract BBM */ + if (!section) { + region->offset += 2; + region->length -= 2; + } + + return 0; +} + static int w35n01jw_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *region) { @@ -206,6 +236,11 @@ static int w35n01jw_ooblayout_free(struct mtd_info *mtd, int section, return 0; } +static const struct mtd_ooblayout_ops w25n01jw_ooblayout = { + .ecc = w25n01jw_ooblayout_ecc, + .free = w25n01jw_ooblayout_free, +}; + static const struct mtd_ooblayout_ops w35n01jw_ooblayout = { .ecc = w35n01jw_ooblayout_ecc, .free = w35n01jw_ooblayout_free, @@ -394,7 +429,7 @@ static const struct spinand_info winbond_spinand_table[] = { &write_cache_variants, &update_cache_variants), 0, - SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL), + SPINAND_ECCINFO(&w25n01jw_ooblayout, NULL), SPINAND_CONFIGURE_CHIP(w25n0xjw_hs_cfg)), SPINAND_INFO("W25N01KV", /* 3.3V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xae, 0x21), From a7ed7b9d0ebb038db9963d574da0311cab0b666a Mon Sep 17 00:00:00 2001 From: panfan Date: Thu, 4 Sep 2025 20:22:36 -0700 Subject: [PATCH 1159/1307] arm64: ftrace: fix unreachable PLT for ftrace_caller in init_module with CONFIG_DYNAMIC_FTRACE On arm64, it has been possible for a module's sections to be placed more than 128M away from each other since commit: commit 3e35d303ab7d ("arm64: module: rework module VA range selection") Due to this, an ftrace callsite in a module's .init.text section can be out of branch range for the module's ftrace PLT entry (in the module's .text section). Any attempt to enable tracing of that callsite will result in a BRK being patched into the callsite, resulting in a fatal exception when the callsite is later executed. Fix this by adding an additional trampoline for .init.text, which will be within range. No additional trampolines are necessary due to the way a given module's executable sections are packed together. Any executable section beginning with ".init" will be placed in MOD_INIT_TEXT, and any other executable section, including those beginning with ".exit", will be placed in MOD_TEXT. Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Cc: # 6.5.x Signed-off-by: panfan Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250905032236.3220885-1-panfan@qti.qualcomm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/module.h | 1 + arch/arm64/include/asm/module.lds.h | 1 + arch/arm64/kernel/ftrace.c | 13 ++++++++++--- arch/arm64/kernel/module-plts.c | 12 +++++++++++- arch/arm64/kernel/module.c | 11 +++++++++++ 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 79550b22ba19..fb9b88eebeb1 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -19,6 +19,7 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; + struct plt_entry *init_ftrace_trampolines; }; u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index b9ae8349e35d..fb944b46846d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -2,6 +2,7 @@ SECTIONS { .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } + .init.text.ftrace_trampoline 0 : { BYTE(0) } #ifdef CONFIG_KASAN_SW_TAGS /* diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 5a890714ee2e..5adad37ab4fa 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -258,10 +258,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } -static struct plt_entry *get_ftrace_plt(struct module *mod) +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) { #ifdef CONFIG_MODULES - struct plt_entry *plt = mod->arch.ftrace_trampolines; + struct plt_entry *plt = NULL; + + if (within_module_mem_type(addr, mod, MOD_INIT_TEXT)) + plt = mod->arch.init_ftrace_trampolines; + else if (within_module_mem_type(addr, mod, MOD_TEXT)) + plt = mod->arch.ftrace_trampolines; + else + return NULL; return &plt[FTRACE_PLT_IDX]; #else @@ -332,7 +339,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, if (WARN_ON(!mod)) return false; - plt = get_ftrace_plt(mod); + plt = get_ftrace_plt(mod, pc); if (!plt) { pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); return false; diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index bde32979c06a..7afd370da9f4 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -283,7 +283,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; - Elf_Shdr *pltsec, *tramp = NULL; + Elf_Shdr *pltsec, *tramp = NULL, *init_tramp = NULL; int i; /* @@ -298,6 +298,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, else if (!strcmp(secstrings + sechdrs[i].sh_name, ".text.ftrace_trampoline")) tramp = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, + ".init.text.ftrace_trampoline")) + init_tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -363,5 +366,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); } + if (init_tramp) { + init_tramp->sh_type = SHT_NOBITS; + init_tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + init_tramp->sh_addralign = __alignof__(struct plt_entry); + init_tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 40148d2725ce..d6d443c4a01a 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -466,6 +466,17 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); mod->arch.ftrace_trampolines = plts; + + s = find_section(hdr, sechdrs, ".init.text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plts = (void *)s->sh_addr; + + __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); + + mod->arch.init_ftrace_trampolines = plts; + #endif return 0; } From 5a91f52c8650334aaf8c4c7c90f40c6906994225 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 12 Aug 2025 10:29:23 -0400 Subject: [PATCH 1160/1307] MAINTAINERS: update btrfs entry This is an update to reflect reality, not a signal of any seismic change. Dave Sterba has been the acting maintainer for almost a decade, I've simply been here as a backstop in case he gets hit by a bus. The fact is we have a strong and thriving community with any number of more active developers that can take on that role if it's necessary. I'm exceedingly happy and proud of the work that Dave has done in keeping us all in line, and know that if further changes need to be made it'll be with the development community we've built throughout the lifetime of btrfs. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 10850512c118..bc4f57d179d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5157,7 +5157,6 @@ F: drivers/gpio/gpio-bt8xx.c BTRFS FILE SYSTEM M: Chris Mason -M: Josef Bacik M: David Sterba L: linux-btrfs@vger.kernel.org S: Maintained From 3d1267475b94b3df7a61e4ea6788c7c5d9e473c4 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 2 Sep 2025 11:34:10 +0100 Subject: [PATCH 1161/1307] btrfs: don't allow adding block device of less than 1 MB Commit 15ae0410c37a79 ("btrfs-progs: add error handling for device_get_partition_size_fd_stat()") in btrfs-progs inadvertently changed it so that if the BLKGETSIZE64 ioctl on a block device returned a size of 0, this was no longer seen as an error condition. Unfortunately this is how disconnected NBD devices behave, meaning that with btrfs-progs 6.16 it's now possible to add a device you can't remove: # btrfs device add /dev/nbd0 /root/temp # btrfs device remove /dev/nbd0 /root/temp ERROR: error removing device '/dev/nbd0': Invalid argument This check should always have been done kernel-side anyway, so add a check in btrfs_init_new_device() that the new device doesn't have a size less than BTRFS_DEVICE_RANGE_RESERVED (i.e. 1 MB). Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fa7a929a0461..c6e3efd6f602 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2722,6 +2722,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path goto error; } + if (bdev_nr_bytes(file_bdev(bdev_file)) <= BTRFS_DEVICE_RANGE_RESERVED) { + ret = -EINVAL; + goto error; + } + if (fs_devices->seeding) { seeding_dev = true; down_write(&sb->s_umount); From 992203a1fba51b025c60ec0c8b0d9223343dea95 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 7 Aug 2025 12:49:38 -0400 Subject: [PATCH 1162/1307] nfs/localio: restore creds before releasing pageio data Otherwise if the nfsd filecache code releases the nfsd_file immediately, it can trigger the BUG_ON(cred == current->cred) in __put_cred() when it puts the nfsd_file->nf_file->f-cred. Fixes: b9f5dd57f4a5 ("nfs/localio: use dedicated workqueues for filesystem read and write") Signed-off-by: Scott Mayhew Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20250807164938.2395136-1-smayhew@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index bd5fca285899..bdb82a19136a 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -453,12 +453,13 @@ static void nfs_local_call_read(struct work_struct *work) nfs_local_iter_init(&iter, iocb, READ); status = filp->f_op->read_iter(&iocb->kiocb, &iter); + + revert_creds(save_cred); + if (status != -EIOCBQUEUED) { nfs_local_read_done(iocb, status); nfs_local_pgio_release(iocb); } - - revert_creds(save_cred); } static int @@ -648,14 +649,15 @@ static void nfs_local_call_write(struct work_struct *work) file_start_write(filp); status = filp->f_op->write_iter(&iocb->kiocb, &iter); file_end_write(filp); + + revert_creds(save_cred); + current->flags = old_flags; + if (status != -EIOCBQUEUED) { nfs_local_write_done(iocb, status); nfs_local_vfs_getattr(iocb); nfs_local_pgio_release(iocb); } - - revert_creds(save_cred); - current->flags = old_flags; } static int From e108c8a94f3f958c877f6ec7a6052a893ae4aa98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 25 Jul 2025 18:54:09 +0200 Subject: [PATCH 1163/1307] riscv: use lw when reading int cpu in new_vmalloc_check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REG_L is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: 503638e0babf ("riscv: Stop emitting preventive sfence.vma for new vmalloc mappings") Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Signed-off-by: Radim Krčmář Link: https://lore.kernel.org/r/20250725165410.2896641-4-rkrcmar@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 3a0ec6fd5956..d0ded2438533 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -46,7 +46,7 @@ * a0 = &new_vmalloc[BIT_WORD(cpu)] * a1 = BIT_MASK(cpu) */ - REG_L a2, TASK_TI_CPU(tp) + lw a2, TASK_TI_CPU(tp) /* * Compute the new_vmalloc element position: * (cpu / 64) * 8 = (cpu >> 6) << 3 From 1046791390af6703a5e24718a16f37974adb11db Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 25 Jul 2025 00:08:52 +0200 Subject: [PATCH 1164/1307] riscv: uaccess: fix __put_user_nocheck for unaligned accesses The type of the value to write should be determined by the size of the destination, not by the value itself, which may be a constant. This aligns the behavior with x86_64, where __typeof__(*(__gu_ptr)) is used to infer the correct type. This fixes an issue in put_cmsg, which was only writing 4 out of 8 bytes to the cmsg_len field, causing the glibc tst-socket-timestamp test to fail. Fixes: ca1a66cdd685 ("riscv: uaccess: do not do misaligned accesses in get/put_user()") Signed-off-by: Aurelien Jarno Reviewed-by: Alexandre Ghiti Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250724220853.1969954-1-aurelien@aurel32.net Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index b88a6218b7f2..22e3f52a763d 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -311,7 +311,7 @@ do { \ do { \ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ - __inttype(x) ___val = (__inttype(x))x; \ + __typeof__(*(__gu_ptr)) ___val = (x); \ if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \ goto label; \ break; \ From ad5348c765914766a98ad26cf7a8c28d51a16bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 12 Aug 2025 11:02:55 +0200 Subject: [PATCH 1165/1307] riscv, bpf: use lw when reading int cpu in BPF_MOV64_PERCPU_REG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_ld is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: 19c56d4e5be1 ("riscv, bpf: add internal-only MOV instruction to resolve per-CPU addrs") Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Reviewed-by: Pu Lehui Acked-by: Björn Töpel Tested-by: Björn Töpel # QEMU Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250812090256.757273-3-rkrcmar@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/net/bpf_jit_comp64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 10e01ff06312..6e1554d89681 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1356,7 +1356,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_mv(rd, rs, ctx); #ifdef CONFIG_SMP /* Load current CPU number in T1 */ - emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), + emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu), RV_REG_TP, ctx); /* Load address of __per_cpu_offset array in T2 */ emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); From 8a16586fa7b8a01360890d284896b90c217dca44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 12 Aug 2025 11:02:56 +0200 Subject: [PATCH 1166/1307] riscv, bpf: use lw when reading int cpu in bpf_get_smp_processor_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_ld is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: 2ddec2c80b44 ("riscv, bpf: inline bpf_get_smp_processor_id()") Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Reviewed-by: Pu Lehui Link: https://lore.kernel.org/r/20250812090256.757273-4-rkrcmar@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/net/bpf_jit_comp64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 6e1554d89681..9883a55d61b5 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1763,7 +1763,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, */ if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { /* Load current CPU number in R0 */ - emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), + emit_lw(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), RV_REG_TP, ctx); break; } From f4ea67a722e8c9e1fb8109adebb9fb881ff0793a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 25 Jul 2025 18:54:10 +0200 Subject: [PATCH 1167/1307] riscv: use lw when reading int cpu in asm_per_cpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REG_L is wrong, because thread_info.cpu is 32-bit, not xlen-bit wide. The struct currently has a hole after cpu, so little endian accesses seemed fine. Fixes: be97d0db5f44 ("riscv: VMAP_STACK overflow detection thread-safe") Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Signed-off-by: Radim Krčmář Link: https://lore.kernel.org/r/20250725165410.2896641-5-rkrcmar@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index a8a2af6dfe9d..2a16e88e13de 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -91,7 +91,7 @@ #endif .macro asm_per_cpu dst sym tmp - REG_L \tmp, TASK_TI_CPU_NUM(tp) + lw \tmp, TASK_TI_CPU_NUM(tp) slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT la \dst, __per_cpu_offset add \dst, \dst, \tmp From 95c54cd9c769a198118772e196adfaa1f002e365 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 27 Aug 2025 03:42:22 -0700 Subject: [PATCH 1168/1307] riscv: kexec: Initialize kexec_buf struct The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data was accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly") Signed-off-by: Breno Leitao Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250827-kbuf_all-v1-2-1df9882bb01a@debian.org Signed-off-by: Paul Walmsley --- arch/riscv/kernel/kexec_elf.c | 4 ++-- arch/riscv/kernel/kexec_image.c | 2 +- arch/riscv/kernel/machine_kexec_file.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index 56444c7bd34e..531d348db84d 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -28,7 +28,7 @@ static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, int i; int ret = 0; size_t size; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; kbuf.image = image; @@ -66,7 +66,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, { int i; int ret; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; unsigned long lowest_paddr = ULONG_MAX; unsigned long lowest_vaddr = ULONG_MAX; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c index 26a81774a78a..8f2eb900910b 100644 --- a/arch/riscv/kernel/kexec_image.c +++ b/arch/riscv/kernel/kexec_image.c @@ -41,7 +41,7 @@ static void *image_load(struct kimage *image, struct riscv_image_header *h; u64 flags; bool be_image, be_kernel; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; int ret; /* Check Image header */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index e36104af2e24..b9eb41b0a975 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -261,7 +261,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, int ret; void *fdt; unsigned long initrd_pbase = 0UL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; char *modified_cmdline = NULL; kbuf.image = image; From fef7ded169ed7e133612f90a032dc2af1ce19bef Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 3 Sep 2025 18:53:08 +0000 Subject: [PATCH 1169/1307] riscv: Fix sparse warning in __get_user_error() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to assign 0 to x without an appropriate cast which results in sparse complaining when x is a pointer: >> block/ioctl.c:72:39: sparse: sparse: Using plain integer as NULL pointer So fix this by casting 0 to the correct type of x. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508062321.gHv4kvuY-lkp@intel.com/ Fixes: f6bff7827a48 ("riscv: uaccess: use 'asm_goto_output' for get_user()") Cc: stable@vger.kernel.org Signed-off-by: Alexandre Ghiti Reviewed-by: Clément Léger Reviewed-by: Cyril Bur Link: https://lore.kernel.org/r/20250903-dev-alex-sparse_warnings_v1-v1-1-7e6350beb700@rivosinc.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 22e3f52a763d..551e7490737e 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -209,7 +209,7 @@ do { \ err = 0; \ break; \ __gu_failed: \ - x = 0; \ + x = (__typeof__(x))0; \ err = -EFAULT; \ } while (0) From a03ee11b8f850bd008226c6d392da24163dfb56e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 3 Sep 2025 18:53:09 +0000 Subject: [PATCH 1170/1307] riscv: Fix sparse warning about different address spaces We did not propagate the __user attribute of the pointers in __get_kernel_nofault() and __put_kernel_nofault(), which results in sparse complaining: >> mm/maccess.c:41:17: sparse: sparse: incorrect type in argument 2 (different address spaces) @@ expected void const [noderef] __user *from @@ got unsigned long long [usertype] * @@ mm/maccess.c:41:17: sparse: expected void const [noderef] __user *from mm/maccess.c:41:17: sparse: got unsigned long long [usertype] * So fix this by correctly casting those pointers. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508161713.RWu30Lv1-lkp@intel.com/ Suggested-by: Al Viro Fixes: f6bff7827a48 ("riscv: uaccess: use 'asm_goto_output' for get_user()") Cc: stable@vger.kernel.org Signed-off-by: Alexandre Ghiti Reviewed-by: Cyril Bur Link: https://lore.kernel.org/r/20250903-dev-alex-sparse_warnings_v1-v1-2-7e6350beb700@rivosinc.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 551e7490737e..f5f4f7f85543 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -438,10 +438,10 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) } #define __get_kernel_nofault(dst, src, type, err_label) \ - __get_user_nocheck(*((type *)(dst)), (type *)(src), err_label) + __get_user_nocheck(*((type *)(dst)), (__force __user type *)(src), err_label) #define __put_kernel_nofault(dst, src, type, err_label) \ - __put_user_nocheck(*((type *)(src)), (type *)(dst), err_label) + __put_user_nocheck(*((type *)(src)), (__force __user type *)(dst), err_label) static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { From 80d03a40837a9b26750a25122b906c052cc846c9 Mon Sep 17 00:00:00 2001 From: Vladimir Riabchun Date: Tue, 26 Aug 2025 18:16:46 +0200 Subject: [PATCH 1171/1307] ftrace/samples: Fix function size computation In my_tramp1 function .size directive was placed above ASM_RET instruction, leading to a wrong function size. Link: https://lore.kernel.org/aK3d7vxNcO52kEmg@vova-pc Fixes: 9d907f1ae80b ("samples/ftrace: Fix asm function ELF annotations") Signed-off-by: Vladimir Riabchun Signed-off-by: Steven Rostedt (Google) --- samples/ftrace/ftrace-direct-modify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index cfea7a38befb..da3a9f2091f5 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -75,8 +75,8 @@ asm ( CALL_DEPTH_ACCOUNT " call my_direct_func1\n" " leave\n" -" .size my_tramp1, .-my_tramp1\n" ASM_RET +" .size my_tramp1, .-my_tramp1\n" " .type my_tramp2, @function\n" " .globl my_tramp2\n" From 03e79de4608bdd48ad6eec272e196124cefaf798 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 4 Sep 2025 11:13:34 +0200 Subject: [PATCH 1172/1307] net: fec: Fix possible NPD in fec_enet_phy_reset_after_clk_enable() The function of_phy_find_device may return NULL, so we need to take care before dereferencing phy_dev. Fixes: 64a632da538a ("net: fec: Fix phy_device lookup for phy_reset_after_clk_enable()") Signed-off-by: Stefan Wahren Cc: Christoph Niedermaier Cc: Richard Leitner Reviewed-by: Simon Horman Reviewed-by: Wei Fang Link: https://patch.msgid.link/20250904091334.53965-1-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1383918f8a3f..adf1f2bbcbb1 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2363,7 +2363,8 @@ static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev) */ phy_dev = of_phy_find_device(fep->phy_node); phy_reset_after_clk_enable(phy_dev); - put_device(&phy_dev->mdio.dev); + if (phy_dev) + put_device(&phy_dev->mdio.dev); } } From 0ba5b2f2c381dbec9ed9e4ab3ae5d3e667de0dc3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Sep 2025 15:52:37 +0300 Subject: [PATCH 1173/1307] net: phylink: add lock for serializing concurrent pl->phydev writes with resolver Currently phylink_resolve() protects itself against concurrent phylink_bringup_phy() or phylink_disconnect_phy() calls which modify pl->phydev by relying on pl->state_mutex. The problem is that in phylink_resolve(), pl->state_mutex is in a lock inversion state with pl->phydev->lock. So pl->phydev->lock needs to be acquired prior to pl->state_mutex. But that requires dereferencing pl->phydev in the first place, and without pl->state_mutex, that is racy. Hence the reason for the extra lock. Currently it is redundant, but it will serve a functional purpose once mutex_lock(&phy->lock) will be moved outside of the mutex_lock(&pl->state_mutex) section. Another alternative considered would have been to let phylink_resolve() acquire the rtnl_mutex, which is also held when phylink_bringup_phy() and phylink_disconnect_phy() are called. But since phylink_disconnect_phy() runs under rtnl_lock(), it would deadlock with phylink_resolve() when calling flush_work(&pl->resolve). Additionally, it would have been undesirable because it would have unnecessarily blocked many other call paths as well in the entire kernel, so the smaller-scoped lock was preferred. Link: https://lore.kernel.org/netdev/aLb6puGVzR29GpPx@shell.armlinux.org.uk/ Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250904125238.193990-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c7cb95aa8007..aa17ad2622fc 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -67,6 +67,8 @@ struct phylink { struct timer_list link_poll; struct mutex state_mutex; + /* Serialize updates to pl->phydev with phylink_resolve() */ + struct mutex phydev_mutex; struct phylink_link_state phy_state; unsigned int phy_ib_mode; struct work_struct resolve; @@ -1591,8 +1593,11 @@ static void phylink_resolve(struct work_struct *w) struct phylink_link_state link_state; bool mac_config = false; bool retrigger = false; + struct phy_device *phy; bool cur_link_state; + mutex_lock(&pl->phydev_mutex); + phy = pl->phydev; mutex_lock(&pl->state_mutex); cur_link_state = phylink_link_is_up(pl); @@ -1626,11 +1631,11 @@ static void phylink_resolve(struct work_struct *w) /* If we have a phy, the "up" state is the union of both the * PHY and the MAC */ - if (pl->phydev) + if (phy) link_state.link &= pl->phy_state.link; /* Only update if the PHY link is up */ - if (pl->phydev && pl->phy_state.link) { + if (phy && pl->phy_state.link) { /* If the interface has changed, force a link down * event if the link isn't already down, and re-resolve. */ @@ -1694,6 +1699,7 @@ static void phylink_resolve(struct work_struct *w) queue_work(system_power_efficient_wq, &pl->resolve); } mutex_unlock(&pl->state_mutex); + mutex_unlock(&pl->phydev_mutex); } static void phylink_run_resolve(struct phylink *pl) @@ -1829,6 +1835,7 @@ struct phylink *phylink_create(struct phylink_config *config, if (!pl) return ERR_PTR(-ENOMEM); + mutex_init(&pl->phydev_mutex); mutex_init(&pl->state_mutex); INIT_WORK(&pl->resolve, phylink_resolve); @@ -2089,6 +2096,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, dev_name(&phy->mdio.dev), phy->drv->name, irq_str); kfree(irq_str); + mutex_lock(&pl->phydev_mutex); mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); pl->phydev = phy; @@ -2134,6 +2142,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); + mutex_unlock(&pl->phydev_mutex); phylink_dbg(pl, "phy: %s setting supported %*pb advertising %*pb\n", @@ -2312,6 +2321,7 @@ void phylink_disconnect_phy(struct phylink *pl) ASSERT_RTNL(); + mutex_lock(&pl->phydev_mutex); phy = pl->phydev; if (phy) { mutex_lock(&phy->lock); @@ -2321,8 +2331,11 @@ void phylink_disconnect_phy(struct phylink *pl) pl->mac_tx_clk_stop = false; mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); - flush_work(&pl->resolve); + } + mutex_unlock(&pl->phydev_mutex); + if (phy) { + flush_work(&pl->resolve); phy_disconnect(phy); } } From e2a10daba84968f6b5777d150985fd7d6abc9c84 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Sep 2025 15:52:38 +0300 Subject: [PATCH 1174/1307] net: phy: transfer phy_config_inband() locking responsibility to phylink Problem description =================== Lockdep reports a possible circular locking dependency (AB/BA) between &pl->state_mutex and &phy->lock, as follows. phylink_resolve() // acquires &pl->state_mutex -> phylink_major_config() -> phy_config_inband() // acquires &pl->phydev->lock whereas all the other call sites where &pl->state_mutex and &pl->phydev->lock have the locking scheme reversed. Everywhere else, &pl->phydev->lock is acquired at the top level, and &pl->state_mutex at the lower level. A clear example is phylink_bringup_phy(). The outlier is the newly introduced phy_config_inband() and the existing lock order is the correct one. To understand why it cannot be the other way around, it is sufficient to consider phylink_phy_change(), phylink's callback from the PHY device's phy->phy_link_change() virtual method, invoked by the PHY state machine. phy_link_up() and phy_link_down(), the (indirect) callers of phylink_phy_change(), are called with &phydev->lock acquired. Then phylink_phy_change() acquires its own &pl->state_mutex, to serialize changes made to its pl->phy_state and pl->link_config. So all other instances of &pl->state_mutex and &phydev->lock must be consistent with this order. Problem impact ============== I think the kernel runs a serious deadlock risk if an existing phylink_resolve() thread, which results in a phy_config_inband() call, is concurrent with a phy_link_up() or phy_link_down() call, which will deadlock on &pl->state_mutex in phylink_phy_change(). Practically speaking, the impact may be limited by the slow speed of the medium auto-negotiation protocol, which makes it unlikely for the current state to still be unresolved when a new one is detected, but I think the problem is there. Nonetheless, the problem was discovered using lockdep. Proposed solution ================= Practically speaking, the phy_config_inband() requirement of having phydev->lock acquired must transfer to the caller (phylink is the only caller). There, it must bubble up until immediately before &pl->state_mutex is acquired, for the cases where that takes place. Solution details, considerations, notes ======================================= This is the phy_config_inband() call graph: sfp_upstream_ops :: connect_phy() | v phylink_sfp_connect_phy() | v phylink_sfp_config_phy() | | sfp_upstream_ops :: module_insert() | | | v | phylink_sfp_module_insert() | | | | sfp_upstream_ops :: module_start() | | | | | v | | phylink_sfp_module_start() | | | | v v | phylink_sfp_config_optical() phylink_start() | | | phylink_resume() v v | | phylink_sfp_set_config() | | | v v v phylink_mac_initial_config() | phylink_resolve() | | phylink_ethtool_ksettings_set() v v v phylink_major_config() | v phy_config_inband() phylink_major_config() caller #1, phylink_mac_initial_config(), does not acquire &pl->state_mutex nor do its callers. It must acquire &pl->phydev->lock prior to calling phylink_major_config(). phylink_major_config() caller #2, phylink_resolve() acquires &pl->state_mutex, thus also needs to acquire &pl->phydev->lock. phylink_major_config() caller #3, phylink_ethtool_ksettings_set(), is completely uninteresting, because it only calls phylink_major_config() if pl->phydev is NULL (otherwise it calls phy_ethtool_ksettings_set()). We need to change nothing there. Other solutions =============== The lock inversion between &pl->state_mutex and &pl->phydev->lock has occurred at least once before, as seen in commit c718af2d00a3 ("net: phylink: fix ethtool -A with attached PHYs"). The solution there was to simply not call phy_set_asym_pause() under the &pl->state_mutex. That cannot be extended to our case though, where the phy_config_inband() call is much deeper inside the &pl->state_mutex section. Fixes: 5fd0f1a02e75 ("net: phylink: add negotiation of in-band capabilities") Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250904125238.193990-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 12 ++++-------- drivers/net/phy/phylink.c | 9 +++++++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 13df28445f02..c02da57a4da5 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1065,23 +1065,19 @@ EXPORT_SYMBOL_GPL(phy_inband_caps); */ int phy_config_inband(struct phy_device *phydev, unsigned int modes) { - int err; + lockdep_assert_held(&phydev->lock); if (!!(modes & LINK_INBAND_DISABLE) + !!(modes & LINK_INBAND_ENABLE) + !!(modes & LINK_INBAND_BYPASS) != 1) return -EINVAL; - mutex_lock(&phydev->lock); if (!phydev->drv) - err = -EIO; + return -EIO; else if (!phydev->drv->config_inband) - err = -EOPNOTSUPP; - else - err = phydev->drv->config_inband(phydev, modes); - mutex_unlock(&phydev->lock); + return -EOPNOTSUPP; - return err; + return phydev->drv->config_inband(phydev, modes); } EXPORT_SYMBOL(phy_config_inband); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index aa17ad2622fc..1988b7d2089a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1434,6 +1434,7 @@ static void phylink_get_fixed_state(struct phylink *pl, static void phylink_mac_initial_config(struct phylink *pl, bool force_restart) { struct phylink_link_state link_state; + struct phy_device *phy = pl->phydev; switch (pl->req_link_an_mode) { case MLO_AN_PHY: @@ -1457,7 +1458,11 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart) link_state.link = false; phylink_apply_manual_flow(pl, &link_state); + if (phy) + mutex_lock(&phy->lock); phylink_major_config(pl, force_restart, &link_state); + if (phy) + mutex_unlock(&phy->lock); } static const char *phylink_pause_to_str(int pause) @@ -1598,6 +1603,8 @@ static void phylink_resolve(struct work_struct *w) mutex_lock(&pl->phydev_mutex); phy = pl->phydev; + if (phy) + mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); cur_link_state = phylink_link_is_up(pl); @@ -1699,6 +1706,8 @@ static void phylink_resolve(struct work_struct *w) queue_work(system_power_efficient_wq, &pl->resolve); } mutex_unlock(&pl->state_mutex); + if (phy) + mutex_unlock(&phy->lock); mutex_unlock(&pl->phydev_mutex); } From 220a0ffde02f962c13bc752b01aa570b8c65a37b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 2 Sep 2025 13:53:04 +0300 Subject: [PATCH 1175/1307] xhci: dbc: decouple endpoint allocation from initialization Decouple allocation of endpoint ring buffer from initialization of the buffer, and initialization of endpoint context parts from from the rest of the contexts. It allows driver to clear up and reinitialize endpoint rings after disconnect without reallocating everything. This is a prerequisite for the next patch that prevents the transfer ring from filling up with cancelled (no-op) TRBs if a debug cable is reconnected several times without transferring anything. Cc: stable@vger.kernel.org Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250902105306.877476-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 71 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 06a2edb9e86e..d0faff233e3e 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -101,13 +101,34 @@ static u32 xhci_dbc_populate_strings(struct dbc_str_descs *strings) return string_length; } +static void xhci_dbc_init_ep_contexts(struct xhci_dbc *dbc) +{ + struct xhci_ep_ctx *ep_ctx; + unsigned int max_burst; + dma_addr_t deq; + + max_burst = DBC_CTRL_MAXBURST(readl(&dbc->regs->control)); + + /* Populate bulk out endpoint context: */ + ep_ctx = dbc_bulkout_ctx(dbc); + deq = dbc_bulkout_enq(dbc); + ep_ctx->ep_info = 0; + ep_ctx->ep_info2 = dbc_epctx_info2(BULK_OUT_EP, 1024, max_burst); + ep_ctx->deq = cpu_to_le64(deq | dbc->ring_out->cycle_state); + + /* Populate bulk in endpoint context: */ + ep_ctx = dbc_bulkin_ctx(dbc); + deq = dbc_bulkin_enq(dbc); + ep_ctx->ep_info = 0; + ep_ctx->ep_info2 = dbc_epctx_info2(BULK_IN_EP, 1024, max_burst); + ep_ctx->deq = cpu_to_le64(deq | dbc->ring_in->cycle_state); +} + static void xhci_dbc_init_contexts(struct xhci_dbc *dbc, u32 string_length) { struct dbc_info_context *info; - struct xhci_ep_ctx *ep_ctx; u32 dev_info; - dma_addr_t deq, dma; - unsigned int max_burst; + dma_addr_t dma; if (!dbc) return; @@ -121,20 +142,8 @@ static void xhci_dbc_init_contexts(struct xhci_dbc *dbc, u32 string_length) info->serial = cpu_to_le64(dma + DBC_MAX_STRING_LENGTH * 3); info->length = cpu_to_le32(string_length); - /* Populate bulk out endpoint context: */ - ep_ctx = dbc_bulkout_ctx(dbc); - max_burst = DBC_CTRL_MAXBURST(readl(&dbc->regs->control)); - deq = dbc_bulkout_enq(dbc); - ep_ctx->ep_info = 0; - ep_ctx->ep_info2 = dbc_epctx_info2(BULK_OUT_EP, 1024, max_burst); - ep_ctx->deq = cpu_to_le64(deq | dbc->ring_out->cycle_state); - - /* Populate bulk in endpoint context: */ - ep_ctx = dbc_bulkin_ctx(dbc); - deq = dbc_bulkin_enq(dbc); - ep_ctx->ep_info = 0; - ep_ctx->ep_info2 = dbc_epctx_info2(BULK_IN_EP, 1024, max_burst); - ep_ctx->deq = cpu_to_le64(deq | dbc->ring_in->cycle_state); + /* Populate bulk in and out endpoint contexts: */ + xhci_dbc_init_ep_contexts(dbc); /* Set DbC context and info registers: */ lo_hi_writeq(dbc->ctx->dma, &dbc->regs->dccp); @@ -436,6 +445,23 @@ dbc_alloc_ctx(struct device *dev, gfp_t flags) return ctx; } +static void xhci_dbc_ring_init(struct xhci_ring *ring) +{ + struct xhci_segment *seg = ring->first_seg; + + /* clear all trbs on ring in case of old ring */ + memset(seg->trbs, 0, TRB_SEGMENT_SIZE); + + /* Only event ring does not use link TRB */ + if (ring->type != TYPE_EVENT) { + union xhci_trb *trb = &seg->trbs[TRBS_PER_SEGMENT - 1]; + + trb->link.segment_ptr = cpu_to_le64(ring->first_seg->dma); + trb->link.control = cpu_to_le32(LINK_TOGGLE | TRB_TYPE(TRB_LINK)); + } + xhci_initialize_ring_info(ring); +} + static struct xhci_ring * xhci_dbc_ring_alloc(struct device *dev, enum xhci_ring_type type, gfp_t flags) { @@ -464,15 +490,10 @@ xhci_dbc_ring_alloc(struct device *dev, enum xhci_ring_type type, gfp_t flags) seg->dma = dma; - /* Only event ring does not use link TRB */ - if (type != TYPE_EVENT) { - union xhci_trb *trb = &seg->trbs[TRBS_PER_SEGMENT - 1]; - - trb->link.segment_ptr = cpu_to_le64(dma); - trb->link.control = cpu_to_le32(LINK_TOGGLE | TRB_TYPE(TRB_LINK)); - } INIT_LIST_HEAD(&ring->td_list); - xhci_initialize_ring_info(ring); + + xhci_dbc_ring_init(ring); + return ring; dma_fail: kfree(seg); From a5c98e8b1398534ae1feb6e95e2d3ee5215538ed Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 2 Sep 2025 13:53:05 +0300 Subject: [PATCH 1176/1307] xhci: dbc: Fix full DbC transfer ring after several reconnects Pending requests will be flushed on disconnect, and the corresponding TRBs will be turned into No-op TRBs, which are ignored by the xHC controller once it starts processing the ring. If the USB debug cable repeatedly disconnects before ring is started then the ring will eventually be filled with No-op TRBs. No new transfers can be queued when the ring is full, and driver will print the following error message: "xhci_hcd 0000:00:14.0: failed to queue trbs" This is a normal case for 'in' transfers where TRBs are always enqueued in advance, ready to take on incoming data. If no data arrives, and device is disconnected, then ring dequeue will remain at beginning of the ring while enqueue points to first free TRB after last cancelled No-op TRB. s Solve this by reinitializing the rings when the debug cable disconnects and DbC is leaving the configured state. Clear the whole ring buffer and set enqueue and dequeue to the beginning of ring, and set cycle bit to its initial state. Cc: stable@vger.kernel.org Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250902105306.877476-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index d0faff233e3e..63edf2d8f245 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -462,6 +462,25 @@ static void xhci_dbc_ring_init(struct xhci_ring *ring) xhci_initialize_ring_info(ring); } +static int xhci_dbc_reinit_ep_rings(struct xhci_dbc *dbc) +{ + struct xhci_ring *in_ring = dbc->eps[BULK_IN].ring; + struct xhci_ring *out_ring = dbc->eps[BULK_OUT].ring; + + if (!in_ring || !out_ring || !dbc->ctx) { + dev_warn(dbc->dev, "Can't re-init unallocated endpoints\n"); + return -ENODEV; + } + + xhci_dbc_ring_init(in_ring); + xhci_dbc_ring_init(out_ring); + + /* set ep context enqueue, dequeue, and cycle to initial values */ + xhci_dbc_init_ep_contexts(dbc); + + return 0; +} + static struct xhci_ring * xhci_dbc_ring_alloc(struct device *dev, enum xhci_ring_type type, gfp_t flags) { @@ -885,7 +904,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) dev_info(dbc->dev, "DbC cable unplugged\n"); dbc->state = DS_ENABLED; xhci_dbc_flush_requests(dbc); - + xhci_dbc_reinit_ep_rings(dbc); return EVT_DISC; } @@ -895,7 +914,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) writel(portsc, &dbc->regs->portsc); dbc->state = DS_ENABLED; xhci_dbc_flush_requests(dbc); - + xhci_dbc_reinit_ep_rings(dbc); return EVT_DISC; } From edcbe06453ddfde21f6aa763f7cab655f26133cc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 2 Sep 2025 13:53:06 +0300 Subject: [PATCH 1177/1307] xhci: fix memory leak regression when freeing xhci vdev devices depth first Suspend-resume cycle test revealed a memory leak in 6.17-rc3 Turns out the slot_id race fix changes accidentally ends up calling xhci_free_virt_device() with an incorrect vdev parameter. The vdev variable was reused for temporary purposes right before calling xhci_free_virt_device(). Fix this by passing the correct vdev parameter. The slot_id race fix that caused this regression was targeted for stable, so this needs to be applied there as well. Fixes: 2eb03376151b ("usb: xhci: Fix slot_id resource race conflict") Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/linux-usb/20250829181354.4450-1-00107082@163.com Suggested-by: Michal Pecio Suggested-by: David Wang <00107082@163.com> Cc: stable@vger.kernel.org Tested-by: David Wang <00107082@163.com> Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250902105306.877476-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 81eaad87a3d9..c4a6544aa107 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -962,7 +962,7 @@ static void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_i out: /* we are now at a leaf device */ xhci_debugfs_remove_slot(xhci, slot_id); - xhci_free_virt_device(xhci, vdev, slot_id); + xhci_free_virt_device(xhci, xhci->devs[slot_id], slot_id); } int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, From 8d63c83d8eb922f6c316320f50c82fa88d099bea Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 25 Aug 2025 12:00:22 -0400 Subject: [PATCH 1178/1307] USB: gadget: dummy-hcd: Fix locking bug in RT-enabled kernels Yunseong Kim and the syzbot fuzzer both reported a problem in RT-enabled kernels caused by the way dummy-hcd mixes interrupt management and spin-locking. The pattern was: local_irq_save(flags); spin_lock(&dum->lock); ... spin_unlock(&dum->lock); ... // calls usb_gadget_giveback_request() local_irq_restore(flags); The code was written this way because usb_gadget_giveback_request() needs to be called with interrupts disabled and the private lock not held. While this pattern works fine in non-RT kernels, it's not good when RT is enabled. RT kernels handle spinlocks much like mutexes; in particular, spin_lock() may sleep. But sleeping is not allowed while local interrupts are disabled. To fix the problem, rewrite the code to conform to the pattern used elsewhere in dummy-hcd and other UDC drivers: spin_lock_irqsave(&dum->lock, flags); ... spin_unlock(&dum->lock); usb_gadget_giveback_request(...); spin_lock(&dum->lock); ... spin_unlock_irqrestore(&dum->lock, flags); This approach satisfies the RT requirements. Signed-off-by: Alan Stern Cc: stable Fixes: b4dbda1a22d2 ("USB: dummy-hcd: disable interrupts during req->complete") Reported-by: Yunseong Kim Closes: Reported-by: syzbot+8baacc4139f12fa77909@syzkaller.appspotmail.com Closes: Tested-by: syzbot+8baacc4139f12fa77909@syzkaller.appspotmail.com CC: Sebastian Andrzej Siewior CC: stable@vger.kernel.org Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/bb192ae2-4eee-48ee-981f-3efdbbd0d8f0@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 21dbfb0b3bac..1cefca660773 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -765,8 +765,7 @@ static int dummy_dequeue(struct usb_ep *_ep, struct usb_request *_req) if (!dum->driver) return -ESHUTDOWN; - local_irq_save(flags); - spin_lock(&dum->lock); + spin_lock_irqsave(&dum->lock, flags); list_for_each_entry(iter, &ep->queue, queue) { if (&iter->req != _req) continue; @@ -776,15 +775,16 @@ static int dummy_dequeue(struct usb_ep *_ep, struct usb_request *_req) retval = 0; break; } - spin_unlock(&dum->lock); if (retval == 0) { dev_dbg(udc_dev(dum), "dequeued req %p from %s, len %d buf %p\n", req, _ep->name, _req->length, _req->buf); + spin_unlock(&dum->lock); usb_gadget_giveback_request(_ep, _req); + spin_lock(&dum->lock); } - local_irq_restore(flags); + spin_unlock_irqrestore(&dum->lock, flags); return retval; } From f34bfcc77b18375a87091c289c2eb53c249787b4 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Thu, 21 Aug 2025 20:37:57 +0000 Subject: [PATCH 1179/1307] usb: typec: tcpm: properly deliver cable vdms to altmode drivers tcpm_handle_vdm_request delivers messages to the partner altmode or the cable altmode depending on the SVDM response type, which is incorrect. The partner or cable should be chosen based on the received message type instead. Also add this filter to ADEV_NOTIFY_USB_AND_QUEUE_VDM, which is used when the Enter Mode command is responded to by a NAK on SOP or SOP' and when the Exit Mode command is responded to by an ACK on SOP. Fixes: 7e7877c55eb1 ("usb: typec: tcpm: add alt mode enter/exit/vdm support for sop'") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250821203759.1720841-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1f6fdfaa34bf..b2a568a5bc9b 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2426,17 +2426,21 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port, case ADEV_NONE: break; case ADEV_NOTIFY_USB_AND_QUEUE_VDM: - WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB, NULL)); - typec_altmode_vdm(adev, p[0], &p[1], cnt); + if (rx_sop_type == TCPC_TX_SOP_PRIME) { + typec_cable_altmode_vdm(adev, TYPEC_PLUG_SOP_P, p[0], &p[1], cnt); + } else { + WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB, NULL)); + typec_altmode_vdm(adev, p[0], &p[1], cnt); + } break; case ADEV_QUEUE_VDM: - if (response_tx_sop_type == TCPC_TX_SOP_PRIME) + if (rx_sop_type == TCPC_TX_SOP_PRIME) typec_cable_altmode_vdm(adev, TYPEC_PLUG_SOP_P, p[0], &p[1], cnt); else typec_altmode_vdm(adev, p[0], &p[1], cnt); break; case ADEV_QUEUE_VDM_SEND_EXIT_MODE_ON_FAIL: - if (response_tx_sop_type == TCPC_TX_SOP_PRIME) { + if (rx_sop_type == TCPC_TX_SOP_PRIME) { if (typec_cable_altmode_vdm(adev, TYPEC_PLUG_SOP_P, p[0], &p[1], cnt)) { int svdm_version = typec_get_cable_svdm_version( From 21d8525d2e061cde034277d518411b02eac764e2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Sep 2025 17:39:24 +0200 Subject: [PATCH 1180/1307] usb: gadget: midi2: Fix missing UMP group attributes initialization The gadget card driver forgot to call snd_ump_update_group_attrs() after adding FBs, and this leaves the UMP group attributes uninitialized. As a result, -ENODEV error is returned at opening a legacy rawmidi device as an inactive group. This patch adds the missing call to address the behavior above. Fixes: 8b645922b223 ("usb: gadget: Add support for USB MIDI 2.0 function driver") Cc: stable Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20250904153932.13589-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 0a800ba53816..b351f9ae0fc5 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -1599,6 +1599,7 @@ static int f_midi2_create_card(struct f_midi2 *midi2) strscpy(fb->info.name, ump_fb_name(b), sizeof(fb->info.name)); } + snd_ump_update_group_attrs(ump); } for (i = 0; i < midi2->num_eps; i++) { From 116e79c679a1530cf833d0ff3007061d7a716bd9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Sep 2025 15:32:34 +0200 Subject: [PATCH 1181/1307] usb: gadget: midi2: Fix MIDI2 IN EP max packet size The EP-IN of MIDI2 (altset 1) wasn't initialized in f_midi2_create_usb_configs() as it's an INT EP unlike others BULK EPs. But this leaves rather the max packet size unchanged no matter which speed is used, resulting in the very slow access. And the wMaxPacketSize values set there look legit for INT EPs, so let's initialize the MIDI2 EP-IN there for achieving the equivalent speed as well. Fixes: 8b645922b223 ("usb: gadget: Add support for USB MIDI 2.0 function driver") Cc: stable Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20250905133240.20966-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi2.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index b351f9ae0fc5..de16b02d857e 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -1737,9 +1737,12 @@ static int f_midi2_create_usb_configs(struct f_midi2 *midi2, case USB_SPEED_HIGH: midi2_midi1_ep_out_desc.wMaxPacketSize = cpu_to_le16(512); midi2_midi1_ep_in_desc.wMaxPacketSize = cpu_to_le16(512); - for (i = 0; i < midi2->num_eps; i++) + for (i = 0; i < midi2->num_eps; i++) { midi2_midi2_ep_out_desc[i].wMaxPacketSize = cpu_to_le16(512); + midi2_midi2_ep_in_desc[i].wMaxPacketSize = + cpu_to_le16(512); + } fallthrough; case USB_SPEED_FULL: midi1_in_eps = midi2_midi1_ep_in_descs; @@ -1748,9 +1751,12 @@ static int f_midi2_create_usb_configs(struct f_midi2 *midi2, case USB_SPEED_SUPER: midi2_midi1_ep_out_desc.wMaxPacketSize = cpu_to_le16(1024); midi2_midi1_ep_in_desc.wMaxPacketSize = cpu_to_le16(1024); - for (i = 0; i < midi2->num_eps; i++) + for (i = 0; i < midi2->num_eps; i++) { midi2_midi2_ep_out_desc[i].wMaxPacketSize = cpu_to_le16(1024); + midi2_midi2_ep_in_desc[i].wMaxPacketSize = + cpu_to_le16(1024); + } midi1_in_eps = midi2_midi1_ep_in_ss_descs; midi1_out_eps = midi2_midi1_ep_out_ss_descs; break; From b5e3277c0f1c3439dd02b58997c06201d0ee8dbf Mon Sep 17 00:00:00 2001 From: Harshit Shah Date: Tue, 2 Sep 2025 12:16:29 -0700 Subject: [PATCH 1182/1307] serial: xilinx_uartps: read reg size from DTS Current implementation uses `CDNS_UART_REGISTER_SPACE(0x1000)` for request_mem_region() and ioremap() in cdns_uart_request_port() API. The cadence/xilinx IP has register space defined from offset 0x0 to 0x48. It also mentions that the register map is defined as [6:0]. So, the upper region may/maynot be used based on the IP integration. In Axiado AX3000 SoC two UART instances are defined 0x100 apart. That is creating issue in some other instance due to overlap with addresses. Since, this address space is already being defined in the devicetree, use the same when requesting the register space. Fixes: 1f7055779001 ("arm64: dts: axiado: Add initial support for AX3000 SoC and eval board") Acked-by: Michal Simek Signed-off-by: Harshit Shah Link: https://lore.kernel.org/r/20250902-xilinx-uartps-reg-size-v3-1-d11cfa7258e3@axiado.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index fe457bf1e15b..a66b44d21fba 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -33,7 +33,6 @@ #define CDNS_UART_MINOR 0 /* works best with devtmpfs */ #define CDNS_UART_NR_PORTS 16 #define CDNS_UART_FIFO_SIZE 64 /* FIFO size */ -#define CDNS_UART_REGISTER_SPACE 0x1000 #define TX_TIMEOUT 500000 /* Rx Trigger level */ @@ -1098,15 +1097,15 @@ static int cdns_uart_verify_port(struct uart_port *port, */ static int cdns_uart_request_port(struct uart_port *port) { - if (!request_mem_region(port->mapbase, CDNS_UART_REGISTER_SPACE, + if (!request_mem_region(port->mapbase, port->mapsize, CDNS_UART_NAME)) { return -ENOMEM; } - port->membase = ioremap(port->mapbase, CDNS_UART_REGISTER_SPACE); + port->membase = ioremap(port->mapbase, port->mapsize); if (!port->membase) { dev_err(port->dev, "Unable to map registers\n"); - release_mem_region(port->mapbase, CDNS_UART_REGISTER_SPACE); + release_mem_region(port->mapbase, port->mapsize); return -ENOMEM; } return 0; @@ -1121,7 +1120,7 @@ static int cdns_uart_request_port(struct uart_port *port) */ static void cdns_uart_release_port(struct uart_port *port) { - release_mem_region(port->mapbase, CDNS_UART_REGISTER_SPACE); + release_mem_region(port->mapbase, port->mapsize); iounmap(port->membase); port->membase = NULL; } @@ -1780,6 +1779,7 @@ static int cdns_uart_probe(struct platform_device *pdev) * and triggers invocation of the config_port() entry point. */ port->mapbase = res->start; + port->mapsize = resource_size(res); port->irq = irq; port->dev = &pdev->dev; port->uartclk = clk_get_rate(cdns_uart_data->uartclk); From ab1396af7595e7d49a3850481b24d7fe7cbdfd31 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 5 Sep 2025 22:06:18 -0700 Subject: [PATCH 1183/1307] trace/fgraph: Fix error handling Commit edede7a6dcd7 ("trace/fgraph: Fix the warning caused by missing unregister notifier") added a call to unregister the PM notifier if register_ftrace_graph() failed. It does so unconditionally. However, the PM notifier is only registered with the first call to register_ftrace_graph(). If the first registration was successful and a subsequent registration failed, the notifier is now unregistered even if ftrace graphs are still registered. Fix the problem by only unregistering the PM notifier during error handling if there are no active fgraph registrations. Fixes: edede7a6dcd7 ("trace/fgraph: Fix the warning caused by missing unregister notifier") Closes: https://lore.kernel.org/all/63b0ba5a-a928-438e-84f9-93028dd72e54@roeck-us.net/ Cc: Ye Weihua Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250906050618.2634078-1-linux@roeck-us.net Signed-off-by: Guenter Roeck Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 2a42c1036ea8..1e3b32b1e82c 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1397,7 +1397,8 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); - unregister_pm_notifier(&ftrace_suspend_notifier); + if (!ftrace_graph_active) + unregister_pm_notifier(&ftrace_suspend_notifier); } return ret; } From c1628c00c4351dd0727ef7f670694f68d9e663d8 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Sat, 6 Sep 2025 11:56:10 +0800 Subject: [PATCH 1184/1307] tracing/osnoise: Fix null-ptr-deref in bitmap_parselist() A crash was observed with the following output: BUG: kernel NULL pointer dereference, address: 0000000000000010 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 92 Comm: osnoise_cpus Not tainted 6.17.0-rc4-00201-gd69eb204c255 #138 PREEMPT(voluntary) RIP: 0010:bitmap_parselist+0x53/0x3e0 Call Trace: osnoise_cpus_write+0x7a/0x190 vfs_write+0xf8/0x410 ? do_sys_openat2+0x88/0xd0 ksys_write+0x60/0xd0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f This issue can be reproduced by below code: fd=open("/sys/kernel/debug/tracing/osnoise/cpus", O_WRONLY); write(fd, "0-2", 0); When user pass 'count=0' to osnoise_cpus_write(), kmalloc() will return ZERO_SIZE_PTR (16) and cpulist_parse() treat it as a normal value, which trigger the null pointer dereference. Add check for the parameter 'count'. Cc: Cc: Cc: Link: https://lore.kernel.org/20250906035610.3880282-1-wangliang74@huawei.com Fixes: 17f89102fe23 ("tracing/osnoise: Allow arbitrarily long CPU string") Signed-off-by: Wang Liang Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index fd259da0aa64..337bc0eb5d71 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2322,6 +2322,9 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, int running, err; char *buf __free(kfree) = NULL; + if (count < 1) + return 0; + buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; From 5f9efb6b7667043527d377421af2070cc0aa2ecd Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Fri, 5 Sep 2025 13:51:58 +0200 Subject: [PATCH 1185/1307] Input: mtk-pmic-keys - MT6359 has a specific release irq Support for MT6359 PMIC keys has been added recently. However, the key release event is not properly handled: only key press events are generated, leaving key states stuck in "pressed". This patch ensures that both key press and key release events are properly emitted by handling the release logic correctly. Introduce a 'key_release_irq' member to the 'mtk_pmic_regs' to identify the devices that have a separate irq for the release event. Fixes: bc25e6bf032e ("Input: mtk-pmic-keys - add support for MT6359 PMIC keys") Signed-off-by: Julien Massot Link: https://lore.kernel.org/r/20250905-radxa-nio-12-l-gpio-v3-1-40f11377fb55@collabora.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/mtk-pmic-keys.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 50e2e792c91d..c78d9f6d97c4 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -55,6 +55,7 @@ struct mtk_pmic_regs { const struct mtk_pmic_keys_regs keys_regs[MTK_PMIC_MAX_KEY_COUNT]; u32 pmic_rst_reg; u32 rst_lprst_mask; /* Long-press reset timeout bitmask */ + bool key_release_irq; }; static const struct mtk_pmic_regs mt6397_regs = { @@ -116,6 +117,7 @@ static const struct mtk_pmic_regs mt6358_regs = { MTK_PMIC_HOMEKEY_RST), .pmic_rst_reg = MT6358_TOP_RST_MISC, .rst_lprst_mask = MTK_PMIC_RST_DU_MASK, + .key_release_irq = true, }; static const struct mtk_pmic_regs mt6359_regs = { @@ -129,6 +131,7 @@ static const struct mtk_pmic_regs mt6359_regs = { MTK_PMIC_HOMEKEY_RST), .pmic_rst_reg = MT6359_TOP_RST_MISC, .rst_lprst_mask = MTK_PMIC_RST_DU_MASK, + .key_release_irq = true, }; struct mtk_pmic_keys_info { @@ -368,7 +371,7 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev) if (keys->keys[index].irq < 0) return keys->keys[index].irq; - if (of_device_is_compatible(node, "mediatek,mt6358-keys")) { + if (mtk_pmic_regs->key_release_irq) { keys->keys[index].irq_r = platform_get_irq_byname(pdev, irqnames_r[index]); From 3c9ba2777d6c86025e1ba4186dc5cd930e40ec5f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Fri, 22 Aug 2025 07:07:14 +0000 Subject: [PATCH 1186/1307] kernfs: Fix UAF in polling when open file is released A use-after-free (UAF) vulnerability was identified in the PSI (Pressure Stall Information) monitoring mechanism: BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140 Read of size 8 at addr ffff3de3d50bd308 by task systemd/1 psi_trigger_poll+0x3c/0x140 cgroup_pressure_poll+0x70/0xa0 cgroup_file_poll+0x8c/0x100 kernfs_fop_poll+0x11c/0x1c0 ep_item_poll.isra.0+0x188/0x2c0 Allocated by task 1: cgroup_file_open+0x88/0x388 kernfs_fop_open+0x73c/0xaf0 do_dentry_open+0x5fc/0x1200 vfs_open+0xa0/0x3f0 do_open+0x7e8/0xd08 path_openat+0x2fc/0x6b0 do_filp_open+0x174/0x368 Freed by task 8462: cgroup_file_release+0x130/0x1f8 kernfs_drain_open_files+0x17c/0x440 kernfs_drain+0x2dc/0x360 kernfs_show+0x1b8/0x288 cgroup_file_show+0x150/0x268 cgroup_pressure_write+0x1dc/0x340 cgroup_file_write+0x274/0x548 Reproduction Steps: 1. Open test/cpu.pressure and establish epoll monitoring 2. Disable monitoring: echo 0 > test/cgroup.pressure 3. Re-enable monitoring: echo 1 > test/cgroup.pressure The race condition occurs because: 1. When cgroup.pressure is disabled (echo 0 > cgroup.pressure), it: - Releases PSI triggers via cgroup_file_release() - Frees of->priv through kernfs_drain_open_files() 2. While epoll still holds reference to the file and continues polling 3. Re-enabling (echo 1 > cgroup.pressure) accesses freed of->priv epolling disable/enable cgroup.pressure fd=open(cpu.pressure) while(1) ... epoll_wait kernfs_fop_poll kernfs_get_active = true echo 0 > cgroup.pressure ... cgroup_file_show kernfs_show // inactive kn kernfs_drain_open_files cft->release(of); kfree(ctx); ... kernfs_get_active = false echo 1 > cgroup.pressure kernfs_show kernfs_activate_one(kn); kernfs_fop_poll kernfs_get_active = true cgroup_file_poll psi_trigger_poll // UAF ... end: close(fd) To address this issue, introduce kernfs_get_active_of() for kernfs open files to obtain active references. This function will fail if the open file has been released. Replace kernfs_get_active() with kernfs_get_active_of() to prevent further operations on released file descriptors. Fixes: 34f26a15611a ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface") Cc: stable Reported-by: Zhang Zhaotian Signed-off-by: Chen Ridong Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 58 +++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index a6c692cac616..9adf36e6364b 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) !list_empty(&of->list)); } +/* Get active reference to kernfs node for an open file */ +static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of) +{ + /* Skip if file was already released */ + if (unlikely(of->released)) + return NULL; + + if (!kernfs_get_active(of->kn)) + return NULL; + + return of; +} + +static void kernfs_put_active_of(struct kernfs_open_file *of) +{ + return kernfs_put_active(of->kn); +} + /** * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn * @@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) if (ops->seq_stop) ops->seq_stop(sf, v); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); } static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) @@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) return ERR_PTR(-ENODEV); ops = kernfs_ops(of->kn); @@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) { + if (!kernfs_get_active_of(of)) { len = -ENODEV; mutex_unlock(&of->mutex); goto out_free; @@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) else len = -EINVAL; - kernfs_put_active(of->kn); + kernfs_put_active_of(of); mutex_unlock(&of->mutex); if (len < 0) @@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) { + if (!kernfs_get_active_of(of)) { mutex_unlock(&of->mutex); len = -ENODEV; goto out_free; @@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) else len = -EINVAL; - kernfs_put_active(of->kn); + kernfs_put_active_of(of); mutex_unlock(&of->mutex); if (len > 0) @@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma) if (!of->vm_ops) return; - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) return; if (of->vm_ops->open) of->vm_ops->open(vma); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); } static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) @@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) if (!of->vm_ops) return VM_FAULT_SIGBUS; - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) return VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) ret = of->vm_ops->fault(vmf); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); return ret; } @@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) if (!of->vm_ops) return VM_FAULT_SIGBUS; - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) return VM_FAULT_SIGBUS; ret = 0; @@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) else file_update_time(file); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); return ret; } @@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, if (!of->vm_ops) return -EINVAL; - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) return -EINVAL; ret = -EINVAL; if (of->vm_ops->access) ret = of->vm_ops->access(vma, addr, buf, len, write); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); return ret; } @@ -455,7 +473,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) mutex_lock(&of->mutex); rc = -ENODEV; - if (!kernfs_get_active(of->kn)) + if (!kernfs_get_active_of(of)) goto out_unlock; ops = kernfs_ops(of->kn); @@ -490,7 +508,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) } vma->vm_ops = &kernfs_vm_ops; out_put: - kernfs_put_active(of->kn); + kernfs_put_active_of(of); out_unlock: mutex_unlock(&of->mutex); @@ -852,7 +870,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); __poll_t ret; - if (!kernfs_get_active(kn)) + if (!kernfs_get_active_of(of)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; if (kn->attr.ops->poll) @@ -860,7 +878,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) else ret = kernfs_generic_poll(of, wait); - kernfs_put_active(kn); + kernfs_put_active_of(of); return ret; } @@ -875,7 +893,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) { + if (!kernfs_get_active_of(of)) { mutex_unlock(&of->mutex); return -ENODEV; } @@ -886,7 +904,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) else ret = generic_file_llseek(file, offset, whence); - kernfs_put_active(of->kn); + kernfs_put_active_of(of); mutex_unlock(&of->mutex); return ret; } From d3684397ea9ba2edf02be0aa2b4dcab3bd74c503 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 15 Aug 2025 19:29:55 -0400 Subject: [PATCH 1187/1307] nfs/localio: avoid bouncing LOCALIO if nfs_client_is_local() Previously nfs_local_probe() was made to disable and then attempt to re-enable LOCALIO (via LOCALIO protocol handshake) if/when it was called and LOCALIO already enabled. Vague memory for _why_ this was the case is that this was useful if/when a local NFS server were to be restarted with a local NFS client connected to it. But as it happens this causes an absurd amount of LOCALIO flapping which has a side-effect of too much IO being needlessly sent to NFSD (using RPC over the loopback network interface). This is the definition of "serious performance loss" (that negates the point of having LOCALIO). So remove this mis-optimization for re-enabling LOCALIO if/when an NFS server is restarted (which is an extremely rare thing to do). Will revisit testing that scenario again but in the meantime this patch restores the full benefit of LOCALIO. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index bdb82a19136a..97abf62f109d 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -180,10 +180,8 @@ static void nfs_local_probe(struct nfs_client *clp) return; } - if (nfs_client_is_local(clp)) { - /* If already enabled, disable and re-enable */ - nfs_localio_disable_client(clp); - } + if (nfs_client_is_local(clp)) + return; if (!nfs_uuid_begin(&clp->cl_uuid)) return; @@ -244,7 +242,8 @@ __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, case -ENOMEM: case -ENXIO: case -ENOENT: - /* Revalidate localio, will disable if unsupported */ + /* Revalidate localio */ + nfs_localio_disable_client(clp); nfs_local_probe(clp); } } From 5a46d2339a5ae268ede53a221f20433d8ea4f2f9 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Thu, 28 Aug 2025 16:51:00 +0200 Subject: [PATCH 1188/1307] flexfiles/pNFS: fix NULL checks on result of ff_layout_choose_ds_for_read Recent commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") has changed the error return type of ff_layout_choose_ds_for_read() from NULL to an error pointer. However, not all code paths have been updated to match the change. Thus, some non-NULL checks will accept error pointers as a valid return value. Reported-by: Dan Carpenter Suggested-by: Dan Carpenter Fixes: f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS errors") Signed-off-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 8dc921d83538..f8ab7b4e09e7 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -773,8 +773,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, continue; if (check_device && - nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) + nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { + // reinitialize the error state in case if this is the last iteration + ds = ERR_PTR(-EINVAL); continue; + } *best_idx = idx; break; @@ -804,7 +807,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, struct nfs4_pnfs_ds *ds; ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); - if (ds) + if (!IS_ERR(ds)) return ds; return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); } @@ -818,7 +821,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, best_idx); - if (ds || !pgio->pg_mirror_idx) + if (!IS_ERR(ds) || !pgio->pg_mirror_idx) return ds; return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); } @@ -868,7 +871,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, req->wb_nio = 0; ds = ff_layout_get_ds_for_read(pgio, &ds_idx); - if (!ds) { + if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; pnfs_generic_pg_cleanup(pgio); @@ -1072,11 +1075,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) { u32 idx = hdr->pgio_mirror_idx + 1; u32 new_idx = 0; + struct nfs4_pnfs_ds *ds; - if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) - ff_layout_send_layouterror(hdr->lseg); - else + ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); + if (IS_ERR(ds)) pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); + else + ff_layout_send_layouterror(hdr->lseg); pnfs_read_resend_pnfs(hdr, new_idx); } From b1817b18ff20e69f5accdccefaf78bf5454bede2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Sep 2025 18:46:16 -0400 Subject: [PATCH 1189/1307] NFS: Protect against 'eof page pollution' This commit fixes the failing xfstest 'generic/363'. When the user mmaps() an area that extends beyond the end of file, and proceeds to write data into the folio that straddles that eof, we're required to discard that folio data if the user calls some function that extends the file length. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/inode.c | 9 +++++++-- fs/nfs/internal.h | 2 ++ fs/nfs/nfs42proc.c | 14 +++++++++++--- fs/nfs/nfstrace.h | 1 + 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 86e36c630f09..a3105f944a0e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -280,6 +281,37 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) } EXPORT_SYMBOL_GPL(nfs_file_fsync); +void nfs_truncate_last_folio(struct address_space *mapping, loff_t from, + loff_t to) +{ + struct folio *folio; + + if (from >= to) + return; + + folio = filemap_lock_folio(mapping, from >> PAGE_SHIFT); + if (IS_ERR(folio)) + return; + + if (folio_mkclean(folio)) + folio_mark_dirty(folio); + + if (folio_test_uptodate(folio)) { + loff_t fpos = folio_pos(folio); + size_t offset = from - fpos; + size_t end = folio_size(folio); + + if (to - fpos < end) + end = to - fpos; + folio_zero_segment(folio, offset, end); + trace_nfs_size_truncate_folio(mapping->host, to); + } + + folio_unlock(folio); + folio_put(folio); +} +EXPORT_SYMBOL_GPL(nfs_truncate_last_folio); + /* * Decide whether a read/modify/write cycle may be more efficient * then a modify/write/read cycle when writing to a page in the @@ -356,6 +388,7 @@ static int nfs_write_begin(const struct kiocb *iocb, dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); + nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos); fgp |= fgf_set_order(len); start: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 338ef77ae423..0b141feacc52 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -716,6 +716,7 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct nfs_fattr *fattr; + loff_t oldsize = i_size_read(inode); int error = 0; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); @@ -731,7 +732,7 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (error) return error; - if (attr->ia_size == i_size_read(inode)) + if (attr->ia_size == oldsize) attr->ia_valid &= ~ATTR_SIZE; } @@ -777,8 +778,12 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, } error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); - if (error == 0) + if (error == 0) { + if (attr->ia_valid & ATTR_SIZE) + nfs_truncate_last_folio(inode->i_mapping, oldsize, + attr->ia_size); error = nfs_refresh_inode(inode, fattr); + } nfs_free_fattr(fattr); out: trace_nfs_setattr_exit(inode, error); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 74d712b58423..1433ae13dba0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -437,6 +437,8 @@ int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); +void nfs_truncate_last_folio(struct address_space *mapping, loff_t from, + loff_t to); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 01c01f45358b..4420b8740e2f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -137,6 +137,7 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], }; struct inode *inode = file_inode(filep); + loff_t oldsize = i_size_read(inode); int err; if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) @@ -145,7 +146,11 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) inode_lock(inode); err = nfs42_proc_fallocate(&msg, filep, offset, len); - if (err == -EOPNOTSUPP) + + if (err == 0) + nfs_truncate_last_folio(inode->i_mapping, oldsize, + offset + len); + else if (err == -EOPNOTSUPP) NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE | NFS_CAP_ZERO_RANGE); @@ -183,6 +188,7 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE], }; struct inode *inode = file_inode(filep); + loff_t oldsize = i_size_read(inode); int err; if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE)) @@ -191,9 +197,11 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len) inode_lock(inode); err = nfs42_proc_fallocate(&msg, filep, offset, len); - if (err == 0) + if (err == 0) { + nfs_truncate_last_folio(inode->i_mapping, oldsize, + offset + len); truncate_pagecache_range(inode, offset, (offset + len) -1); - if (err == -EOPNOTSUPP) + } else if (err == -EOPNOTSUPP) NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE; inode_unlock(inode); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 96b1323318c2..627115179795 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -272,6 +272,7 @@ DECLARE_EVENT_CLASS(nfs_update_size_class, TP_ARGS(inode, new_size)) DEFINE_NFS_UPDATE_SIZE_EVENT(truncate); +DEFINE_NFS_UPDATE_SIZE_EVENT(truncate_folio); DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); DEFINE_NFS_UPDATE_SIZE_EVENT(update); DEFINE_NFS_UPDATE_SIZE_EVENT(grow); From b2036bb65114c01caf4a1afe553026e081703c8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 10:25:35 -0400 Subject: [PATCH 1190/1307] NFSv4.2: Protect copy offload and clone against 'eof page pollution' The NFSv4.2 copy offload and clone functions can also end up extending the size of the destination file, so they too need to call nfs_truncate_last_folio(). Reported-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 4420b8740e2f..e2fea37c5348 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -362,22 +362,27 @@ static int process_copy_commit(struct file *dst, loff_t pos_dst, /** * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload - * @inode: pointer to destination inode + * @file: pointer to destination file * @pos: destination offset * @len: copy length + * @oldsize: length of the file prior to clone/copy * * Punch a hole in the inode page cache, so that the NFS client will * know to retrieve new data. * Update the file size if necessary, and then mark the inode as having * invalid cached values for change attribute, ctime, mtime and space used. */ -static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len) +static void nfs42_copy_dest_done(struct file *file, loff_t pos, loff_t len, + loff_t oldsize) { + struct inode *inode = file_inode(file); + struct address_space *mapping = file->f_mapping; loff_t newsize = pos + len; loff_t end = newsize - 1; - WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping, - pos >> PAGE_SHIFT, end >> PAGE_SHIFT)); + nfs_truncate_last_folio(mapping, oldsize, pos); + WARN_ON_ONCE(invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, + end >> PAGE_SHIFT)); spin_lock(&inode->i_lock); if (newsize > i_size_read(inode)) @@ -410,6 +415,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_server *src_server = NFS_SERVER(src_inode); loff_t pos_src = args->src_pos; loff_t pos_dst = args->dst_pos; + loff_t oldsize_dst = i_size_read(dst_inode); size_t count = args->count; ssize_t status; @@ -483,7 +489,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, goto out; } - nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count); + nfs42_copy_dest_done(dst, pos_dst, res->write_res.count, oldsize_dst); nfs_invalidate_atime(src_inode); status = res->write_res.count; out: @@ -1250,6 +1256,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, struct nfs42_clone_res res = { .server = server, }; + loff_t oldsize_dst = i_size_read(dst_inode); int status; msg->rpc_argp = &args; @@ -1284,7 +1291,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, /* a zero-length count means clone to EOF in src */ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE) count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset; - nfs42_copy_dest_done(dst_inode, dst_offset, count); + nfs42_copy_dest_done(dst_f, dst_offset, count, oldsize_dst); status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); } From 9eb90f435415c7da4800974ed943e39b5578ee7f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Sep 2025 12:06:23 -0400 Subject: [PATCH 1191/1307] NFS: Serialise O_DIRECT i/o and truncate() Ensure that all O_DIRECT reads and writes are complete, and prevent the initiation of new i/o until the setattr operation that will truncate the file is complete. Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +++- fs/nfs/internal.h | 10 ++++++++++ fs/nfs/io.c | 13 ++----------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0b141feacc52..49df9debb1a6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -768,8 +768,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, trace_nfs_setattr_enter(inode); /* Write all dirty data */ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + nfs_file_block_o_direct(NFS_I(inode)); nfs_sync_inode(inode); + } fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1433ae13dba0..c0a44f389f8f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -532,6 +532,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; } +/* Must be called with exclusively locked inode->i_rwsem */ +static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi) +{ + if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { + clear_bit(NFS_INO_ODIRECT, &nfsi->flags); + inode_dio_wait(&nfsi->vfs_inode); + } +} + + /* namespace.c */ #define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, diff --git a/fs/nfs/io.c b/fs/nfs/io.c index 3388faf2acb9..d275b0a250bf 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -14,15 +14,6 @@ #include "internal.h" -/* Call with exclusively locked inode->i_rwsem */ -static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) -{ - if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { - clear_bit(NFS_INO_ODIRECT, &nfsi->flags); - inode_dio_wait(inode); - } -} - /** * nfs_start_io_read - declare the file is being used for buffered reads * @inode: file inode @@ -57,7 +48,7 @@ nfs_start_io_read(struct inode *inode) err = down_write_killable(&inode->i_rwsem); if (err) return err; - nfs_block_o_direct(nfsi, inode); + nfs_file_block_o_direct(nfsi); downgrade_write(&inode->i_rwsem); return 0; @@ -90,7 +81,7 @@ nfs_start_io_write(struct inode *inode) err = down_write_killable(&inode->i_rwsem); if (!err) - nfs_block_o_direct(NFS_I(inode), inode); + nfs_file_block_o_direct(NFS_I(inode)); return err; } From b93128f29733af5d427a335978a19884c2c230e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Sep 2025 12:11:17 -0400 Subject: [PATCH 1192/1307] NFSv4.2: Serialise O_DIRECT i/o and fallocate() Ensure that all O_DIRECT reads and writes complete before calling fallocate so that we don't race w.r.t. attribute updates. Fixes: 99f237832243 ("NFSv4.2: Always flush out writes in nfs42_proc_fallocate()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index e2fea37c5348..1a169372ca16 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -114,6 +114,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, exception.inode = inode; exception.state = lock->open_context->state; + nfs_file_block_o_direct(NFS_I(inode)); err = nfs_sync_inode(inode); if (err) goto out; From c80ebeba1198eac8811ab0dba36ecc13d51e4438 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 10:40:24 -0400 Subject: [PATCH 1193/1307] NFSv4.2: Serialise O_DIRECT i/o and clone range Ensure that all O_DIRECT reads and writes complete before cloning a file range, so that both the source and destination are up to date. Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 1d6b5f4230c9..c9a0d1e420c6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -278,9 +278,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, lock_two_nondirectories(src_inode, dst_inode); /* flush all pending writes on both src and dst so that server * has the latest data */ + nfs_file_block_o_direct(NFS_I(src_inode)); ret = nfs_sync_inode(src_inode); if (ret) goto out_unlock; + nfs_file_block_o_direct(NFS_I(dst_inode)); ret = nfs_sync_inode(dst_inode); if (ret) goto out_unlock; From ca247c89900ae90207f4d321e260cd93b7c7d104 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 10:54:13 -0400 Subject: [PATCH 1194/1307] NFSv4.2: Serialise O_DIRECT i/o and copy range Ensure that all O_DIRECT reads and writes complete before copying a file range, so that the destination is up to date. Fixes: a5864c999de6 ("NFS: Do not serialise O_DIRECT reads and writes") Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1a169372ca16..6a0b5871ba3b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -445,6 +445,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, return status; } + nfs_file_block_o_direct(NFS_I(dst_inode)); status = nfs_sync_inode(dst_inode); if (status) return status; From b7b8574225e9d2b5f1fb5483886ab797892f43b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Sep 2025 11:48:57 -0400 Subject: [PATCH 1195/1307] NFS: nfs_invalidate_folio() must observe the offset and size arguments If we're truncating part of the folio, then we need to write out the data on the part that is not covered by the cancellation. Fixes: d47992f86b30 ("mm: change invalidatepage prototype to accept length") Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 7 ++++--- fs/nfs/write.c | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a3105f944a0e..8059ece82468 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -475,10 +475,11 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); - if (offset != 0 || length < folio_size(folio)) - return; /* Cancel any unstarted writes on this page */ - nfs_wb_folio_cancel(inode, folio); + if (offset != 0 || length < folio_size(folio)) + nfs_wb_folio(inode, folio); + else + nfs_wb_folio_cancel(inode, folio); folio_wait_private_2(folio); /* [DEPRECATED] */ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8b7c04737967..e359fbcdc8a0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2045,6 +2045,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) * release it */ nfs_inode_remove_request(req); nfs_unlock_and_release_request(req); + folio_cancel_dirty(folio); } return ret; From c12b6a7b12a13ccd3aece6be09345c1944e18d3e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Sep 2025 20:11:03 -0400 Subject: [PATCH 1196/1307] NFS: Fix the marking of the folio as up to date Since all callers of nfs_page_group_covers_page() have already ensured that there is only one group member, all that is required is to check that the entire folio contains dirty data. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 52 +++++--------------------------------------------- 1 file changed, 5 insertions(+), 47 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e359fbcdc8a0..647c53d1418a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -237,59 +237,17 @@ static void nfs_mapping_set_error(struct folio *folio, int error) } /* - * nfs_page_group_search_locked - * @head - head request of page group - * @page_offset - offset into page + * nfs_page_covers_folio + * @req: struct nfs_page * - * Search page group with head @head to find a request that contains the - * page offset @page_offset. - * - * Returns a pointer to the first matching nfs request, or NULL if no - * match is found. - * - * Must be called with the page group lock held - */ -static struct nfs_page * -nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) -{ - struct nfs_page *req; - - req = head; - do { - if (page_offset >= req->wb_pgbase && - page_offset < (req->wb_pgbase + req->wb_bytes)) - return req; - - req = req->wb_this_page; - } while (req != head); - - return NULL; -} - -/* - * nfs_page_group_covers_page - * @head - head request of page group - * - * Return true if the page group with head @head covers the whole page, - * returns false otherwise + * Return true if the request covers the whole folio. + * Note that the caller should ensure all subrequests have been joined */ static bool nfs_page_group_covers_page(struct nfs_page *req) { unsigned int len = nfs_folio_length(nfs_page_to_folio(req)); - struct nfs_page *tmp; - unsigned int pos = 0; - nfs_page_group_lock(req); - - for (;;) { - tmp = nfs_page_group_search_locked(req->wb_head, pos); - if (!tmp) - break; - pos = tmp->wb_pgbase + tmp->wb_bytes; - } - - nfs_page_group_unlock(req); - return pos >= len; + return req->wb_pgbase == 0 && req->wb_bytes == len; } /* We can set the PG_uptodate flag if we see that a write request From 199cd9e8d14bc14bdbd1fa3031ce26dac9781507 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Sep 2025 09:49:33 -0400 Subject: [PATCH 1197/1307] Revert "SUNRPC: Don't allow waiting for exiting tasks" This reverts commit 14e41b16e8cb677bb440dca2edba8b041646c742. This patch breaks the LTP acct02 test, so let's revert and look for a better solution. Reported-by: Mark Brown Reported-by: Harshvardhan Jha Link: https://lore.kernel.org/linux-nfs/7d4d57b0-39a3-49f1-8ada-60364743e3b4@sirena.org.uk/ Cc: stable@vger.kernel.org # 6.15.x Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 73bc39281ef5..9b45fbdc90ca 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -276,8 +276,6 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (unlikely(current->flags & PF_EXITING)) - return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; From 9559d2fffd4f9b892165eed48198a0e5cb8504e6 Mon Sep 17 00:00:00 2001 From: Justin Worrell Date: Thu, 4 Sep 2025 16:09:57 -0500 Subject: [PATCH 1198/1307] SUNRPC: call xs_sock_process_cmsg for all cmsg xs_sock_recv_cmsg was failing to call xs_sock_process_cmsg for any cmsg type other than TLS_RECORD_TYPE_ALERT (TLS_RECORD_TYPE_DATA, and other values not handled.) Based on my reading of the previous commit (cc5d5908: sunrpc: fix client side handling of tls alerts), it looks like only iov_iter_revert should be conditional on TLS_RECORD_TYPE_ALERT (but that other cmsg types should still call xs_sock_process_cmsg). On my machine, I was unable to connect (over mtls) to an NFS share hosted on FreeBSD. With this patch applied, I am able to mount the share again. Fixes: cc5d59081fa2 ("sunrpc: fix client side handling of tls alerts") Signed-off-by: Justin Worrell Reviewed-and-tested-by: Scott Mayhew Link: https://lore.kernel.org/r/20250904211038.12874-3-jworrell@gmail.com Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c5f7bbf5775f..3aa987e7f072 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -407,9 +407,9 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, alert_kvec.iov_len); ret = sock_recvmsg(sock, &msg, flags); - if (ret > 0 && - tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { - iov_iter_revert(&msg.msg_iter, ret); + if (ret > 0) { + if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) + iov_iter_revert(&msg.msg_iter, ret); ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, -EAGAIN); } From 76eeb9b8de9880ca38696b2fb56ac45ac0a25c6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Sep 2025 14:22:57 -0700 Subject: [PATCH 1199/1307] Linux 6.17-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b9c661913250..cf37b9407821 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From cba4262a19afae21665ee242b3404bcede5a94d7 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Mon, 1 Sep 2025 17:04:15 +0000 Subject: [PATCH 1200/1307] x86/cpu/topology: Always try cpu_parse_topology_ext() on AMD/Hygon Support for parsing the topology on AMD/Hygon processors using CPUID leaf 0xb was added in 3986a0a805e6 ("x86/CPU/AMD: Derive CPU topology from CPUID function 0xB when available"). In an effort to keep all the topology parsing bits in one place, this commit also introduced a pseudo dependency on the TOPOEXT feature to parse the CPUID leaf 0xb. The TOPOEXT feature (CPUID 0x80000001 ECX[22]) advertises the support for Cache Properties leaf 0x8000001d and the CPUID leaf 0x8000001e EAX for "Extended APIC ID" however support for 0xb was introduced alongside the x2APIC support not only on AMD [1], but also historically on x86 [2]. Similar to 0xb, the support for extended CPU topology leaf 0x80000026 too does not depend on the TOPOEXT feature. The support for these leaves is expected to be confirmed by ensuring leaf <= {extended_}cpuid_level and then parsing the level 0 of the respective leaf to confirm EBX[15:0] (LogProcAtThisLevel) is non-zero as stated in the definition of "CPUID_Fn0000000B_EAX_x00 [Extended Topology Enumeration] (Core::X86::Cpuid::ExtTopEnumEax0)" in Processor Programming Reference (PPR) for AMD Family 19h Model 01h Rev B1 Vol1 [3] Sec. 2.1.15.1 "CPUID Instruction Functions". This has not been a problem on baremetal platforms since support for TOPOEXT (Fam 0x15 and later) predates the support for CPUID leaf 0xb (Fam 0x17[Zen2] and later), however, for AMD guests on QEMU, the "x2apic" feature can be enabled independent of the "topoext" feature where QEMU expects topology and the initial APICID to be parsed using the CPUID leaf 0xb (especially when number of cores > 255) which is populated independent of the "topoext" feature flag. Unconditionally call cpu_parse_topology_ext() on AMD and Hygon processors to first parse the topology using the XTOPOLOGY leaves (0x80000026 / 0xb) before using the TOPOEXT leaf (0x8000001e). While at it, break down the single large comment in parse_topology_amd() to better highlight the purpose of each CPUID leaf. Fixes: 3986a0a805e6 ("x86/CPU/AMD: Derive CPU topology from CPUID function 0xB when available") Suggested-by: Naveen N Rao (AMD) Signed-off-by: K Prateek Nayak Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org # Only v6.9 and above; depends on x86 topology rewrite Link: https://lore.kernel.org/lkml/1529686927-7665-1-git-send-email-suravee.suthikulpanit@amd.com/ [1] Link: https://lore.kernel.org/lkml/20080818181435.523309000@linux-os.sc.intel.com/ [2] Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 [3] --- arch/x86/kernel/cpu/topology_amd.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 827dd0dbb6e9..c79ebbb639cb 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -175,27 +175,30 @@ static void topoext_fixup(struct topo_scan *tscan) static void parse_topology_amd(struct topo_scan *tscan) { - bool has_topoext = false; - /* - * If the extended topology leaf 0x8000_001e is available - * try to get SMT, CORE, TILE, and DIE shifts from extended + * Try to get SMT, CORE, TILE, and DIE shifts from extended * CPUID leaf 0x8000_0026 on supported processors first. If * extended CPUID leaf 0x8000_0026 is not supported, try to - * get SMT and CORE shift from leaf 0xb first, then try to - * get the CORE shift from leaf 0x8000_0008. + * get SMT and CORE shift from leaf 0xb. If either leaf is + * available, cpu_parse_topology_ext() will return true. */ - if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) - has_topoext = cpu_parse_topology_ext(tscan); + bool has_xtopology = cpu_parse_topology_ext(tscan); if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); - if (!has_topoext && !parse_8000_0008(tscan)) + /* + * If XTOPOLOGY leaves (0x26/0xb) are not available, try to + * get the CORE shift from leaf 0x8000_0008 first. + */ + if (!has_xtopology && !parse_8000_0008(tscan)) return; - /* Prefer leaf 0x8000001e if available */ - if (parse_8000_001e(tscan, has_topoext)) + /* + * Prefer leaf 0x8000001e if available to get the SMT shift and + * the initial APIC ID if XTOPOLOGY leaves are not available. + */ + if (parse_8000_001e(tscan, has_xtopology)) return; /* Try the NODEID MSR */ From 440cec4ca1c242d72e309a801995584a55af25c6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 18 Jul 2025 18:50:58 +0530 Subject: [PATCH 1201/1307] drm/amdgpu: Wait for bootloader after PSPv11 reset Some PSPv11 SOCs take a longer time for PSP based mode-1 reset. Instead of checking for C2PMSG_33 status, add the callback wait_for_bootloader. Wait for bootloader to be back to steady state is already part of the generic mode-1 reset flow. Increase the retry count for bootloader wait and also fix the mask to prevent fake pass. Fixes: 8345a71fc54b ("drm/amdgpu: Add more checks to PSP mailbox") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4531 Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 32f73741d6ee41fd5db8791c1163931e313d0fdc) --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 6cc05d36e359..64b240b51f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -149,12 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) int ret; int retry_loop; - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); + 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -397,18 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) msleep(500); - offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - - ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, - 0); - - if (ret) { - DRM_INFO("psp mode 1 reset failed!\n"); - return -EINVAL; - } - - DRM_INFO("psp mode1 reset succeed \n"); - return 0; } @@ -665,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_get_wptr = psp_v11_0_ring_get_wptr, .ring_set_wptr = psp_v11_0_ring_set_wptr, .load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw, - .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw + .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw, + .wait_for_bootloader = psp_v11_0_wait_for_bootloader }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) From c05d0b32eebadc8be6e53196e99c64cf2bed1d99 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 6 Sep 2025 11:09:13 +0200 Subject: [PATCH 1202/1307] regulator: sy7636a: fix lifecycle of power good gpio Attach the power good gpio to the regulator device devres instead of the parent device to fix problems if probe is run multiple times (rmmod/insmod or some deferral). Fixes: 8c485bedfb785 ("regulator: sy7636a: Initial commit") Signed-off-by: Andreas Kemnade Reviewed-by: Alistair Francis Reviewed-by: Peng Fan Message-ID: <20250906-sy7636-rsrc-v1-2-e2886a9763a7@kernel.org> Signed-off-by: Mark Brown --- drivers/regulator/sy7636a-regulator.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c index d1e7ba1fb3e1..27e3d939b7bb 100644 --- a/drivers/regulator/sy7636a-regulator.c +++ b/drivers/regulator/sy7636a-regulator.c @@ -83,9 +83,11 @@ static int sy7636a_regulator_probe(struct platform_device *pdev) if (!regmap) return -EPROBE_DEFER; - gdp = devm_gpiod_get(pdev->dev.parent, "epd-pwr-good", GPIOD_IN); + device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent); + + gdp = devm_gpiod_get(&pdev->dev, "epd-pwr-good", GPIOD_IN); if (IS_ERR(gdp)) { - dev_err(pdev->dev.parent, "Power good GPIO fault %ld\n", PTR_ERR(gdp)); + dev_err(&pdev->dev, "Power good GPIO fault %ld\n", PTR_ERR(gdp)); return PTR_ERR(gdp); } @@ -105,7 +107,6 @@ static int sy7636a_regulator_probe(struct platform_device *pdev) } config.dev = &pdev->dev; - config.dev->of_node = pdev->dev.parent->of_node; config.regmap = regmap; rdev = devm_regulator_register(&pdev->dev, &desc, &config); From 4b66d18918f8e4d85e51974a9e3ce9abad5c7c3d Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Mon, 8 Sep 2025 09:50:25 +0800 Subject: [PATCH 1203/1307] wifi: ath12k: Fix missing station power save configuration Commit afbab6e4e88d ("wifi: ath12k: modify ath12k_mac_op_bss_info_changed() for MLO") replaced the bss_info_changed() callback with vif_cfg_changed() and link_info_changed() to support Multi-Link Operation (MLO). As a result, the station power save configuration is no longer correctly applied in ath12k_mac_bss_info_changed(). Move the handling of 'BSS_CHANGED_PS' into ath12k_mac_op_vif_cfg_changed() to align with the updated callback structure introduced for MLO, ensuring proper power-save behavior for station interfaces. Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Fixes: afbab6e4e88d ("wifi: ath12k: modify ath12k_mac_op_bss_info_changed() for MLO") Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250908015025.1301398-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 122 ++++++++++++++------------ 1 file changed, 67 insertions(+), 55 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index bd1ec3b2c084..3a3965b79942 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4078,12 +4078,68 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, return ret; } +static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) +{ + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; + enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; + enum wmi_sta_ps_mode psmode; + int ret; + int timeout; + bool enable_ps; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + if (vif->type != NL80211_IFTYPE_STATION) + return; + + enable_ps = arvif->ahvif->ps; + if (enable_ps) { + psmode = WMI_STA_PS_MODE_ENABLED; + param = WMI_STA_PS_PARAM_INACTIVITY_TIME; + + timeout = conf->dynamic_ps_timeout; + if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; + } + + /* firmware doesn't like 0 */ + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; + } + + ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, + timeout); + if (ret) { + ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", + arvif->vdev_id, ret); + return; + } + } else { + psmode = WMI_STA_PS_MODE_DISABLED; + } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", + arvif->vdev_id, psmode ? "enable" : "disable"); + + ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); + if (ret) + ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", + psmode, arvif->vdev_id, ret); +} + static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 changed) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; + struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4147,61 +4203,24 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } } } -} -static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) -{ - struct ath12k *ar = arvif->ar; - struct ieee80211_vif *vif = arvif->ahvif->vif; - struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; - enum wmi_sta_powersave_param param; - struct ieee80211_bss_conf *info; - enum wmi_sta_ps_mode psmode; - int ret; - int timeout; - bool enable_ps; + if (changed & BSS_CHANGED_PS) { + links = ahvif->links_map; + vif_cfg = &vif->cfg; - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + if (!arvif || !arvif->ar) + continue; - if (vif->type != NL80211_IFTYPE_STATION) - return; + ar = arvif->ar; - enable_ps = arvif->ahvif->ps; - if (enable_ps) { - psmode = WMI_STA_PS_MODE_ENABLED; - param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - - timeout = conf->dynamic_ps_timeout; - if (timeout == 0) { - info = ath12k_mac_get_link_bss_conf(arvif); - if (!info) { - ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", - vif->addr, arvif->link_id); - return; + if (ar->ab->hw_params->supports_sta_ps) { + ahvif->ps = vif_cfg->ps; + ath12k_mac_vif_setup_ps(arvif); } - - /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; } - - ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, - timeout); - if (ret) { - ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", - arvif->vdev_id, ret); - return; - } - } else { - psmode = WMI_STA_PS_MODE_DISABLED; } - - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", - arvif->vdev_id, psmode ? "enable" : "disable"); - - ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); - if (ret) - ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", - psmode, arvif->vdev_id, ret); } static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, @@ -4223,7 +4242,6 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); - struct ieee80211_vif_cfg *vif_cfg = &vif->cfg; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4510,12 +4528,6 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); - - if (changed & BSS_CHANGED_PS && - ar->ab->hw_params->supports_sta_ps) { - ahvif->ps = vif_cfg->ps; - ath12k_mac_vif_setup_ps(arvif); - } } static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif, From 82e2be57d544ff9ad4696c85600827b39be8ce9e Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Mon, 8 Sep 2025 09:51:39 +0800 Subject: [PATCH 1204/1307] wifi: ath12k: fix WMI TLV header misalignment When buf_len is not 4-byte aligned in ath12k_wmi_mgmt_send(), the firmware asserts and triggers a recovery. The following error messages are observed: ath12k_pci 0004:01:00.0: failed to submit WMI_MGMT_TX_SEND_CMDID cmd ath12k_pci 0004:01:00.0: failed to send mgmt frame: -108 ath12k_pci 0004:01:00.0: failed to tx mgmt frame, vdev_id 0 :-108 ath12k_pci 0004:01:00.0: waiting recovery start... This issue was observed when running 'iw wlanx set power_save off/on' in MLO station mode, which triggers the sending of an SMPS action frame with a length of 27 bytes to the AP. To resolve the misalignment, use buf_len_aligned instead of buf_len when constructing the WMI TLV header. Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250908015139.1301437-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 742ffeb48bce..29dadedefdd2 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -843,7 +843,7 @@ int ath12k_wmi_mgmt_send(struct ath12k_link_vif *arvif, u32 buf_id, cmd->tx_params_valid = 0; frame_tlv = (struct wmi_tlv *)(skb->data + sizeof(*cmd)); - frame_tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_BYTE, buf_len); + frame_tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_BYTE, buf_len_aligned); memcpy(frame_tlv->value, frame->data, buf_len); From dd2fa82473453661d12723c46c9f43d9876a7efd Mon Sep 17 00:00:00 2001 From: Jonathan Curley Date: Mon, 8 Sep 2025 17:35:16 +0000 Subject: [PATCH 1205/1307] NFSv4/flexfiles: Fix layout merge mirror check. Typo in ff_lseg_match_mirrors makes the diff ineffective. This results in merge happening all the time. Merge happening all the time is problematic because it marks lsegs invalid. Marking lsegs invalid causes all outstanding IO to get restarted with EAGAIN and connections to get closed. Closing connections constantly triggers race conditions in the RDMA implementation... Fixes: 660d1eb22301c ("pNFS/flexfile: Don't merge layout segments if the mirrors don't match") Signed-off-by: Jonathan Curley Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f8ab7b4e09e7..9edb5f9b0c4e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -293,7 +293,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1, struct pnfs_layout_segment *l2) { const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1); - const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1); + const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2); u32 i; if (fl1->mirror_array_cnt != fl2->mirror_array_cnt) From cd4453c5e983cf1fd5757e9acb915adb1e4602b6 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 8 Sep 2025 02:46:58 +0000 Subject: [PATCH 1206/1307] tracing: Silence warning when chunk allocation fails in trace_pid_write Syzkaller trigger a fault injection warning: WARNING: CPU: 1 PID: 12326 at tracepoint_add_func+0xbfc/0xeb0 Modules linked in: CPU: 1 UID: 0 PID: 12326 Comm: syz.6.10325 Tainted: G U 6.14.0-rc5-syzkaller #0 Tainted: [U]=USER Hardware name: Google Compute Engine/Google Compute Engine RIP: 0010:tracepoint_add_func+0xbfc/0xeb0 kernel/tracepoint.c:294 Code: 09 fe ff 90 0f 0b 90 0f b6 74 24 43 31 ff 41 bc ea ff ff ff RSP: 0018:ffffc9000414fb48 EFLAGS: 00010283 RAX: 00000000000012a1 RBX: ffffffff8e240ae0 RCX: ffffc90014b78000 RDX: 0000000000080000 RSI: ffffffff81bbd78b RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffffffffef R13: 0000000000000000 R14: dffffc0000000000 R15: ffffffff81c264f0 FS: 00007f27217f66c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2e80dff8 CR3: 00000000268f8000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tracepoint_probe_register_prio+0xc0/0x110 kernel/tracepoint.c:464 register_trace_prio_sched_switch include/trace/events/sched.h:222 [inline] register_pid_events kernel/trace/trace_events.c:2354 [inline] event_pid_write.isra.0+0x439/0x7a0 kernel/trace/trace_events.c:2425 vfs_write+0x24c/0x1150 fs/read_write.c:677 ksys_write+0x12b/0x250 fs/read_write.c:731 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f We can reproduce the warning by following the steps below: 1. echo 8 >> set_event_notrace_pid. Let tr->filtered_pids owns one pid and register sched_switch tracepoint. 2. echo ' ' >> set_event_pid, and perform fault injection during chunk allocation of trace_pid_list_alloc. Let pid_list with no pid and assign to tr->filtered_pids. 3. echo ' ' >> set_event_pid. Let pid_list is NULL and assign to tr->filtered_pids. 4. echo 9 >> set_event_pid, will trigger the double register sched_switch tracepoint warning. The reason is that syzkaller injects a fault into the chunk allocation in trace_pid_list_alloc, causing a failure in trace_pid_list_set, which may trigger double register of the same tracepoint. This only occurs when the system is about to crash, but to suppress this warning, let's add failure handling logic to trace_pid_list_set. Link: https://lore.kernel.org/20250908024658.2390398-1-pulehui@huaweicloud.com Fixes: 8d6e90983ade ("tracing: Create a sparse bitmask for pid filtering") Reported-by: syzbot+161412ccaeff20ce4dde@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67cb890e.050a0220.d8275.022e.GAE@google.com Signed-off-by: Pu Lehui Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2f1ae6c0ee81..b3c94fbaf002 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -834,7 +834,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, /* copy the current bits to the new max */ ret = trace_pid_list_first(filtered_pids, &pid); while (!ret) { - trace_pid_list_set(pid_list, pid); + ret = trace_pid_list_set(pid_list, pid); + if (ret < 0) + goto out; + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); nr_pids++; } @@ -871,6 +874,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, trace_parser_clear(&parser); ret = 0; } + out: trace_parser_put(&parser); if (ret < 0) { From b816265396daf1beb915e0ffbfd7f3906c2bf4a4 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Sun, 7 Sep 2025 12:21:46 +0200 Subject: [PATCH 1207/1307] PCI: mvebu: Fix use of for_each_of_range() iterator 5da3d94a23c6 ("PCI: mvebu: Use for_each_of_range() iterator for parsing "ranges"") simplified code by using the for_each_of_range() iterator, but it broke PCI enumeration on Turris Omnia (and probably other mvebu targets). Issue #1: To determine range.flags, of_pci_range_parser_one() uses bus->get_flags(), which resolves to of_bus_pci_get_flags(), which already returns an IORESOURCE bit field, and NOT the original flags from the "ranges" resource. Then mvebu_get_tgt_attr() attempts the very same conversion again. Remove the misinterpretation of range.flags in mvebu_get_tgt_attr(), to restore the intended behavior. Issue #2: The driver needs target and attributes, which are encoded in the raw address values of the "/soc/pcie/ranges" resource. According to of_pci_range_parser_one(), the raw values are stored in range.bus_addr and range.parent_bus_addr, respectively. range.cpu_addr is a translated version of range.parent_bus_addr, and not relevant here. Use the correct range structure member, to extract target and attributes. This restores the intended behavior. Fixes: 5da3d94a23c6 ("PCI: mvebu: Use for_each_of_range() iterator for parsing "ranges"") Reported-by: Jan Palus Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220479 Signed-off-by: Klaus Kudielka Signed-off-by: Bjorn Helgaas Tested-by: Tony Dinh Tested-by: Jan Palus Link: https://patch.msgid.link/20250907102303.29735-1-klaus.kudielka@gmail.com --- drivers/pci/controller/pci-mvebu.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 755651f33811..a72aa57591c0 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1168,12 +1168,6 @@ static void __iomem *mvebu_pcie_map_registers(struct platform_device *pdev, return devm_ioremap_resource(&pdev->dev, &port->regs); } -#define DT_FLAGS_TO_TYPE(flags) (((flags) >> 24) & 0x03) -#define DT_TYPE_IO 0x1 -#define DT_TYPE_MEM32 0x2 -#define DT_CPUADDR_TO_TARGET(cpuaddr) (((cpuaddr) >> 56) & 0xFF) -#define DT_CPUADDR_TO_ATTR(cpuaddr) (((cpuaddr) >> 48) & 0xFF) - static int mvebu_get_tgt_attr(struct device_node *np, int devfn, unsigned long type, unsigned int *tgt, @@ -1189,19 +1183,12 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn, return -EINVAL; for_each_of_range(&parser, &range) { - unsigned long rtype; u32 slot = upper_32_bits(range.bus_addr); - if (DT_FLAGS_TO_TYPE(range.flags) == DT_TYPE_IO) - rtype = IORESOURCE_IO; - else if (DT_FLAGS_TO_TYPE(range.flags) == DT_TYPE_MEM32) - rtype = IORESOURCE_MEM; - else - continue; - - if (slot == PCI_SLOT(devfn) && type == rtype) { - *tgt = DT_CPUADDR_TO_TARGET(range.cpu_addr); - *attr = DT_CPUADDR_TO_ATTR(range.cpu_addr); + if (slot == PCI_SLOT(devfn) && + type == (range.flags & IORESOURCE_TYPE_BITS)) { + *tgt = (range.parent_bus_addr >> 56) & 0xFF; + *attr = (range.parent_bus_addr >> 48) & 0xFF; return 0; } } From 1dbfb0363224f6da56f6655d596dc5097308d6f5 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 5 Sep 2025 06:57:27 -0700 Subject: [PATCH 1208/1307] genetlink: fix genl_bind() invoking bind() after -EPERM Per family bind/unbind callbacks were introduced to allow families to track multicast group consumer presence, e.g. to start or stop producing events depending on listeners. However, in genl_bind() the bind() callback was invoked even if capability checks failed and ret was set to -EPERM. This means that callbacks could run on behalf of unauthorized callers while the syscall still returned failure to user space. Fix this by only invoking bind() after "if (ret) break;" check i.e. after permission checks have succeeded. Fixes: 3de21a8990d3 ("genetlink: Add per family bind/unbind callbacks") Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20250905135731.3026965-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- net/netlink/genetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 104732d34543..978c129c6095 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1836,6 +1836,9 @@ static int genl_bind(struct net *net, int group) !ns_capable(net->user_ns, CAP_SYS_ADMIN)) ret = -EPERM; + if (ret) + break; + if (family->bind) family->bind(i); From 674b34c4c770551e916ae707829c7faea4782d3a Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Fri, 5 Sep 2025 14:45:07 +0200 Subject: [PATCH 1209/1307] net: dsa: b53: fix ageing time for BCM53101 For some reason Broadcom decided that BCM53101 uses 0.5s increments for the ageing time register, but kept the field width the same [1]. Due to this, the actual ageing time was always half of what was configured. Fix this by adapting the limits and value calculation for BCM53101. So far it looks like this is the only chip with the increased tick speed: $ grep -l -r "Specifies the aging time in 0.5 seconds" cdk/PKG/chip | sort cdk/PKG/chip/bcm53101/bcm53101_a0_defs.h $ grep -l -r "Specifies the aging time in seconds" cdk/PKG/chip | sort cdk/PKG/chip/bcm53010/bcm53010_a0_defs.h cdk/PKG/chip/bcm53020/bcm53020_a0_defs.h cdk/PKG/chip/bcm53084/bcm53084_a0_defs.h cdk/PKG/chip/bcm53115/bcm53115_a0_defs.h cdk/PKG/chip/bcm53118/bcm53118_a0_defs.h cdk/PKG/chip/bcm53125/bcm53125_a0_defs.h cdk/PKG/chip/bcm53128/bcm53128_a0_defs.h cdk/PKG/chip/bcm53134/bcm53134_a0_defs.h cdk/PKG/chip/bcm53242/bcm53242_a0_defs.h cdk/PKG/chip/bcm53262/bcm53262_a0_defs.h cdk/PKG/chip/bcm53280/bcm53280_a0_defs.h cdk/PKG/chip/bcm53280/bcm53280_b0_defs.h cdk/PKG/chip/bcm53600/bcm53600_a0_defs.h cdk/PKG/chip/bcm89500/bcm89500_a0_defs.h [1] https://github.com/Broadcom/OpenMDK/blob/a5d3fc9b12af3eeb68f2ca0ce7ec4056cd14d6c2/cdk/PKG/chip/bcm53101/bcm53101_a0_defs.h#L28966 Fixes: e39d14a760c0 ("net: dsa: b53: implement setting ageing time") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250905124507.59186-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 829b1f087e9e..2f846381d5a7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1273,9 +1273,15 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_vlan_aware_bridge_pvid = true; - /* Ageing time is set in seconds */ - ds->ageing_time_min = 1 * 1000; - ds->ageing_time_max = AGE_TIME_MAX * 1000; + if (dev->chip_id == BCM53101_DEVICE_ID) { + /* BCM53101 uses 0.5 second increments */ + ds->ageing_time_min = 1 * 500; + ds->ageing_time_max = AGE_TIME_MAX * 500; + } else { + /* Everything else uses 1 second increments */ + ds->ageing_time_min = 1 * 1000; + ds->ageing_time_max = AGE_TIME_MAX * 1000; + } ret = b53_reset_switch(dev); if (ret) { @@ -2559,7 +2565,10 @@ int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) else reg = B53_AGING_TIME_CONTROL; - atc = DIV_ROUND_CLOSEST(msecs, 1000); + if (dev->chip_id == BCM53101_DEVICE_ID) + atc = DIV_ROUND_CLOSEST(msecs, 500); + else + atc = DIV_ROUND_CLOSEST(msecs, 1000); if (!is5325(dev) && !is5365(dev)) atc |= AGE_CHANGE; From 8625f5748fea960d2af4f3c3e9891ee8f6f80906 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 5 Sep 2025 13:12:33 +0200 Subject: [PATCH 1210/1307] net: bridge: Bounce invalid boolopts The bridge driver currently tolerates options that it does not recognize. Instead, it should bounce them. Fixes: a428afe82f98 ("net: bridge: add support for user-controlled bool options") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/e6fdca3b5a8d54183fbda075daffef38bdd7ddce.1757070067.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br.c b/net/bridge/br.c index 1885d0c315f0..c683baa3847f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -324,6 +324,13 @@ int br_boolopt_multi_toggle(struct net_bridge *br, int err = 0; int opt_id; + opt_id = find_next_bit(&bitmap, BITS_PER_LONG, BR_BOOLOPT_MAX); + if (opt_id != BITS_PER_LONG) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Unknown boolean option %d", + opt_id); + return -EINVAL; + } + for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) { bool on = !!(bm->optval & BIT(opt_id)); From d3b28612bc5500133260aaf36794a0a0c287d61b Mon Sep 17 00:00:00 2001 From: Jonas Rebmann Date: Fri, 5 Sep 2025 14:20:50 +0200 Subject: [PATCH 1211/1307] net: phy: NXP_TJA11XX: Update Kconfig with TJA1102 support Update the Kconfig description to indicate support for the TJA1102. Signed-off-by: Jonas Rebmann Link: https://patch.msgid.link/20250905-tja1102-kconfig-v1-1-a57e6ac4e264@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 28acc6392cfc..392749aae54d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -361,7 +361,7 @@ config NXP_TJA11XX_PHY tristate "NXP TJA11xx PHYs support" depends on HWMON help - Currently supports the NXP TJA1100 and TJA1101 PHY. + Currently supports the NXP TJA1100, TJA1101 and TJA1102 PHYs. config NCN26000_PHY tristate "Onsemi 10BASE-T1S Ethernet PHY" From 7989fdce69ec0a928e136477da2aa208a191fba2 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Fri, 22 Aug 2025 15:55:16 -0700 Subject: [PATCH 1212/1307] percpu: fix race on alloc failed warning limit The 'allocation failed, ...' warning messages can cause unlimited log spam, contrary to the implementation's intent. The warn_limit variable is accessed without synchronization. If more than threads enter the warning path at the same time, the variable will get decremented past 0. Once it becomes negative, the non-zero check will always return true leading to unlimited log spam. Use atomic operation to access warn_limit and change condition to test for non-negative (>= 0) - atomic_dec_if_positive will return -1 once warn_limit becomes 0. Continue to print disable message alongside the last warning. While the change cited in Fixes is only adjacent, the warning limit implementation was correct before it. Only non-atomic allocations were considered for warnings, and those happened to hold pcpu_alloc_mutex while accessing warn_limit. [vdumitrescu@nvidia.com: prevent warn_limit from going negative, per Christoph Lameter] Link: https://lkml.kernel.org/r/ee87cc59-2717-4dbb-8052-1d2692c5aaaa@nvidia.com Link: https://lkml.kernel.org/r/ab22061a-a62f-4429-945b-744e5cc4ba35@nvidia.com Fixes: f7d77dfc91f7 ("mm/percpu.c: print error message too if atomic alloc failed") Signed-off-by: Vlad Dumitrescu Reviewed-by: Baoquan He Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Tejun Heo Signed-off-by: Andrew Morton --- mm/percpu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index a56f35dcc417..81462ce5866e 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1734,7 +1734,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, bool is_atomic; bool do_warn; struct obj_cgroup *objcg = NULL; - static int warn_limit = 10; + static atomic_t warn_limit = ATOMIC_INIT(10); struct pcpu_chunk *chunk, *next; const char *err; int slot, off, cpu, ret; @@ -1904,13 +1904,17 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (do_warn && warn_limit) { - pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", - size, align, is_atomic, err); - if (!is_atomic) - dump_stack(); - if (!--warn_limit) - pr_info("limit reached, disable warning\n"); + if (do_warn) { + int remaining = atomic_dec_if_positive(&warn_limit); + + if (remaining >= 0) { + pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", + size, align, is_atomic, err); + if (!is_atomic) + dump_stack(); + if (remaining == 0) + pr_info("limit reached, disable warning\n"); + } } if (is_atomic) { From 78d2d32f0b789d67cbe5cfea0c0714cb2446c37e Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 28 Aug 2025 14:26:56 +0000 Subject: [PATCH 1213/1307] mm/mremap: fix regression in vrm->new_addr check Commit 3215eaceca87 ("mm/mremap: refactor initial parameter sanity checks") moved the sanity check for vrm->new_addr from mremap_to() to check_mremap_params(). However, this caused a regression as vrm->new_addr is now checked even when MREMAP_FIXED and MREMAP_DONTUNMAP flags are not specified. In this case, vrm->new_addr can be garbage and create unexpected failures. Fix this by moving the new_addr check after the vrm_implies_new_addr() guard. This ensures that the new_addr is only checked when the user has specified one explicitly. Link: https://lkml.kernel.org/r/20250828142657.770502-1-cmllamas@google.com Fixes: 3215eaceca87 ("mm/mremap: refactor initial parameter sanity checks") Signed-off-by: Carlos Llamas Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Reviewed-by: Lorenzo Stoakes Cc: Carlos Llamas Cc: Jann Horn Signed-off-by: Andrew Morton --- mm/mremap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index e618a706aff5..35de0a7b910e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1774,15 +1774,18 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) if (!vrm->new_len) return -EINVAL; - /* Is the new length or address silly? */ - if (vrm->new_len > TASK_SIZE || - vrm->new_addr > TASK_SIZE - vrm->new_len) + /* Is the new length silly? */ + if (vrm->new_len > TASK_SIZE) return -EINVAL; /* Remainder of checks are for cases with specific new_addr. */ if (!vrm_implies_new_addr(vrm)) return 0; + /* Is the new address silly? */ + if (vrm->new_addr > TASK_SIZE - vrm->new_len) + return -EINVAL; + /* The new address must be page-aligned. */ if (offset_in_page(vrm->new_addr)) return -EINVAL; From d613f53c83ec47089c4e25859d5e8e0359f6f8da Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 28 Aug 2025 10:46:18 +0800 Subject: [PATCH 1214/1307] mm/memory-failure: fix VM_BUG_ON_PAGE(PagePoisoned(page)) when unpoison memory When I did memory failure tests, below panic occurs: page dumped because: VM_BUG_ON_PAGE(PagePoisoned(page)) kernel BUG at include/linux/page-flags.h:616! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 720 Comm: bash Not tainted 6.10.0-rc1-00195-g148743902568 #40 RIP: 0010:unpoison_memory+0x2f3/0x590 RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246 RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0 RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000 R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe FS: 00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0 Call Trace: unpoison_memory+0x2f3/0x590 simple_attr_write_xsigned.constprop.0.isra.0+0xb3/0x110 debugfs_attr_write+0x42/0x60 full_proxy_write+0x5b/0x80 vfs_write+0xd5/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xb9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f08f0314887 RSP: 002b:00007ffece710078 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f08f0314887 RDX: 0000000000000009 RSI: 0000564787a30410 RDI: 0000000000000001 RBP: 0000564787a30410 R08: 000000000000fefe R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000009 R13: 00007f08f041b780 R14: 00007f08f0417600 R15: 00007f08f0416a00 Modules linked in: hwpoison_inject ---[ end trace 0000000000000000 ]--- RIP: 0010:unpoison_memory+0x2f3/0x590 RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246 RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0 RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000 R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe FS: 00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x31c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]--- The root cause is that unpoison_memory() tries to check the PG_HWPoison flags of an uninitialized page. So VM_BUG_ON_PAGE(PagePoisoned(page)) is triggered. This can be reproduced by below steps: 1.Offline memory block: echo offline > /sys/devices/system/memory/memory12/state 2.Get offlined memory pfn: page-types -b n -rlN 3.Write pfn to unpoison-pfn echo > /sys/kernel/debug/hwpoison/unpoison-pfn This scenario can be identified by pfn_to_online_page() returning NULL. And ZONE_DEVICE pages are never expected, so we can simply fail if pfn_to_online_page() == NULL to fix the bug. Link: https://lkml.kernel.org/r/20250828024618.1744895-1-linmiaohe@huawei.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") Signed-off-by: Miaohe Lin Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 10b3c281c2ae..df6ee59527dd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2568,10 +2568,9 @@ int unpoison_memory(unsigned long pfn) static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - if (!pfn_valid(pfn)) - return -ENXIO; - - p = pfn_to_page(pfn); + p = pfn_to_online_page(pfn); + if (!p) + return -EIO; folio = page_folio(p); mutex_lock(&mf_mutex); From 04100f775c2ea501927f508f17ad824ad1f23c8d Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Fri, 29 Aug 2025 10:18:15 -0500 Subject: [PATCH 1215/1307] ocfs2: fix recursive semaphore deadlock in fiemap call syzbot detected a OCFS2 hang due to a recursive semaphore on a FS_IOC_FIEMAP of the extent list on a specially crafted mmap file. context_switch kernel/sched/core.c:5357 [inline] __schedule+0x1798/0x4cc0 kernel/sched/core.c:6961 __schedule_loop kernel/sched/core.c:7043 [inline] schedule+0x165/0x360 kernel/sched/core.c:7058 schedule_preempt_disabled+0x13/0x30 kernel/sched/core.c:7115 rwsem_down_write_slowpath+0x872/0xfe0 kernel/locking/rwsem.c:1185 __down_write_common kernel/locking/rwsem.c:1317 [inline] __down_write kernel/locking/rwsem.c:1326 [inline] down_write+0x1ab/0x1f0 kernel/locking/rwsem.c:1591 ocfs2_page_mkwrite+0x2ff/0xc40 fs/ocfs2/mmap.c:142 do_page_mkwrite+0x14d/0x310 mm/memory.c:3361 wp_page_shared mm/memory.c:3762 [inline] do_wp_page+0x268d/0x5800 mm/memory.c:3981 handle_pte_fault mm/memory.c:6068 [inline] __handle_mm_fault+0x1033/0x5440 mm/memory.c:6195 handle_mm_fault+0x40a/0x8e0 mm/memory.c:6364 do_user_addr_fault+0x764/0x1390 arch/x86/mm/fault.c:1387 handle_page_fault arch/x86/mm/fault.c:1476 [inline] exc_page_fault+0x76/0xf0 arch/x86/mm/fault.c:1532 asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:623 RIP: 0010:copy_user_generic arch/x86/include/asm/uaccess_64.h:126 [inline] RIP: 0010:raw_copy_to_user arch/x86/include/asm/uaccess_64.h:147 [inline] RIP: 0010:_inline_copy_to_user include/linux/uaccess.h:197 [inline] RIP: 0010:_copy_to_user+0x85/0xb0 lib/usercopy.c:26 Code: e8 00 bc f7 fc 4d 39 fc 72 3d 4d 39 ec 77 38 e8 91 b9 f7 fc 4c 89 f7 89 de e8 47 25 5b fd 0f 01 cb 4c 89 ff 48 89 d9 4c 89 f6 a4 0f 1f 00 48 89 cb 0f 01 ca 48 89 d8 5b 41 5c 41 5d 41 5e 41 RSP: 0018:ffffc9000403f950 EFLAGS: 00050256 RAX: ffffffff84c7f101 RBX: 0000000000000038 RCX: 0000000000000038 RDX: 0000000000000000 RSI: ffffc9000403f9e0 RDI: 0000200000000060 RBP: ffffc9000403fa90 R08: ffffc9000403fa17 R09: 1ffff92000807f42 R10: dffffc0000000000 R11: fffff52000807f43 R12: 0000200000000098 R13: 00007ffffffff000 R14: ffffc9000403f9e0 R15: 0000200000000060 copy_to_user include/linux/uaccess.h:225 [inline] fiemap_fill_next_extent+0x1c0/0x390 fs/ioctl.c:145 ocfs2_fiemap+0x888/0xc90 fs/ocfs2/extent_map.c:806 ioctl_fiemap fs/ioctl.c:220 [inline] do_vfs_ioctl+0x1173/0x1430 fs/ioctl.c:532 __do_sys_ioctl fs/ioctl.c:596 [inline] __se_sys_ioctl+0x82/0x170 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5f13850fd9 RSP: 002b:00007ffe3b3518b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000200000000000 RCX: 00007f5f13850fd9 RDX: 0000200000000040 RSI: 00000000c020660b RDI: 0000000000000004 RBP: 6165627472616568 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe3b3518f0 R13: 00007ffe3b351b18 R14: 431bde82d7b634db R15: 00007f5f1389a03b ocfs2_fiemap() takes a read lock of the ip_alloc_sem semaphore (since v2.6.22-527-g7307de80510a) and calls fiemap_fill_next_extent() to read the extent list of this running mmap executable. The user supplied buffer to hold the fiemap information page faults calling ocfs2_page_mkwrite() which will take a write lock (since v2.6.27-38-g00dc417fa3e7) of the same semaphore. This recursive semaphore will hold filesystem locks and causes a hang of the fileystem. The ip_alloc_sem protects the inode extent list and size. Release the read semphore before calling fiemap_fill_next_extent() in ocfs2_fiemap() and ocfs2_fiemap_inline(). This does an unnecessary semaphore lock/unlock on the last extent but simplifies the error path. Link: https://lkml.kernel.org/r/61d1a62b-2631-4f12-81e2-cd689914360b@oracle.com Fixes: 00dc417fa3e7 ("ocfs2: fiemap support") Signed-off-by: Mark Tinguely Reported-by: syzbot+541dcc6ee768f77103e7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=541dcc6ee768f77103e7 Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/extent_map.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 930150ed5db1..ef147e8b3271 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -706,6 +706,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, * it not only handles the fiemap for inlined files, but also deals * with the fast symlink, cause they have no difference for extent * mapping per se. + * + * Must be called with ip_alloc_sem semaphore held. */ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, struct fiemap_extent_info *fieinfo, @@ -717,6 +719,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, u64 phys; u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; struct ocfs2_inode_info *oi = OCFS2_I(inode); + lockdep_assert_held_read(&oi->ip_alloc_sem); di = (struct ocfs2_dinode *)di_bh->b_data; if (ocfs2_inode_is_fast_symlink(inode)) @@ -732,8 +735,11 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); + /* Release the ip_alloc_sem to prevent deadlock on page fault */ + up_read(&OCFS2_I(inode)->ip_alloc_sem); ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, flags); + down_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret < 0) return ret; } @@ -802,9 +808,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; - + /* Release the ip_alloc_sem to prevent deadlock on page fault */ + up_read(&OCFS2_I(inode)->ip_alloc_sem); ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, len_bytes, fe_flags); + down_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret) break; From 79357cd06d41d0f5a11b17d7c86176e395d10ef2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Sun, 31 Aug 2025 14:10:58 +0200 Subject: [PATCH 1216/1307] mm/vmalloc, mm/kasan: respect gfp mask in kasan_populate_vmalloc() kasan_populate_vmalloc() and its helpers ignore the caller's gfp_mask and always allocate memory using the hardcoded GFP_KERNEL flag. This makes them inconsistent with vmalloc(), which was recently extended to support GFP_NOFS and GFP_NOIO allocations. Page table allocations performed during shadow population also ignore the external gfp_mask. To preserve the intended semantics of GFP_NOFS and GFP_NOIO, wrap the apply_to_page_range() calls into the appropriate memalloc scope. xfs calls vmalloc with GFP_NOFS, so this bug could lead to deadlock. There was a report here https://lkml.kernel.org/r/686ea951.050a0220.385921.0016.GAE@google.com This patch: - Extends kasan_populate_vmalloc() and helpers to take gfp_mask; - Passes gfp_mask down to alloc_pages_bulk() and __get_free_page(); - Enforces GFP_NOFS/NOIO semantics with memalloc_*_save()/restore() around apply_to_page_range(); - Updates vmalloc.c and percpu allocator call sites accordingly. Link: https://lkml.kernel.org/r/20250831121058.92971-1-urezki@gmail.com Fixes: 451769ebb7e7 ("mm/vmalloc: alloc GFP_NO{FS,IO} for vmalloc") Signed-off-by: Uladzislau Rezki (Sony) Reported-by: syzbot+3470c9ffee63e4abafeb@syzkaller.appspotmail.com Reviewed-by: Andrey Ryabinin Cc: Baoquan He Cc: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 6 +++--- mm/kasan/shadow.c | 31 ++++++++++++++++++++++++------- mm/vmalloc.c | 8 ++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..fe5ce9215821 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); -int kasan_populate_vmalloc(unsigned long addr, unsigned long size); +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end, @@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } @@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start, static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index e2ceebf737ef..11d472a5c4e8 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -336,13 +336,13 @@ static void ___free_pages_bulk(struct page **pages, int nr_pages) } } -static int ___alloc_pages_bulk(struct page **pages, int nr_pages) +static int ___alloc_pages_bulk(struct page **pages, int nr_pages, gfp_t gfp_mask) { unsigned long nr_populated, nr_total = nr_pages; struct page **page_array = pages; while (nr_pages) { - nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + nr_populated = alloc_pages_bulk(gfp_mask, nr_pages, pages); if (!nr_populated) { ___free_pages_bulk(page_array, nr_total - nr_pages); return -ENOMEM; @@ -354,25 +354,42 @@ static int ___alloc_pages_bulk(struct page **pages, int nr_pages) return 0; } -static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) +static int __kasan_populate_vmalloc(unsigned long start, unsigned long end, gfp_t gfp_mask) { unsigned long nr_pages, nr_total = PFN_UP(end - start); struct vmalloc_populate_data data; + unsigned int flags; int ret = 0; - data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); + data.pages = (struct page **)__get_free_page(gfp_mask | __GFP_ZERO); if (!data.pages) return -ENOMEM; while (nr_total) { nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); - ret = ___alloc_pages_bulk(data.pages, nr_pages); + ret = ___alloc_pages_bulk(data.pages, nr_pages, gfp_mask); if (ret) break; data.start = start; + + /* + * page tables allocations ignore external gfp mask, enforce it + * by the scope API + */ + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) + flags = memalloc_nofs_save(); + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) + flags = memalloc_noio_save(); + ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, kasan_populate_vmalloc_pte, &data); + + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) + memalloc_nofs_restore(flags); + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) + memalloc_noio_restore(flags); + ___free_pages_bulk(data.pages, nr_pages); if (ret) break; @@ -386,7 +403,7 @@ static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) return ret; } -int kasan_populate_vmalloc(unsigned long addr, unsigned long size) +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask) { unsigned long shadow_start, shadow_end; int ret; @@ -415,7 +432,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) shadow_start = PAGE_ALIGN_DOWN(shadow_start); shadow_end = PAGE_ALIGN(shadow_end); - ret = __kasan_populate_vmalloc(shadow_start, shadow_end); + ret = __kasan_populate_vmalloc(shadow_start, shadow_end, gfp_mask); if (ret) return ret; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6dbcdceecae1..5edd536ba9d2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2026,6 +2026,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, if (unlikely(!vmap_initialized)) return ERR_PTR(-EBUSY); + /* Only reclaim behaviour flags are relevant. */ + gfp_mask = gfp_mask & GFP_RECLAIM_MASK; might_sleep(); /* @@ -2038,8 +2040,6 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, */ va = node_alloc(size, align, vstart, vend, &addr, &vn_id); if (!va) { - gfp_mask = gfp_mask & GFP_RECLAIM_MASK; - va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); @@ -2089,7 +2089,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, BUG_ON(va->va_start < vstart); BUG_ON(va->va_end > vend); - ret = kasan_populate_vmalloc(addr, size); + ret = kasan_populate_vmalloc(addr, size, gfp_mask); if (ret) { free_vmap_area(va); return ERR_PTR(ret); @@ -4826,7 +4826,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, /* populate the kasan shadow space */ for (area = 0; area < nr_vms; area++) { - if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) + if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area], GFP_KERNEL)) goto err_free_shadow; } From 3fac212fe489aa0dbe8d80a42a7809840ca7b0f9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Sep 2025 15:49:26 -0700 Subject: [PATCH 1217/1307] compiler-clang.h: define __SANITIZE_*__ macros only when undefined Clang 22 recently added support for defining __SANITIZE__ macros similar to GCC [1], which causes warnings (or errors with CONFIG_WERROR=y or W=e) with the existing defines that the kernel creates to emulate this behavior with existing clang versions. In file included from :3: In file included from include/linux/compiler_types.h:171: include/linux/compiler-clang.h:37:9: error: '__SANITIZE_THREAD__' macro redefined [-Werror,-Wmacro-redefined] 37 | #define __SANITIZE_THREAD__ | ^ :352:9: note: previous definition is here 352 | #define __SANITIZE_THREAD__ 1 | ^ Refactor compiler-clang.h to only define the sanitizer macros when they are undefined and adjust the rest of the code to use these macros for checking if the sanitizers are enabled, clearing up the warnings and allowing the kernel to easily drop these defines when the minimum supported version of LLVM for building the kernel becomes 22.0.0 or newer. Link: https://lkml.kernel.org/r/20250902-clang-update-sanitize-defines-v1-1-cf3702ca3d92@kernel.org Link: https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c [1] Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bill Wendling Cc: Dmitriy Vyukov Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..8720a0705900 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -18,23 +18,42 @@ #define KASAN_ABI_VERSION 5 /* + * Clang 22 added preprocessor macros to match GCC, in hopes of eventually + * dropping __has_feature support for sanitizers: + * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c + * Create these macros for older versions of clang so that it is easy to clean + * up once the minimum supported version of LLVM for building the kernel always + * creates these macros. + * * Note: Checking __has_feature(*_sanitizer) is only true if the feature is * enabled. Therefore it is not required to additionally check defined(CONFIG_*) * to avoid adding redundant attributes in other configurations. */ - -#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) #define __SANITIZE_ADDRESS__ +#endif +#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__) +#define __SANITIZE_HWADDRESS__ +#endif +#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) +#define __SANITIZE_THREAD__ +#endif + +/* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel. + */ +#ifdef __SANITIZE_HWADDRESS__ +#define __SANITIZE_ADDRESS__ +#endif + +#ifdef __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) #else #define __no_sanitize_address #endif -#if __has_feature(thread_sanitizer) -/* emulate gcc's __SANITIZE_THREAD__ flag */ -#define __SANITIZE_THREAD__ +#ifdef __SANITIZE_THREAD__ #define __no_sanitize_thread \ __attribute__((no_sanitize("thread"))) #else From 0ce9398aa0830f15f92bbed73853f9861c3e74ff Mon Sep 17 00:00:00 2001 From: wangzijie Date: Thu, 4 Sep 2025 21:57:15 +0800 Subject: [PATCH 1218/1307] proc: fix type confusion in pde_set_flags() Commit 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") missed a key part in the definition of proc_dir_entry: union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; So dereference of ->proc_ops assumes it is a proc_ops structure results in type confusion and make NULL check for 'proc_ops' not work for proc dir. Add !S_ISDIR(dp->mode) test before calling pde_set_flags() to fix it. Link: https://lkml.kernel.org/r/20250904135715.3972782-1-wangzijie1@honor.com Fixes: 2ce3d282bd50 ("proc: fix missing pde_set_flags() for net proc files") Signed-off-by: wangzijie Reported-by: Brad Spengler Closes: https://lore.kernel.org/all/20250903065758.3678537-1-wangzijie1@honor.com/ Cc: Alexey Dobriyan Cc: Al Viro Cc: Christian Brauner Cc: Jiri Slaby Cc: Stefano Brivio Cc: Signed-off-by: Andrew Morton --- fs/proc/generic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bd0c099cfdd2..176281112273 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -393,7 +393,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, if (proc_alloc_inum(&dp->low_ino)) goto out_free_entry; - pde_set_flags(dp); + if (!S_ISDIR(dp->mode)) + pde_set_flags(dp); write_lock(&proc_subdir_lock); dp->parent = dir; From 3260a3f0828e06f5f13fac69fb1999a6d60d9cff Mon Sep 17 00:00:00 2001 From: Stanislav Fort Date: Fri, 5 Sep 2025 13:10:46 +0300 Subject: [PATCH 1219/1307] mm/damon/sysfs: fix use-after-free in state_show() state_show() reads kdamond->damon_ctx without holding damon_sysfs_lock. This allows a use-after-free race: CPU 0 CPU 1 ----- ----- state_show() damon_sysfs_turn_damon_on() ctx = kdamond->damon_ctx; mutex_lock(&damon_sysfs_lock); damon_destroy_ctx(kdamond->damon_ctx); kdamond->damon_ctx = NULL; mutex_unlock(&damon_sysfs_lock); damon_is_running(ctx); /* ctx is freed */ mutex_lock(&ctx->kdamond_lock); /* UAF */ (The race can also occur with damon_sysfs_kdamonds_rm_dirs() and damon_sysfs_kdamond_release(), which free or replace the context under damon_sysfs_lock.) Fix by taking damon_sysfs_lock before dereferencing the context, mirroring the locking used in pid_show(). The bug has existed since state_show() first accessed kdamond->damon_ctx. Link: https://lkml.kernel.org/r/20250905101046.2288-1-disclosure@aisle.com Fixes: a61ea561c871 ("mm/damon/sysfs: link DAMON for virtual address spaces monitoring") Signed-off-by: Stanislav Fort Reported-by: Stanislav Fort Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 6d2b0dab50cb..7b9254cadd5f 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1260,14 +1260,18 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_kdamond *kdamond = container_of(kobj, struct damon_sysfs_kdamond, kobj); - struct damon_ctx *ctx = kdamond->damon_ctx; - bool running; + struct damon_ctx *ctx; + bool running = false; - if (!ctx) - running = false; - else + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + + ctx = kdamond->damon_ctx; + if (ctx) running = damon_is_running(ctx); + mutex_unlock(&damon_sysfs_lock); + return sysfs_emit(buf, "%s\n", running ? damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]); From a68172d95c2845d2b5455b072b4ff51ba32650e9 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 5 Sep 2025 12:15:57 +0300 Subject: [PATCH 1220/1307] MAINTAINERS: add tree entry to numa memblocks and emulation block Link: https://lkml.kernel.org/r/20250905091557.3529937-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Vlastimil Babka Acked-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Cc: David Hildenbrand Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6dcfbd11efef..fbdbf7c012a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16127,6 +16127,7 @@ M: Andrew Morton M: Mike Rapoport L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git F: include/linux/numa_memblks.h F: mm/numa.c F: mm/numa_emulation.c From cfa7b7659757f8d0fc4914429efa90d0d2577dd7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 5 Sep 2025 13:41:49 +0300 Subject: [PATCH 1221/1307] drm/i915/power: fix size for for_each_set_bit() in abox iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit for_each_set_bit() expects size to be in bits, not bytes. The abox mask iteration uses bytes, but it works by coincidence, because the local variable holding the mask is unsigned long, and the mask only ever has bit 2 as the highest bit. Using a smaller type could lead to subtle and very hard to track bugs. Fixes: 62afef2811e4 ("drm/i915/rkl: RKL uses ABOX0 for pixel transfers") Cc: Ville Syrjälä Cc: Matt Roper Cc: stable@vger.kernel.org # v5.9+ Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20250905104149.1144751-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 7ea3baa6efe4bb93d11e1c0e6528b1468d7debf6) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_power.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 273054c22325..c92f3e736228 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1172,7 +1172,7 @@ static void icl_mbus_init(struct intel_display *display) if (DISPLAY_VER(display) == 12) abox_regs |= BIT(0); - for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) + for_each_set_bit(i, &abox_regs, BITS_PER_TYPE(abox_regs)) intel_de_rmw(display, MBUS_ABOX_CTL(i), mask, val); } @@ -1629,11 +1629,11 @@ static void tgl_bw_buddy_init(struct intel_display *display) if (table[config].page_mask == 0) { drm_dbg_kms(display->drm, "Unknown memory configuration; disabling address buddy logic.\n"); - for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) intel_de_write(display, BW_BUDDY_CTL(i), BW_BUDDY_DISABLE); } else { - for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) { + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) { intel_de_write(display, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); From bf59028ea8d42e8d10bb3d847c9982488ee9e3a0 Mon Sep 17 00:00:00 2001 From: Oscar Maes Date: Tue, 2 Sep 2025 17:02:40 +0200 Subject: [PATCH 1222/1307] selftests: net: add test for destination in broadcast packets Add test to check the broadcast ethernet destination field is set correctly. This test sends a broadcast ping, captures it using tcpdump and ensures that all bits of the 6 octet ethernet destination address are correctly set by examining the output capture file. Co-developed-by: Brett A C Sheffield Signed-off-by: Brett A C Sheffield Signed-off-by: Oscar Maes Link: https://patch.msgid.link/20250902150240.4272-1-oscmaes92@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/broadcast_ether_dst.sh | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100755 tools/testing/selftests/net/broadcast_ether_dst.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c7e03e1d6f63..2b31d4a93ad7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -116,6 +116,7 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_ether_dst.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh new file mode 100755 index 000000000000..334a7eca8a80 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_ether_dst.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Brett A C Sheffield +# Author: Oscar Maes +# +# Ensure destination ethernet field is correctly set for +# broadcast packets + +source lib.sh + +CLIENT_IP4="192.168.0.1" +GW_IP4="192.168.0.2" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth \ + peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0 + + ip -net "${SERVER_NS}" link set link1 up + + ip -net "${CLIENT_NS}" route add default via "${GW_IP4}" + ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55 +} + +cleanup() { + rm -f "${CAPFILE}" "${OUTPUT}" + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +test_broadcast_ether_dst() { + local rc=0 + CAPFILE=$(mktemp -u cap.XXXXXXXXXX) + OUTPUT=$(mktemp -u out.XXXXXXXXXX) + + echo "Testing ethernet broadcast destination" + + # start tcpdump listening for icmp + # tcpdump will exit after receiving a single packet + # timeout will kill tcpdump if it is still running after 2s + timeout 2s ip netns exec "${CLIENT_NS}" \ + tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" & + pid=$! + slowwait 1 grep -qs "listening" "${OUTPUT}" + + # send broadcast ping + ip netns exec "${CLIENT_NS}" \ + ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null + + # wait for tcpdump for exit after receiving packet + wait "${pid}" + + # compare ethernet destination field to ff:ff:ff:ff:ff:ff + ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \ + awk '{sub(/,/,"",$3); print $3}') + if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \ + "got ${ether_dst}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_broadcast_ether_dst + +exit $? From d2e1b84c5141ff2ad465279acfc3cf943c960b78 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 8 Sep 2025 15:15:51 -0700 Subject: [PATCH 1223/1307] fs/resctrl: Eliminate false positive lockdep warning when reading SNC counters Running resctrl_tests on an SNC-2 system with lockdep debugging enabled triggers several warnings with following trace: WARNING: CPU: 0 PID: 1914 at kernel/cpu.c:528 lockdep_assert_cpus_held ... Call Trace: __mon_event_count ? __lock_acquire ? __pfx___mon_event_count mon_event_count ? __pfx_smp_mon_event_count smp_mon_event_count smp_call_on_cpu_callback get_cpu_cacheinfo_level() called from __mon_event_count() requires CPU hotplug lock to be held. The hotplug lock is indeed held during this time, as confirmed by the lockdep_assert_cpus_held() within mon_event_read() that calls mon_event_count() via IPI, but the lockdep tracking is not able to follow the IPI. Fresh CPU cache information via get_cpu_cacheinfo_level() from __mon_event_count() was added to support the fix for the issue where resctrl inappropriately maintained links to L3 cache information that will be stale in the case when the associated CPU goes offline. Keep the cacheinfo ID in struct rdt_mon_domain to ensure that resctrl does not maintain stale cache information while CPUs can go offline. Return to using a pointer to the L3 cache information (struct cacheinfo) in struct rmid_read, rmid_read::ci. Initialize rmid_read::ci before the IPI where it is used. CPU hotplug lock is held across rmid_read::ci initialization and use to ensure that it points to accurate cache information. Fixes: 594902c986e2 ("x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem") Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov (AMD) --- fs/resctrl/ctrlmondata.c | 2 +- fs/resctrl/internal.h | 4 ++-- fs/resctrl/monitor.c | 6 ++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index d98e0d2de09f..3c39cfacb251 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -625,11 +625,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) */ list_for_each_entry(d, &r->mon_domains, hdr.list) { if (d->ci_id == domid) { - rr.ci_id = d->ci_id; cpu = cpumask_any(&d->hdr.cpu_mask); ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); if (!ci) continue; + rr.ci = ci; mon_event_read(&rr, r, NULL, rdtgrp, &ci->shared_cpu_map, evtid, false); goto checkresult; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 0a1eedba2b03..9a8cf6f11151 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -98,7 +98,7 @@ struct mon_data { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. + * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -112,7 +112,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - unsigned int ci_id; + struct cacheinfo *ci; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index f5637855c3ac..7326c28a7908 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -361,7 +361,6 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; - struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -389,8 +388,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!ci || ci->id != rr->ci_id) + if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) return -EINVAL; /* @@ -402,7 +400,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci_id != rr->ci_id) + if (d->ci_id != rr->ci->id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); From 2682e7a317504a9d81cbb397249d4299e84dfadd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Sep 2025 12:17:34 +0300 Subject: [PATCH 1224/1307] wifi: iwlwifi: fix 130/1030 configs The 130/1030 devices are really derivatives of 6030, with some small differences not pertaining to the MAC, so they must use the 6030 MAC config. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220472 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220517 Fixes: 35ac275ebe0c ("wifi: iwlwifi: cfg: finish config split") Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250909121728.8e4911f12528.I3aa7194012a4b584fbd5ddaa3a77e483280f1de4@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f9e2095d6490..7e56e4ff7642 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -124,13 +124,13 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x0082, 0x1304, iwl6005_mac_cfg)},/* low 5GHz active */ {IWL_PCI_DEVICE(0x0082, 0x1305, iwl6005_mac_cfg)},/* high 5GHz active */ -/* 6x30 Series */ - {IWL_PCI_DEVICE(0x008A, 0x5305, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x008A, 0x5307, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x008A, 0x5325, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x008A, 0x5327, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x008B, 0x5315, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x008B, 0x5317, iwl1000_mac_cfg)}, +/* 1030/6x30 Series */ + {IWL_PCI_DEVICE(0x008A, 0x5305, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x008A, 0x5307, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x008A, 0x5325, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x008A, 0x5327, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x008B, 0x5315, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x008B, 0x5317, iwl6030_mac_cfg)}, {IWL_PCI_DEVICE(0x0090, 0x5211, iwl6030_mac_cfg)}, {IWL_PCI_DEVICE(0x0090, 0x5215, iwl6030_mac_cfg)}, {IWL_PCI_DEVICE(0x0090, 0x5216, iwl6030_mac_cfg)}, @@ -181,12 +181,12 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x08AE, 0x1027, iwl1000_mac_cfg)}, /* 130 Series WiFi */ - {IWL_PCI_DEVICE(0x0896, 0x5005, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x0896, 0x5007, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x0897, 0x5015, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x0897, 0x5017, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x0896, 0x5025, iwl1000_mac_cfg)}, - {IWL_PCI_DEVICE(0x0896, 0x5027, iwl1000_mac_cfg)}, + {IWL_PCI_DEVICE(0x0896, 0x5005, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x0896, 0x5007, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x0897, 0x5015, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x0897, 0x5017, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x0896, 0x5025, iwl6030_mac_cfg)}, + {IWL_PCI_DEVICE(0x0896, 0x5027, iwl6030_mac_cfg)}, /* 2x00 Series */ {IWL_PCI_DEVICE(0x0890, 0x4022, iwl2000_mac_cfg)}, From 15f519e9f883b316d86e2bb6b767a023aafd9d83 Mon Sep 17 00:00:00 2001 From: Alex Markuze Date: Tue, 12 Aug 2025 09:57:38 +0000 Subject: [PATCH 1225/1307] ceph: fix race condition validating r_parent before applying state Add validation to ensure the cached parent directory inode matches the directory info in MDS replies. This prevents client-side race conditions where concurrent operations (e.g. rename) cause r_parent to become stale between request initiation and reply processing, which could lead to applying state changes to incorrect directory inodes. [ idryomov: folded a kerneldoc fixup and a follow-up fix from Alex to move CEPH_CAP_PIN reference when r_parent is updated: When the parent directory lock is not held, req->r_parent can become stale and is updated to point to the correct inode. However, the associated CEPH_CAP_PIN reference was not being adjusted. The CEPH_CAP_PIN is a reference on an inode that is tracked for accounting purposes. Moving this pin is important to keep the accounting balanced. When the pin was not moved from the old parent to the new one, it created two problems: The reference on the old, stale parent was never released, causing a reference leak. A reference for the new parent was never acquired, creating the risk of a reference underflow later in ceph_mdsc_release_request(). This patch corrects the logic by releasing the pin from the old parent and acquiring it for the new parent when r_parent is switched. This ensures reference accounting stays balanced. ] Cc: stable@vger.kernel.org Signed-off-by: Alex Markuze Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 14 ++-- fs/ceph/dir.c | 17 ++--- fs/ceph/file.c | 24 +++--- fs/ceph/inode.c | 7 +- fs/ceph/mds_client.c | 172 ++++++++++++++++++++++++++----------------- fs/ceph/mds_client.h | 18 ++++- 6 files changed, 145 insertions(+), 107 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fdd404fc8112..f3fe786b4143 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct rb_node *rp; - int pathlen = 0; - u64 pathbase; char *path; mutex_lock(&mdsc->mutex); @@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, - &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL; spin_lock(&req->r_dentry->d_lock); @@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_dentry, path ? path : ""); spin_unlock(&req->r_dentry->d_lock); - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); } else if (req->r_path1) { seq_printf(s, " #%llx/%s", req->r_ino1.ino, req->r_path1); @@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, - &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0); if (IS_ERR(path)) path = NULL; spin_lock(&req->r_old_dentry->d_lock); @@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_old_dentry, path ? path : ""); spin_unlock(&req->r_old_dentry->d_lock); - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { if (req->r_ino2.ino) seq_printf(s, " #%llx/%s", req->r_ino2.ino, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8478e7e75df6..32973c62c1a2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1271,10 +1271,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* If op failed, mark everyone involved for errors */ if (result) { - int pathlen = 0; - u64 base = 0; - char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, - &base, 0); + struct ceph_path_info path_info = {0}; + char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); /* mark error on parent + clear complete */ mapping_set_error(req->r_parent->i_mapping, result); @@ -1288,8 +1286,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, mapping_set_error(req->r_old_inode->i_mapping, result); pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); - ceph_mdsc_free_path(path, pathlen); + path_info.vino.ino, IS_ERR(path) ? "<>" : path, result); + ceph_mdsc_free_path_info(&path_info); } out: iput(req->r_old_inode); @@ -1347,8 +1345,6 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) int err = -EROFS; int op; char *path; - int pathlen; - u64 pathbase; if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ @@ -1367,14 +1363,15 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (!dn) { try_async = false; } else { - path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false; err = 0; } else { err = ceph_mds_check_access(mdsc, path, MAY_WRITE); } - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); dput(dn); /* For none EACCES cases will let the MDS do the mds auth check */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c02f100f8552..978acd3d4b32 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -368,8 +368,6 @@ int ceph_open(struct inode *inode, struct file *file) int flags, fmode, wanted; struct dentry *dentry; char *path; - int pathlen; - u64 pathbase; bool do_sync = false; int mask = MAY_READ; @@ -399,14 +397,15 @@ int ceph_open(struct inode *inode, struct file *file) if (!dentry) { do_sync = true; } else { - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true; err = 0; } else { err = ceph_mds_check_access(mdsc, path, mask); } - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); dput(dentry); /* For none EACCES cases will let the MDS do the mds auth check */ @@ -614,15 +613,13 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, mapping_set_error(req->r_parent->i_mapping, result); if (result) { - int pathlen = 0; - u64 base = 0; - char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, - &base, 0); + struct ceph_path_info path_info = {0}; + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); pr_warn_client(cl, "async create failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); - ceph_mdsc_free_path(path, pathlen); + path_info.vino.ino, IS_ERR(path) ? "<>" : path, result); + ceph_mdsc_free_path_info(&path_info); ceph_dir_clear_complete(req->r_parent); if (!d_unhashed(dentry)) @@ -791,8 +788,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, int mask; int err; char *path; - int pathlen; - u64 pathbase; doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", dir, ceph_vinop(dir), dentry, dentry, @@ -814,7 +809,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (!dn) { try_async = false; } else { - path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); if (IS_ERR(path)) { try_async = false; err = 0; @@ -826,7 +822,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, mask |= MAY_WRITE; err = ceph_mds_check_access(mdsc, path, mask); } - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); dput(dn); /* For none EACCES cases will let the MDS do the mds auth check */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fc543075b827..8ac89ce6435c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2487,22 +2487,21 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, int truncate_retry = 20; /* The RMW will take around 50ms */ struct dentry *dentry; char *path; - int pathlen; - u64 pathbase; bool do_sync = false; dentry = d_find_alias(inode); if (!dentry) { do_sync = true; } else { - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + struct ceph_path_info path_info; + path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); if (IS_ERR(path)) { do_sync = true; err = 0; } else { err = ceph_mds_check_access(mdsc, path, MAY_WRITE); } - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); dput(dentry); /* For none EACCES cases will let the MDS do the mds auth check */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0f497c39ff82..3bc72b47fe4d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2681,8 +2681,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * ceph_mdsc_build_path - build a path string to a given dentry * @mdsc: mds client * @dentry: dentry to which path should be built - * @plen: returned length of string - * @pbase: returned base inode number + * @path_info: output path, length, base ino+snap, and freepath ownership flag * @for_wire: is this path going to be sent to the MDS? * * Build a string that represents the path to the dentry. This is mostly called @@ -2700,7 +2699,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * foo/.snap/bar -> foo//bar */ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, - int *plen, u64 *pbase, int for_wire) + struct ceph_path_info *path_info, int for_wire) { struct ceph_client *cl = mdsc->fsc->client; struct dentry *cur; @@ -2810,16 +2809,28 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); } - *pbase = base; - *plen = PATH_MAX - 1 - pos; + /* Initialize the output structure */ + memset(path_info, 0, sizeof(*path_info)); + + path_info->vino.ino = base; + path_info->pathlen = PATH_MAX - 1 - pos; + path_info->path = path + pos; + path_info->freepath = true; + + /* Set snap from dentry if available */ + if (d_inode(dentry)) + path_info->vino.snap = ceph_snap(d_inode(dentry)); + else + path_info->vino.snap = CEPH_NOSNAP; + doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry), - base, *plen, path + pos); + base, PATH_MAX - 1 - pos, path + pos); return path + pos; } static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, - struct inode *dir, const char **ppath, int *ppathlen, - u64 *pino, bool *pfreepath, bool parent_locked) + struct inode *dir, struct ceph_path_info *path_info, + bool parent_locked) { char *path; @@ -2828,41 +2839,47 @@ static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry dir = d_inode_rcu(dentry->d_parent); if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP && !IS_ENCRYPTED(dir)) { - *pino = ceph_ino(dir); + path_info->vino.ino = ceph_ino(dir); + path_info->vino.snap = ceph_snap(dir); rcu_read_unlock(); - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; + path_info->path = dentry->d_name.name; + path_info->pathlen = dentry->d_name.len; + path_info->freepath = false; return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); if (IS_ERR(path)) return PTR_ERR(path); - *ppath = path; - *pfreepath = true; + /* + * ceph_mdsc_build_path already fills path_info, including snap handling. + */ return 0; } -static int build_inode_path(struct inode *inode, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath) +static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; if (ceph_snap(inode) == CEPH_NOSNAP) { - *pino = ceph_ino(inode); - *ppathlen = 0; + path_info->vino.ino = ceph_ino(inode); + path_info->vino.snap = ceph_snap(inode); + path_info->pathlen = 0; + path_info->freepath = false; return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); - *ppath = path; - *pfreepath = true; + /* + * ceph_mdsc_build_path already fills path_info, including snap from dentry. + * Override with inode's snap since that's what this function is for. + */ + path_info->vino.snap = ceph_snap(inode); return 0; } @@ -2872,26 +2889,32 @@ static int build_inode_path(struct inode *inode, */ static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, struct dentry *rdentry, struct inode *rdiri, - const char *rpath, u64 rino, const char **ppath, - int *pathlen, u64 *ino, bool *freepath, + const char *rpath, u64 rino, + struct ceph_path_info *path_info, bool parent_locked) { struct ceph_client *cl = mdsc->fsc->client; int r = 0; + /* Initialize the output structure */ + memset(path_info, 0, sizeof(*path_info)); + if (rinode) { - r = build_inode_path(rinode, ppath, pathlen, ino, freepath); + r = build_inode_path(rinode, path_info); doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, - freepath, parent_locked); - doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); + r = build_dentry_path(mdsc, rdentry, rdiri, path_info, parent_locked); + doutc(cl, " dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino, + path_info->pathlen, path_info->path); } else if (rpath || rino) { - *ino = rino; - *ppath = rpath; - *pathlen = rpath ? strlen(rpath) : 0; - doutc(cl, " path %.*s\n", *pathlen, rpath); + path_info->vino.ino = rino; + path_info->vino.snap = CEPH_NOSNAP; + path_info->path = rpath; + path_info->pathlen = rpath ? strlen(rpath) : 0; + path_info->freepath = false; + + doutc(cl, " path %.*s\n", path_info->pathlen, rpath); } return r; @@ -2968,11 +2991,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_request_head_legacy *lhead; - const char *path1 = NULL; - const char *path2 = NULL; - u64 ino1 = 0, ino2 = 0; - int pathlen1 = 0, pathlen2 = 0; - bool freepath1 = false, freepath2 = false; + struct ceph_path_info path_info1 = {0}; + struct ceph_path_info path_info2 = {0}; struct dentry *old_dentry = NULL; int len; u16 releases; @@ -2982,25 +3002,49 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, u16 request_head_version = mds_supported_head_version(session); kuid_t caller_fsuid = req->r_cred->fsuid; kgid_t caller_fsgid = req->r_cred->fsgid; + bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, - req->r_parent, req->r_path1, req->r_ino1.ino, - &path1, &pathlen1, &ino1, &freepath1, - test_bit(CEPH_MDS_R_PARENT_LOCKED, - &req->r_req_flags)); + req->r_parent, req->r_path1, req->r_ino1.ino, + &path_info1, parent_locked); if (ret < 0) { msg = ERR_PTR(ret); goto out; } + /* + * When the parent directory's i_rwsem is *not* locked, req->r_parent may + * have become stale (e.g. after a concurrent rename) between the time the + * dentry was looked up and now. If we detect that the stored r_parent + * does not match the inode number we just encoded for the request, switch + * to the correct inode so that the MDS receives a valid parent reference. + */ + if (!parent_locked && req->r_parent && path_info1.vino.ino && + ceph_ino(req->r_parent) != path_info1.vino.ino) { + struct inode *old_parent = req->r_parent; + struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL); + if (!IS_ERR(correct_dir)) { + WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n", + ceph_ino(old_parent), path_info1.vino.ino); + /* + * Transfer CEPH_CAP_PIN from the old parent to the new one. + * The pin was taken earlier in ceph_mdsc_submit_request(). + */ + ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN); + iput(old_parent); + req->r_parent = correct_dir; + ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); + } + } + /* If r_old_dentry is set, then assume that its parent is locked */ if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; ret = set_request_path_attr(mdsc, NULL, old_dentry, - req->r_old_dentry_dir, - req->r_path2, req->r_ino2.ino, - &path2, &pathlen2, &ino2, &freepath2, true); + req->r_old_dentry_dir, + req->r_path2, req->r_ino2.ino, + &path_info2, true); if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; @@ -3031,7 +3075,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, /* filepaths */ len += 2 * (1 + sizeof(u32) + sizeof(u64)); - len += pathlen1 + pathlen2; + len += path_info1.pathlen + path_info2.pathlen; /* cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -3039,9 +3083,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, !!req->r_old_inode_drop + !!req->r_old_dentry_drop); if (req->r_dentry_drop) - len += pathlen1; + len += path_info1.pathlen; if (req->r_old_dentry_drop) - len += pathlen2; + len += path_info2.pathlen; /* MClientRequest tail */ @@ -3154,8 +3198,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead->ino = cpu_to_le64(req->r_deleg_ino); lhead->args = req->r_args; - ceph_encode_filepath(&p, end, ino1, path1); - ceph_encode_filepath(&p, end, ino2, path2); + ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path); + ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path); /* make note of release offset, in case we need to replay */ req->r_request_release_offset = p - msg->front.iov_base; @@ -3218,11 +3262,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.data_off = cpu_to_le16(0); out_free2: - if (freepath2) - ceph_mdsc_free_path((char *)path2, pathlen2); + ceph_mdsc_free_path_info(&path_info2); out_free1: - if (freepath1) - ceph_mdsc_free_path((char *)path1, pathlen1); + ceph_mdsc_free_path_info(&path_info1); out: return msg; out_err: @@ -4579,24 +4621,20 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; struct ceph_cap *cap; - char *path; - int pathlen = 0, err; - u64 pathbase; + struct ceph_path_info path_info = {0}; + int err; u64 snap_follows; dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, + char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, recon_state->msg_version >= 2); dput(dentry); if (IS_ERR(path)) { err = PTR_ERR(path); goto out_err; } - } else { - path = NULL; - pathbase = 0; } spin_lock(&ci->i_ceph_lock); @@ -4629,7 +4667,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v2.pathbase = cpu_to_le64(pathbase); + rec.v2.pathbase = cpu_to_le64(path_info.vino.ino); rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { @@ -4644,7 +4682,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) ts = inode_get_atime(inode); ceph_encode_timespec64(&rec.v1.atime, &ts); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v1.pathbase = cpu_to_le64(pathbase); + rec.v1.pathbase = cpu_to_le64(path_info.vino.ino); } if (list_empty(&ci->i_cap_snaps)) { @@ -4706,7 +4744,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) sizeof(struct ceph_filelock); rec.v2.flock_len = cpu_to_le32(struct_len); - struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); + struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2); if (struct_v >= 2) struct_len += sizeof(u64); /* snap_follows */ @@ -4730,7 +4768,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) ceph_pagelist_encode_8(pagelist, 1); ceph_pagelist_encode_32(pagelist, struct_len); } - ceph_pagelist_encode_string(pagelist, path, pathlen); + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); ceph_locks_to_pagelist(flocks, pagelist, num_fcntl_locks, num_flock_locks); @@ -4741,17 +4779,17 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) } else { err = ceph_pagelist_reserve(pagelist, sizeof(u64) + sizeof(u32) + - pathlen + sizeof(rec.v1)); + path_info.pathlen + sizeof(rec.v1)); if (err) goto out_err; ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); - ceph_pagelist_encode_string(pagelist, path, pathlen); + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); } out_err: - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); if (!err) recon_state->nr_caps++; return err; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3e2a6fa7c19a..0428a5eaf28c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -617,14 +617,24 @@ extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); -static inline void ceph_mdsc_free_path(char *path, int len) +/* + * Structure to group path-related output parameters for build_*_path functions + */ +struct ceph_path_info { + const char *path; + int pathlen; + struct ceph_vino vino; + bool freepath; +}; + +static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info) { - if (!IS_ERR_OR_NULL(path)) - __putname(path - (PATH_MAX - 1 - len)); + if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path)) + __putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen)); } extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, - struct dentry *dentry, int *plen, u64 *base, + struct dentry *dentry, struct ceph_path_info *path_info, int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); From bec324f33d1ed346394b2eee25bf6dbf3511f727 Mon Sep 17 00:00:00 2001 From: Alex Markuze Date: Tue, 12 Aug 2025 09:57:39 +0000 Subject: [PATCH 1226/1307] ceph: fix race condition where r_parent becomes stale before sending message When the parent directory's i_rwsem is not locked, req->r_parent may become stale due to concurrent operations (e.g. rename) between dentry lookup and message creation. Validate that r_parent matches the encoded parent inode and update to the correct inode if a mismatch is detected. [ idryomov: folded a follow-up fix from Alex to drop extra reference from ceph_get_reply_dir() in ceph_fill_trace(): ceph_get_reply_dir() may return a different, referenced inode when r_parent is stale and the parent directory lock is not held. ceph_fill_trace() used that inode but failed to drop the reference when it differed from req->r_parent, leaking an inode reference. Keep the directory inode in a local variable and iput() it at function end if it does not match req->r_parent. ] Cc: stable@vger.kernel.org Signed-off-by: Alex Markuze Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 81 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 12 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8ac89ce6435c..f67025465de0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data) return 0; } +/* + * Check if the parent inode matches the vino from directory reply info + */ +static inline bool ceph_vino_matches_parent(struct inode *parent, + struct ceph_vino vino) +{ + return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap; +} + +/* + * Validate that the directory inode referenced by @req->r_parent matches the + * inode number and snapshot id contained in the reply's directory record. If + * they do not match – which can theoretically happen if the parent dentry was + * moved between the time the request was issued and the reply arrived – fall + * back to looking up the correct inode in the inode cache. + * + * A reference is *always* returned. Callers that receive a different inode + * than the original @parent are responsible for dropping the extra reference + * once the reply has been processed. + */ +static struct inode *ceph_get_reply_dir(struct super_block *sb, + struct inode *parent, + struct ceph_mds_reply_info_parsed *rinfo) +{ + struct ceph_vino vino; + + if (unlikely(!rinfo->diri.in)) + return parent; /* nothing to compare against */ + + /* If we didn't have a cached parent inode to begin with, just bail out. */ + if (!parent) + return NULL; + + vino.ino = le64_to_cpu(rinfo->diri.in->ino); + vino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + if (likely(ceph_vino_matches_parent(parent, vino))) + return parent; /* matches – use the original reference */ + + /* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */ + WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n", + ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap); + + return ceph_get_inode(sb, vino, NULL); +} + /** * ceph_new_inode - allocate a new inode in advance of an expected create * @dir: parent directory for new inode @@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct ceph_vino tvino, dvino; struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_client *cl = fsc->client; + struct inode *parent_dir = NULL; int err = 0; doutc(cl, "%p is_dentry %d is_target %d\n", req, @@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } if (rinfo->head->is_dentry) { - struct inode *dir = req->r_parent; - - if (dir) { - err = ceph_fill_inode(dir, NULL, &rinfo->diri, + /* + * r_parent may be stale, in cases when R_PARENT_LOCKED is not set, + * so we need to get the correct inode + */ + parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo); + if (unlikely(IS_ERR(parent_dir))) { + err = PTR_ERR(parent_dir); + goto done; + } + if (parent_dir) { + err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri, rinfo->dirfrag, session, -1, &req->r_caps_reservation); if (err < 0) @@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) WARN_ON_ONCE(1); } - if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && + if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { bool is_nokey = false; struct qstr dname; struct dentry *dn, *parent; struct fscrypt_str oname = FSTR_INIT(NULL, 0); - struct ceph_fname fname = { .dir = dir, + struct ceph_fname fname = { .dir = parent_dir, .name = rinfo->dname, .ctext = rinfo->altname, .name_len = rinfo->dname_len, @@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) BUG_ON(!rinfo->head->is_target); BUG_ON(req->r_dentry); - parent = d_find_any_alias(dir); + parent = d_find_any_alias(parent_dir); BUG_ON(!parent); - err = ceph_fname_alloc_buffer(dir, &oname); + err = ceph_fname_alloc_buffer(parent_dir, &oname); if (err < 0) { dput(parent); goto done; @@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey); if (err < 0) { dput(parent); - ceph_fname_free_buffer(dir, &oname); + ceph_fname_free_buffer(parent_dir, &oname); goto done; } dname.name = oname.name; @@ -1595,7 +1649,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) dname.len, dname.name, dn); if (!dn) { dput(parent); - ceph_fname_free_buffer(dir, &oname); + ceph_fname_free_buffer(parent_dir, &oname); err = -ENOMEM; goto done; } @@ -1610,12 +1664,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ceph_snap(d_inode(dn)) != tvino.snap)) { doutc(cl, " dn %p points to wrong inode %p\n", dn, d_inode(dn)); - ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(parent_dir); d_delete(dn); dput(dn); goto retry_lookup; } - ceph_fname_free_buffer(dir, &oname); + ceph_fname_free_buffer(parent_dir, &oname); req->r_dentry = dn; dput(parent); @@ -1794,6 +1848,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) &dvino, ptvino); } done: + /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */ + if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent)) + iput(parent_dir); doutc(cl, "done err=%d\n", err); return err; } From cce7c15faaac79b532a07ed6ab8332280ad83762 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 27 Aug 2025 20:17:08 +0200 Subject: [PATCH 1227/1307] ceph: always call ceph_shift_unused_folios_left() The function ceph_process_folio_batch() sets folio_batch entries to NULL, which is an illegal state. Before folio_batch_release() crashes due to this API violation, the function ceph_shift_unused_folios_left() is supposed to remove those NULLs from the array. However, since commit ce80b76dd327 ("ceph: introduce ceph_process_folio_batch() method"), this shifting doesn't happen anymore because the "for" loop got moved to ceph_process_folio_batch(), and now the `i` variable that remains in ceph_writepages_start() doesn't get incremented anymore, making the shifting effectively unreachable much of the time. Later, commit 1551ec61dc55 ("ceph: introduce ceph_submit_write() method") added more preconditions for doing the shift, replacing the `i` check (with something that is still just as broken): - if ceph_process_folio_batch() fails, shifting never happens - if ceph_move_dirty_page_in_page_array() was never called (because ceph_process_folio_batch() has returned early for some of various reasons), shifting never happens - if `processed_in_fbatch` is zero (because ceph_process_folio_batch() has returned early for some of the reasons mentioned above or because ceph_move_dirty_page_in_page_array() has failed), shifting never happens Since those two commits, any problem in ceph_process_folio_batch() could crash the kernel, e.g. this way: BUG: kernel NULL pointer dereference, address: 0000000000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: Oops: 0002 [#1] SMP NOPTI CPU: 172 UID: 0 PID: 2342707 Comm: kworker/u778:8 Not tainted 6.15.10-cm4all1-es #714 NONE Hardware name: Dell Inc. PowerEdge R7615/0G9DHV, BIOS 1.6.10 12/08/2023 Workqueue: writeback wb_workfn (flush-ceph-1) RIP: 0010:folios_put_refs+0x85/0x140 Code: 83 c5 01 39 e8 7e 76 48 63 c5 49 8b 5c c4 08 b8 01 00 00 00 4d 85 ed 74 05 41 8b 44 ad 00 48 8b 15 b0 > RSP: 0018:ffffb880af8db778 EFLAGS: 00010207 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000003 RDX: ffffe377cc3b0000 RSI: 0000000000000000 RDI: ffffb880af8db8c0 RBP: 0000000000000000 R08: 000000000000007d R09: 000000000102b86f R10: 0000000000000001 R11: 00000000000000ac R12: ffffb880af8db8c0 R13: 0000000000000000 R14: 0000000000000000 R15: ffff9bd262c97000 FS: 0000000000000000(0000) GS:ffff9c8efc303000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000034 CR3: 0000000160958004 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: ceph_writepages_start+0xeb9/0x1410 The crash can be reproduced easily by changing the ceph_check_page_before_write() return value to `-E2BIG`. (Interestingly, the crash happens only if `huge_zero_folio` has already been allocated; without `huge_zero_folio`, is_huge_zero_folio(NULL) returns true and folios_put_refs() skips NULL entries instead of dereferencing them. That makes reproducing the bug somewhat unreliable. See https://lore.kernel.org/20250826231626.218675-1-max.kellermann@ionos.com for a discussion of this detail.) My suggestion is to move the ceph_shift_unused_folios_left() to right after ceph_process_folio_batch() to ensure it always gets called to fix up the illegal folio_batch state. Cc: stable@vger.kernel.org Fixes: ce80b76dd327 ("ceph: introduce ceph_process_folio_batch() method") Link: https://lore.kernel.org/ceph-devel/aK4v548CId5GIKG1@swift.blarg.de/ Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8b202d789e93..8bc66b45dade 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1687,6 +1687,7 @@ static int ceph_writepages_start(struct address_space *mapping, process_folio_batch: rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc); + ceph_shift_unused_folios_left(&ceph_wbc.fbatch); if (rc) goto release_folios; @@ -1695,8 +1696,6 @@ static int ceph_writepages_start(struct address_space *mapping, goto release_folios; if (ceph_wbc.processed_in_fbatch) { - ceph_shift_unused_folios_left(&ceph_wbc.fbatch); - if (folio_batch_count(&ceph_wbc.fbatch) == 0 && ceph_wbc.locked_pages < ceph_wbc.max_pages) { doutc(cl, "reached end fbatch, trying for more\n"); From 249e0a47cdb46bb9eae65511c569044bd8698d7d Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 28 Aug 2025 13:15:52 +0200 Subject: [PATCH 1228/1307] ceph: fix crash after fscrypt_encrypt_pagecache_blocks() error The function move_dirty_folio_in_page_array() was created by commit ce80b76dd327 ("ceph: introduce ceph_process_folio_batch() method") by moving code from ceph_writepages_start() to this function. This new function is supposed to return an error code which is checked by the caller (now ceph_process_folio_batch()), and on error, the caller invokes redirty_page_for_writepage() and then breaks from the loop. However, the refactoring commit has gone wrong, and it by accident, it always returns 0 (= success) because it first NULLs the pointer and then returns PTR_ERR(NULL) which is always 0. This means errors are silently ignored, leaving NULL entries in the page array, which may later crash the kernel. The simple solution is to call PTR_ERR() before clearing the pointer. Cc: stable@vger.kernel.org Fixes: ce80b76dd327 ("ceph: introduce ceph_process_folio_batch() method") Link: https://lore.kernel.org/ceph-devel/aK4v548CId5GIKG1@swift.blarg.de/ Signed-off-by: Max Kellermann Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8bc66b45dade..322ed268f14a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1264,7 +1264,9 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping, 0, gfp_flags); if (IS_ERR(pages[index])) { - if (PTR_ERR(pages[index]) == -EINVAL) { + int err = PTR_ERR(pages[index]); + + if (err == -EINVAL) { pr_err_client(cl, "inode->i_blkbits=%hhu\n", inode->i_blkbits); } @@ -1273,7 +1275,7 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping, BUG_ON(ceph_wbc->locked_pages == 0); pages[index] = NULL; - return PTR_ERR(pages[index]); + return err; } } else { pages[index] = &folio->page; From e3c674db356c4303804b2415e7c2b11776cdd8c3 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 4 Sep 2025 14:53:50 +0200 Subject: [PATCH 1229/1307] tunnels: reset the GSO metadata before reusing the skb If a GSO skb is sent through a Geneve tunnel and if Geneve options are added, the split GSO skb might not fit in the MTU anymore and an ICMP frag needed packet can be generated. In such case the ICMP packet might go through the segmentation logic (and dropped) later if it reaches a path were the GSO status is checked and segmentation is required. This is especially true when an OvS bridge is used with a Geneve tunnel attached to it. The following set of actions could lead to the ICMP packet being wrongfully segmented: 1. An skb is constructed by the TCP layer (e.g. gso_type SKB_GSO_TCPV4, segs >= 2). 2. The skb hits the OvS bridge where Geneve options are added by an OvS action before being sent through the tunnel. 3. When the skb is xmited in the tunnel, the split skb does not fit anymore in the MTU and iptunnel_pmtud_build_icmp is called to generate an ICMP fragmentation needed packet. This is done by reusing the original (GSO!) skb. The GSO metadata is not cleared. 4. The ICMP packet being sent back hits the OvS bridge again and because skb_is_gso returns true, it goes through queue_gso_packets... 5. ...where __skb_gso_segment is called. The skb is then dropped. 6. Note that in the above example on re-transmission the skb won't be a GSO one as it would be segmented (len > MSS) and the ICMP packet should go through. Fix this by resetting the GSO information before reusing an skb in iptunnel_pmtud_build_icmp and iptunnel_pmtud_build_icmpv6. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Reported-by: Adrian Moreno Signed-off-by: Antoine Tenart Reviewed-by: Stefano Brivio Link: https://patch.msgid.link/20250904125351.159740-1-atenart@kernel.org Signed-off-by: Paolo Abeni --- net/ipv4/ip_tunnel_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index cc9915543637..2e61ac137128 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -206,6 +206,9 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) return -EINVAL; + if (skb_is_gso(skb)) + skb_gso_reset(skb); + skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); @@ -300,6 +303,9 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) return -EINVAL; + if (skb_is_gso(skb)) + skb_gso_reset(skb); + skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); skb_reset_network_header(skb); From e895f8e29119c8c966ea794af9e9100b10becb88 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 5 Aug 2025 16:10:25 +0800 Subject: [PATCH 1230/1307] hrtimers: Unconditionally update target CPU base after offline timer migration When testing softirq based hrtimers on an ARM32 board, with high resolution mode and NOHZ inactive, softirq based hrtimers fail to expire after being moved away from an offline CPU: CPU0 CPU1 hrtimer_start(..., HRTIMER_MODE_SOFT); cpu_down(CPU1) ... hrtimers_cpu_dying() // Migrate timers to CPU0 smp_call_function_single(CPU0, returgger_next_event); retrigger_next_event() if (!highres && !nohz) return; As retrigger_next_event() is a NOOP when both high resolution timers and NOHZ are inactive CPU0's hrtimer_cpu_base::softirq_expires_next is not updated and the migrated softirq timers never expire unless there is a softirq based hrtimer queued on CPU0 later. Fix this by removing the hrtimer_hres_active() and tick_nohz_active() check in retrigger_next_event(), which enforces a full update of the CPU base. As this is not a fast path the extra cost does not matter. [ tglx: Massaged change log ] Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Co-developed-by: Frederic Weisbecker Signed-off-by: Frederic Weisbecker Signed-off-by: Xiongfeng Wang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250805081025.54235-1-wangxiongfeng2@huawei.com --- kernel/time/hrtimer.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 30899a8cc52c..e8c479329282 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -787,10 +787,10 @@ static void retrigger_next_event(void *arg) * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. + * + * In periodic low resolution mode, the next softirq expiration + * must also be updated. */ - if (!hrtimer_hres_active(base) && !tick_nohz_active) - return; - raw_spin_lock(&base->lock); hrtimer_update_base(base); if (hrtimer_hres_active(base)) @@ -2295,11 +2295,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) &new_base->clock_base[i]); } - /* - * The migration might have changed the first expiring softirq - * timer on this CPU. Update it. - */ - __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); From 641427d5bf90af0625081bf27555418b101274cd Mon Sep 17 00:00:00 2001 From: Alex Tran Date: Wed, 3 Sep 2025 20:17:09 -0700 Subject: [PATCH 1231/1307] docs: networking: can: change bcm_msg_head frames member to support flexible array The documentation of the 'bcm_msg_head' struct does not match how it is defined in 'bcm.h'. Changed the frames member to a flexible array, matching the definition in the header file. See commit 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Signed-off-by: Alex Tran Acked-by: Oliver Hartkopp Link: https://patch.msgid.link/20250904031709.1426895-1-alex.t.tran@gmail.com Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217783 Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index bc1b585355f7..7650c4b5be5f 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -742,7 +742,7 @@ The broadcast manager sends responses to user space in the same form: struct timeval ival1, ival2; /* count and subsequent interval */ canid_t can_id; /* unique can_id for task */ __u32 nframes; /* number of can_frames following */ - struct can_frame frames[0]; + struct can_frame frames[]; }; The aligned payload 'frames' uses the same basic CAN frame structure defined From f5c32370dba668c171c73684f489a3ea0b9503c5 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 4 Sep 2025 15:13:51 -0400 Subject: [PATCH 1232/1307] drm/amd/display: Disable DPCD Probe Quirk Disable dpcd probe quirk to native aux. Signed-off-by: Fangzhi Zuo Reviewed-by: Imre Deak Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4500 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250904191351.746707-1-Jerry.Zuo@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit c5f4fb40584ee591da9fa090c6f265d11cbb1acf) Cc: stable@vger.kernel.org # 6.16.y: 5281cbe0b55a Cc: stable@vger.kernel.org # 6.16.y: 0b4aa85e8981 Cc: stable@vger.kernel.org # 6.16.y: b87ed522b364 Cc: stable@vger.kernel.org # 6.16.y --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7187d5aedf0a..77a9d2c7d318 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -809,6 +809,7 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_dp_aux_init(&aconnector->dm_dp_aux.aux); drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux, &aconnector->base); + drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false); if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP) return; From 70f0b051f82d0234ade2f6753f72a2610048db3b Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Mon, 25 Aug 2025 14:45:33 -0400 Subject: [PATCH 1233/1307] drm/amd/display: Correct sequences and delays for DCN35 PG & RCG [why] The current PG & RCG programming in driver has some gaps and incorrect sequences. [how] Added delays after ungating clocks to allow ramp up, increased polling to allow more time for power up, and removed the incorrect sequences. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Charlene Liu Signed-off-by: Ovidiu Bunea Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1bde5584e297921f45911ae874b0175dce5ed4b5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../amd/display/dc/dccg/dcn35/dcn35_dccg.c | 74 +++++------ .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 115 +++--------------- .../amd/display/dc/hwss/dcn35/dcn35_init.c | 3 - .../amd/display/dc/hwss/dcn351/dcn351_init.c | 3 - .../gpu/drm/amd/display/dc/inc/hw/pg_cntl.h | 1 + .../amd/display/dc/pg/dcn35/dcn35_pg_cntl.c | 78 +++++++----- 7 files changed, 111 insertions(+), 164 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 59c07756130d..f24e1da68269 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1145,6 +1145,7 @@ struct dc_debug_options { bool enable_hblank_borrow; bool force_subvp_df_throttle; uint32_t acpi_transition_bitmasks[MAX_PIPES]; + bool enable_pg_cntl_debug_logs; }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 58c84f555c0f..0ce9489ac6b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -133,30 +133,34 @@ enum dsc_clk_source { }; -static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && allow_rcg) return; switch (inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_symclk32_se_rcg( @@ -385,35 +389,34 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) } } -static void dccg35_set_dppclk_rcg(struct dccg *dccg, - int inst, bool enable) +static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { - struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && allow_rcg) return; switch (inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); break; } - //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 0 : 1); + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_dpstreamclk_rcg( @@ -1177,32 +1180,34 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, } static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, - uint32_t dpp_inst, uint32_t enable) + uint32_t dpp_inst, uint32_t disallow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg) return; switch (dpp_inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg); break; default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable); + /* Wait for clock to ramp */ + if (disallow_rcg) + udelay(10); } static void dccg35_get_pixel_rate_div( @@ -1782,8 +1787,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) //Disable DTO switch (inst) { case 0: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, 0, @@ -1791,8 +1795,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); break; case 1: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, 0, @@ -1800,8 +1803,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, 0, @@ -1809,8 +1811,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, 0, @@ -1821,6 +1822,9 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + udelay(10); } static void dccg35_disable_dscclk(struct dccg *dccg, @@ -1864,6 +1868,9 @@ static void dccg35_disable_dscclk(struct dccg *dccg, default: return; } + + /* Wait for clock ramp */ + udelay(10); } static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) @@ -2349,10 +2356,7 @@ static void dccg35_disable_symclk_se_cb( void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating) { - - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { - dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); - } + dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); } static const struct dccg_funcs dccg35_funcs_new = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index a267f574b619..764eff6a4ec6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -113,6 +113,14 @@ static void enable_memory_low_power(struct dc *dc) } #endif +static void print_pg_status(struct dc *dc, const char *debug_func, const char *debug_log) +{ + if (dc->debug.enable_pg_cntl_debug_logs && dc->res_pool->pg_cntl) { + if (dc->res_pool->pg_cntl->funcs->print_pg_status) + dc->res_pool->pg_cntl->funcs->print_pg_status(dc->res_pool->pg_cntl, debug_func, debug_log); + } +} + void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable) { REG_UPDATE_3(DMU_CLK_CNTL, @@ -137,6 +145,8 @@ void dcn35_init_hw(struct dc *dc) uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; + print_pg_status(dc, __func__, ": start"); + if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -200,10 +210,7 @@ void dcn35_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc->link_srv->blank_all_dp_displays(dc); -/* - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); -*/ + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first @@ -236,6 +243,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); + print_pg_status(dc, __func__, ": after init_pipes"); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, @@ -312,6 +321,7 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); } + print_pg_status(dc, __func__, ": after init_pg_status"); } static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) @@ -500,97 +510,6 @@ void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_ } } -void dcn35_dsc_pg_control( - struct dce_hwseq *hws, - unsigned int dsc_inst, - bool power_on) -{ - uint32_t power_gate = power_on ? 0 : 1; - uint32_t pwr_status = power_on ? 0 : 2; - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_dsc_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - - switch (dsc_inst) { - case 0: /* DSC0 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN16_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 1: /* DSC1 */ - REG_UPDATE(DOMAIN17_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN17_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 2: /* DSC2 */ - REG_UPDATE(DOMAIN18_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN18_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 3: /* DSC3 */ - REG_UPDATE(DOMAIN19_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN19_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - default: - BREAK_TO_DEBUGGER(); - break; - } - - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); -} - -void dcn35_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) -{ - bool force_on = true; /* disable power gating */ - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_hubp_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - /* DCHUBP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - /* DPP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - force_on = true; /* disable power gating */ - if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) - force_on = false; - - /* DCS0/1/2/3/4 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - -} - /* In headless boot cases, DIG may be turned * on which causes HW/SW discrepancies. * To avoid this, power down hardware on boot @@ -1453,6 +1372,8 @@ void dcn35_prepare_bandwidth( } dcn20_prepare_bandwidth(dc, context); + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_optimize_bandwidth( @@ -1461,6 +1382,8 @@ void dcn35_optimize_bandwidth( { struct pg_block_update pg_update_state; + print_pg_status(dc, __func__, ": before rcg and power up"); + dcn20_optimize_bandwidth(dc, context); if (dc->hwss.calc_blocks_to_gate) { @@ -1472,6 +1395,8 @@ void dcn35_optimize_bandwidth( if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, false); } + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_set_drr(struct pipe_ctx **pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index a3ccf805bd16..aefb7c473741 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -115,7 +115,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, .hw_block_power_up = dcn35_hw_block_power_up, @@ -150,7 +149,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -165,7 +163,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 58f2be2a326b..a580a55695c3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -114,7 +114,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn351_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn351_calc_blocks_to_ungate, .hw_block_power_up = dcn351_hw_block_power_up, @@ -145,7 +144,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -159,7 +157,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 44f86cc2d1d6..227e3f8d7e5f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -49,6 +49,7 @@ struct pg_cntl_funcs { void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + void (*print_pg_status)(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c index af21c0a27f86..72bd43f9bbe2 100644 --- a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c @@ -79,16 +79,12 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl = 0; - bool block_enabled; + bool block_enabled = false; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_dsc_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - /*need to enable dscclk regardless DSC_PG*/ - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc && power_on) - pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); - - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_dsc_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, dsc_inst); @@ -111,7 +107,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 1: /* DSC1 */ REG_UPDATE(DOMAIN17_PG_CONFIG, @@ -119,7 +115,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 2: /* DSC2 */ REG_UPDATE(DOMAIN18_PG_CONFIG, @@ -127,7 +123,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 3: /* DSC3 */ REG_UPDATE(DOMAIN19_PG_CONFIG, @@ -135,7 +131,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -144,12 +140,6 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo if (dsc_inst < MAX_PIPES) pg_cntl->pg_pipe_res_enable[PG_DSC][dsc_inst] = power_on; - - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on) { - /*this is to disable dscclk*/ - pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); - } } static bool pg_cntl35_hubp_dpp_pg_status(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst) @@ -189,11 +179,12 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; bool block_enabled; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_hubp_power_gate || + pg_cntl->ctx->dc->debug.disable_dpp_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_hubp_power_gate || - pg_cntl->ctx->dc->debug.disable_dpp_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, hubp_dpp_inst); @@ -213,22 +204,22 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp case 0: /* DPP0 & HUBP0 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 1: /* DPP1 & HUBP1 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 2: /* DPP2 & HUBP2 */ REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 3: /* DPP3 & HUBP3 */ REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -501,6 +492,36 @@ void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl) pg_cntl->pg_res_enable[PG_DWB] = block_enabled; } +static void pg_cntl35_print_pg_status(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log) +{ + int i = 0; + bool block_enabled = false; + + DC_LOG_DEBUG("%s: %s", debug_func, debug_log); + + DC_LOG_DEBUG("PG_CNTL status:\n"); + + block_enabled = pg_cntl35_io_clk_status(pg_cntl); + DC_LOG_DEBUG("ONO0=%d (DCCG, DIO, DCIO)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_mem_status(pg_cntl); + DC_LOG_DEBUG("ONO1=%d (DCHUBBUB, DCHVM, DCHUBBUBMEM)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_plane_otg_status(pg_cntl); + DC_LOG_DEBUG("ONO2=%d (MPC, OPP, OPTC, DWB)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_hpo_pg_status(pg_cntl); + DC_LOG_DEBUG("ONO3=%d (HPO)\n", block_enabled ? 1 : 0); + + for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { + block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DCHUBP%d, DPP%d)\n", 4 + i * 2, block_enabled ? 1 : 0, i, i); + + block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DSC%d)\n", 5 + i * 2, block_enabled ? 1 : 0, i); + } +} + static const struct pg_cntl_funcs pg_cntl35_funcs = { .init_pg_status = pg_cntl35_init_pg_status, .dsc_pg_control = pg_cntl35_dsc_pg_control, @@ -511,7 +532,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .print_pg_status = pg_cntl35_print_pg_status }; struct pg_cntl *pg_cntl35_create( From 60f71f0db7b12f303789ef59949e38ee5838ee8b Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Fri, 5 Sep 2025 10:36:27 -0500 Subject: [PATCH 1234/1307] drm/amd/display: Drop dm_prepare_suspend() and dm_complete() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] dm_prepare_suspend() was added in commit 50e0bae34fa6b ("drm/amd/display: Add and use new dm_prepare_suspend() callback") to allow display to turn off earlier in the suspend sequence. This caused a regression that HDMI audio sometimes didn't work properly after resume unless audio was playing during suspend. [How] Drop dm_prepare_suspend() callback. All code in it will still run during dm_suspend(). Also drop unnecessary dm_complete() callback. dm_complete() was used for failed prepare and also for any case of successful resume. The code in it already runs in dm_resume(). This change will introduce more time that the display is turned on during suspend sequence. The compositor can turn it off sooner if desired. Cc: Harry Wentland Reported-by: Przemysław Kopa Closes: https://lore.kernel.org/amd-gfx/1cea0d56-7739-4ad9-bf8e-c9330faea2bb@kernel.org/T/#m383d9c08397043a271b36c32b64bb80e524e4b0f Reported-by: Kalvin Closes: https://github.com/alsa-project/alsa-lib/issues/465 Closes: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4809 Fixes: 50e0bae34fa6b ("drm/amd/display: Add and use new dm_prepare_suspend() callback") Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 2fd653b9bb5aacec5d4c421ab290905898fe85a2) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7808a647a306..352b3dcd0e0e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3127,25 +3127,6 @@ static void dm_destroy_cached_state(struct amdgpu_device *adev) dm->cached_state = NULL; } -static void dm_complete(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - dm_destroy_cached_state(adev); -} - -static int dm_prepare_suspend(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - if (amdgpu_in_reset(adev)) - return 0; - - WARN_ON(adev->dm.cached_state); - - return dm_cache_state(adev); -} - static int dm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3571,10 +3552,8 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .early_fini = amdgpu_dm_early_fini, .hw_init = dm_hw_init, .hw_fini = dm_hw_fini, - .prepare_suspend = dm_prepare_suspend, .suspend = dm_suspend, .resume = dm_resume, - .complete = dm_complete, .is_idle = dm_is_idle, .wait_for_idle = dm_wait_for_idle, .check_soft_reset = dm_check_soft_reset, From 1dfd2864a1c4909147663e5a27c055f50f7c2796 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Thu, 28 Aug 2025 22:26:22 +1000 Subject: [PATCH 1235/1307] drm/amd/display: remove oem i2c adapter on finish Fixes a bug where unbinding of the GPU would leave the oem i2c adapter registered resulting in a null pointer dereference when applications try to access the invalid device. Fixes: 3d5470c97314 ("drm/amd/display/dm: add support for OEM i2c bus") Cc: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Geoffrey McRae Signed-off-by: Alex Deucher (cherry picked from commit 89923fb7ead4fdd37b78dd49962d9bb5892403e6) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 352b3dcd0e0e..4e86370ae705 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2913,6 +2913,17 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) return 0; } +static void dm_oem_i2c_hw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_display_manager *dm = &adev->dm; + + if (dm->oem_i2c) { + i2c_del_adapter(&dm->oem_i2c->base); + kfree(dm->oem_i2c); + dm->oem_i2c = NULL; + } +} + /** * dm_hw_init() - Initialize DC device * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. @@ -2963,7 +2974,7 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - kfree(adev->dm.oem_i2c); + dm_oem_i2c_hw_fini(adev); amdgpu_dm_hpd_fini(adev); From ce42a3b581a9db10765eb835840b04dbe7972135 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 25 Aug 2025 09:50:49 -0400 Subject: [PATCH 1236/1307] drm/amdkfd: fix p2p links bug in topology When creating p2p links, KFD needs to check XGMI link with two conditions, hive_id and is_sharing_enabled, but it is missing to check is_sharing_enabled, so add it to fix the error. Signed-off-by: Eric Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 36cc7d13178d901982da7a122c883861d98da624) --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4ec73f33535e..720b20e842ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1587,7 +1587,8 @@ static int kfd_dev_create_p2p_links(void) break; if (!dev->gpu || !dev->gpu->adev || (dev->gpu->kfd->hive_id && - dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id && + amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev))) goto next; /* check if node(s) is/are peer accessible in one direction or bi-direction */ From 53503556273a5ead8b75534085e2dcb46e96f883 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 20 Aug 2025 16:10:51 +0800 Subject: [PATCH 1237/1307] amd/amdkfd: correct mem limit calculation for small APUs Current mem limit check leaks some GTT memory (reserved_for_pt reserved_for_ras + adev->vram_pin_size) for small APUs. Since carveout VRAM is tunable on APUs, there are three case regarding the carveout VRAM size relative to GTT: 1. 0 < carveout < gtt apu_prefer_gtt = true, is_app_apu = false 2. carveout > gtt / 2 apu_prefer_gtt = false, is_app_apu = false 3. 0 = carveout apu_prefer_gtt = true, is_app_apu = true It doesn't make sense to check below limitation in case 1 (default case, small carveout) because the values in the below expression are mixed with carveout and gtt. adev->kfd.vram_used[xcp_id] + vram_needed > vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size) gtt: kfd.vram_used, vram_needed, vram_size carveout: reserved_for_pt, reserved_for_ras, adev->vram_pin_size In case 1, vram allocation will go to gtt domain, skip vram check since ttm_mem_limit check already cover this allocation. Signed-off-by: Yifan Zhang Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit fa7c99f04f6dd299388e9282812b14e95558ac8e) --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 260165bbe373..b16cce7c22c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) + kfd_mem_limit.max_system_mem_limit) { pr_debug("Set no_system_mem_limit=1 if using shared memory\n"); + if (!no_system_mem_limit) { + ret = -ENOMEM; + goto release; + } + } - if ((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit) || - (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) { + if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) { ret = -ENOMEM; goto release; } + /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with + * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip + * VRAM check since ttm_mem_limit check already cover this allocation + */ + + if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) { + uint64_t vram_available = + vram_size - reserved_for_pt - reserved_for_ras - + atomic64_read(&adev->vram_pin_size); + if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) { + ret = -ENOMEM; + goto release; + } + } + /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ @@ -1626,11 +1642,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, uint64_t vram_available, system_mem_available, ttm_mem_available; spin_lock(&kfd_mem_limit.mem_limit_lock); - vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) - - adev->kfd.vram_used_aligned[xcp_id] - - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt - - reserved_for_ras; + if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu) + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id]; + else + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id] + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt + - reserved_for_ras; if (adev->apu_prefer_gtt) { system_mem_available = no_system_mem_limit ? From 75871a525a596ff4d16c4aebc0018f8d0923c9b1 Mon Sep 17 00:00:00 2001 From: Tianyu Xu Date: Tue, 12 Aug 2025 21:10:56 +0800 Subject: [PATCH 1238/1307] igb: Fix NULL pointer dereference in ethtool loopback test The igb driver currently causes a NULL pointer dereference when executing the ethtool loopback test. This occurs because there is no associated q_vector for the test ring when it is set up, as interrupts are typically not added to the test rings. Since commit 5ef44b3cb43b removed the napi_id assignment in __xdp_rxq_info_reg(), there is no longer a need to pass a napi_id to it. Therefore, simply use 0 as the last parameter. Fixes: 2c6196013f84 ("igb: Add AF_XDP zero-copy Rx support") Reviewed-by: Aleksandr Loktionov Reviewed-by: Joe Damato Signed-off-by: Tianyu Xu Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a9a7a94ae61e..453deb6d14b3 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4453,8 +4453,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, - rx_ring->q_vector->napi.napi_id); + rx_ring->queue_index, 0); if (res < 0) { dev_err(dev, "Failed to register xdp_rxq index %u\n", rx_ring->queue_index); From d709f178abca22a4d3642513df29afe4323a594b Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 15 Aug 2025 15:26:31 +0900 Subject: [PATCH 1239/1307] igb: fix link test skipping when interface is admin down The igb driver incorrectly skips the link test when the network interface is admin down (if_running == false), causing the test to always report PASS regardless of the actual physical link state. This behavior is inconsistent with other drivers (e.g. i40e, ice, ixgbe, etc.) which correctly test the physical link state regardless of admin state. Remove the if_running check to ensure link test always reflects the physical link state. Fixes: 8d420a1b3ea6 ("igb: correct link test not being run when link is down") Signed-off-by: Kohei Enju Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 92ef33459aec..7b8f32c5169a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2081,11 +2081,8 @@ static void igb_diag_test(struct net_device *netdev, } else { dev_info(&adapter->pdev->dev, "online testing starting\n"); - /* PHY is powered down when interface is down */ - if (if_running && igb_link_test(adapter, &data[TEST_LINK])) + if (igb_link_test(adapter, &data[TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; - else - data[TEST_LINK] = 0; /* Online tests aren't run; pass by default */ data[TEST_REG] = 0; From 915470e1b44e71d1dd07ee067276f003c3521ee3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 18 Aug 2025 17:39:03 +0200 Subject: [PATCH 1240/1307] i40e: fix IRQ freeing in i40e_vsi_request_irq_msix error path If request_irq() in i40e_vsi_request_irq_msix() fails in an iteration later than the first, the error path wants to free the IRQs requested so far. However, it uses the wrong dev_id argument for free_irq(), so it does not free the IRQs correctly and instead triggers the warning: Trying to free already-free IRQ 173 WARNING: CPU: 25 PID: 1091 at kernel/irq/manage.c:1829 __free_irq+0x192/0x2c0 Modules linked in: i40e(+) [...] CPU: 25 UID: 0 PID: 1091 Comm: NetworkManager Not tainted 6.17.0-rc1+ #1 PREEMPT(lazy) Hardware name: [...] RIP: 0010:__free_irq+0x192/0x2c0 [...] Call Trace: free_irq+0x32/0x70 i40e_vsi_request_irq_msix.cold+0x63/0x8b [i40e] i40e_vsi_request_irq+0x79/0x80 [i40e] i40e_vsi_open+0x21f/0x2f0 [i40e] i40e_open+0x63/0x130 [i40e] __dev_open+0xfc/0x210 __dev_change_flags+0x1fc/0x240 netif_change_flags+0x27/0x70 do_setlink.isra.0+0x341/0xc70 rtnl_newlink+0x468/0x860 rtnetlink_rcv_msg+0x375/0x450 netlink_rcv_skb+0x5c/0x110 netlink_unicast+0x288/0x3c0 netlink_sendmsg+0x20d/0x430 ____sys_sendmsg+0x3a2/0x3d0 ___sys_sendmsg+0x99/0xe0 __sys_sendmsg+0x8a/0xf0 do_syscall_64+0x82/0x2c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] ---[ end trace 0000000000000000 ]--- Use the same dev_id for free_irq() as for request_irq(). I tested this with inserting code to fail intentionally. Fixes: 493fb30011b3 ("i40e: Move q_vectors from pointer to array to array of pointers") Signed-off-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Reviewed-by: Subbaraya Sundeep Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b83f823e4917..dd21d93d39dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4156,7 +4156,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) irq_num = pf->msix_entries[base + vector].vector; irq_set_affinity_notifier(irq_num, NULL); irq_update_affinity_hint(irq_num, NULL); - free_irq(irq_num, &vsi->q_vectors[vector]); + free_irq(irq_num, vsi->q_vectors[vector]); } return err; } From 7934fdc25ad642ab3dbc16d734ab58638520ea60 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 4 Sep 2025 12:35:21 +0200 Subject: [PATCH 1241/1307] drm/xe/configfs: Don't touch survivability_mode on fini This is a user controlled configfs attribute, we should not modify that outside the configfs attr.store() implementation. Fixes: bc417e54e24b ("drm/xe: Enable configfs support for survivability mode") Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Riana Tauro Reviewed-by: Stuart Summers Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250904103521.7130-1-michal.wajdeczko@intel.com (cherry picked from commit 079a5c83dbd23db7a6eed8f558cf75e264d8a17b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_survivability_mode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 41705f5d52e3..8f7b0add2364 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -41,6 +41,8 @@ * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode * + * It is the responsibility of the user to clear the mode once firmware flash is complete. + * * Refer :ref:`xe_configfs` for more details on how to use configfs * * Survivability mode is indicated by the below admin-only readable sysfs which provides additional @@ -147,7 +149,6 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } From 5c87fee3c96ce898ad681552404a66c7605193c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 4 Sep 2025 18:07:13 +0200 Subject: [PATCH 1242/1307] drm/xe: Attempt to bring bos back to VRAM after eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VRAM+TT bos that are evicted from VRAM to TT may remain in TT also after a revalidation following eviction or suspend. This manifests itself as applications becoming sluggish after buffer objects get evicted or after a resume from suspend or hibernation. If the bo supports placement in both VRAM and TT, and we are on DGFX, mark the TT placement as fallback. This means that it is tried only after VRAM + eviction. This flaw has probably been present since the xe module was upstreamed but use a Fixes: commit below where backporting is likely to be simple. For earlier versions we need to open- code the fallback algorithm in the driver. v2: - Remove check for dgfx. (Matthew Auld) - Update the xe_dma_buf kunit test for the new strategy (CI) - Allow dma-buf to pin in current placement (CI) - Make xe_bo_validate() for pinned bos a NOP. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5995 Fixes: a78a8da51b36 ("drm/ttm: replace busy placement with flags v6") Cc: Matthew Brost Cc: Matthew Auld Cc: # v6.9+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250904160715.2613-2-thomas.hellstrom@linux.intel.com (cherry picked from commit cb3d7b3b46b799c96b54f8e8fe36794a55a77f0b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 10 +--------- drivers/gpu/drm/xe/xe_bo.c | 16 ++++++++++++---- drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index bb469096d072..7b40cc8be1c9 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -236,7 +236,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc } xe_bo_lock(external, false); - err = xe_bo_pin_external(external); + err = xe_bo_pin_external(external, false); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c53f67ce4b0a..121f17c112ec 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -89,15 +89,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; } - /* - * If on different devices, the exporter is kept in system if - * possible, saving a migration step as the transfer is just - * likely as fast from system memory. - */ - if (params->mem_mask & XE_BO_FLAG_SYSTEM) - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); - else - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); if (params->force_different_devices) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9954bb458ce1..bae7ff2e5927 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -186,6 +186,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, + .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ? + TTM_PL_FLAG_FALLBACK : 0, }; *c += 1; } @@ -2269,6 +2271,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) /** * xe_bo_pin_external - pin an external BO * @bo: buffer object to be pinned + * @in_place: Pin in current placement, don't attempt to migrate. * * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) * BO. Unique call compared to xe_bo_pin as this function has it own set of @@ -2276,7 +2279,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * * Returns 0 for success, negative error code otherwise. */ -int xe_bo_pin_external(struct xe_bo *bo) +int xe_bo_pin_external(struct xe_bo *bo, bool in_place) { struct xe_device *xe = xe_bo_device(bo); int err; @@ -2285,9 +2288,11 @@ int xe_bo_pin_external(struct xe_bo *bo) xe_assert(xe, xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { - err = xe_bo_validate(bo, NULL, false); - if (err) - return err; + if (!in_place) { + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + } spin_lock(&xe->pinned.lock); list_add_tail(&bo->pinned_link, &xe->pinned.late.external); @@ -2440,6 +2445,9 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) }; int ret; + if (xe_bo_is_pinned(bo)) + return 0; + if (vm) { lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02e8cde4c6b2..9ce94d252015 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -198,7 +198,7 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -int xe_bo_pin_external(struct xe_bo *bo); +int xe_bo_pin_external(struct xe_bo *bo, bool in_place); int xe_bo_pin(struct xe_bo *bo); void xe_bo_unpin_external(struct xe_bo *bo); void xe_bo_unpin(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 346f857f3837..af64baf872ef 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -72,7 +72,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return ret; } - ret = xe_bo_pin_external(bo); + ret = xe_bo_pin_external(bo, true); xe_assert(xe, !ret); return 0; From d84820309ed34cc412ce76ecfa9471dae7d7d144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 4 Sep 2025 18:07:14 +0200 Subject: [PATCH 1243/1307] drm/xe: Allow the pm notifier to continue on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Its actions are opportunistic anyway and will be completed on device suspend. Marking as a fix to simplify backporting of the fix that follows in the series. v2: - Keep the runtime pm reference over suspend / hibernate and document why. (Matt Auld, Rodrigo Vivi): Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier") Cc: Matthew Auld Cc: Rodrigo Vivi Cc: # v6.16+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250904160715.2613-3-thomas.hellstrom@linux.intel.com (cherry picked from commit ebd546fdffddfcaeab08afdd68ec93052c8fa740) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e279b47ba03b..c9d5f0a21c48 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -301,17 +301,17 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, case PM_SUSPEND_PREPARE: xe_pm_runtime_get(xe); err = xe_bo_evict_all_user(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); - xe_pm_runtime_put(xe); - break; - } err = xe_bo_notifier_prepare_all_pinned(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); - xe_pm_runtime_put(xe); - } + /* + * Keep the runtime pm reference until post hibernation / post suspend to + * avoid a runtime suspend interfering with evicted objects or backup + * allocations. + */ break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: @@ -320,9 +320,6 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, break; } - if (err) - return NOTIFY_BAD; - return NOTIFY_DONE; } From eb5723a75104605b7d2207a7d598e314166fbef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 4 Sep 2025 18:07:15 +0200 Subject: [PATCH 1244/1307] drm/xe: Block exec and rebind worker while evicting for suspend / hibernate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the xe pm_notifier evicts for suspend / hibernate, there might be racing tasks trying to re-validate again. This can lead to suspend taking excessive time or get stuck in a live-lock. This behaviour becomes much worse with the fix that actually makes re-validation bring back bos to VRAM rather than letting them remain in TT. Prevent that by having exec and the rebind worker waiting for a completion that is set to block by the pm_notifier before suspend and is signaled by the pm_notifier after resume / wakeup. It's probably still possible to craft malicious applications that block suspending. More work is pending to fix that. v3: - Avoid wait_for_completion() in the kernel worker since it could potentially cause work item flushes from freezable processes to wait forever. Instead terminate the rebind workers if needed and re-launch at resume. (Matt Auld) v4: - Fix some bad naming and leftover debug printouts. - Fix kerneldoc. - Use drmm_mutex_init() for the xe->rebind_resume_lock (Matt Auld). - Rework the interface of xe_vm_rebind_resume_worker (Matt Auld). Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4288 Fixes: c6a4d46ec1d7 ("drm/xe: evict user memory in PM notifier") Cc: Matthew Auld Cc: Rodrigo Vivi Cc: # v6.16+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250904160715.2613-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 599334572a5a99111015fbbd5152ce4dedc2f8b7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 6 ++++ drivers/gpu/drm/xe/xe_exec.c | 9 ++++++ drivers/gpu/drm/xe/xe_pm.c | 25 +++++++++++++++++ drivers/gpu/drm/xe/xe_vm.c | 42 +++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_vm.h | 2 ++ drivers/gpu/drm/xe/xe_vm_types.h | 5 ++++ 6 files changed, 88 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d4d2c6854790..7ceb0c90f391 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -553,6 +553,12 @@ struct xe_device { /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ struct notifier_block pm_notifier; + /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */ + struct completion pm_block; + /** @rebind_resume_list: List of wq items to kick on resume. */ + struct list_head rebind_resume_list; + /** @rebind_resume_lock: Lock to protect the rebind_resume_list */ + struct mutex rebind_resume_lock; /** @pmt: Support the PMT driver callback interface */ struct { diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 44364c042ad7..374c831e691b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -237,6 +237,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_unlock_list; } + /* + * It's OK to block interruptible here with the vm lock held, since + * on task freezing during suspend / hibernate, the call will + * return -ERESTARTSYS and the IOCTL will be rerun. + */ + err = wait_for_completion_interruptible(&xe->pm_block); + if (err) + goto err_unlock_list; + vm_exec.vm = &vm->gpuvm; vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; if (xe_vm_in_lr_mode(vm)) { diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c9d5f0a21c48..bb9b6ecad2af 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -24,6 +24,7 @@ #include "xe_pcode.h" #include "xe_pxp.h" #include "xe_trace.h" +#include "xe_vm.h" #include "xe_wa.h" /** @@ -290,6 +291,19 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static void xe_pm_wake_rebind_workers(struct xe_device *xe) +{ + struct xe_vm *vm, *next; + + mutex_lock(&xe->rebind_resume_lock); + list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, + preempt.pm_activate_link) { + list_del_init(&vm->preempt.pm_activate_link); + xe_vm_resume_rebind_worker(vm); + } + mutex_unlock(&xe->rebind_resume_lock); +} + static int xe_pm_notifier_callback(struct notifier_block *nb, unsigned long action, void *data) { @@ -299,6 +313,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + reinit_completion(&xe->pm_block); xe_pm_runtime_get(xe); err = xe_bo_evict_all_user(xe); if (err) @@ -315,6 +330,8 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: + complete_all(&xe->pm_block); + xe_pm_wake_rebind_workers(xe); xe_bo_notifier_unprepare_all_pinned(xe); xe_pm_runtime_put(xe); break; @@ -341,6 +358,14 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; + err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); + if (err) + goto err_unregister; + + init_completion(&xe->pm_block); + complete_all(&xe->pm_block); + INIT_LIST_HEAD(&xe->rebind_resume_list); + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d60c4b115304..dc4f61e56579 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -393,6 +393,9 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, &vm->rebind_list); + if (!try_wait_for_completion(&vm->xe->pm_block)) + return -EAGAIN; + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); if (ret) return ret; @@ -479,6 +482,33 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } +static bool vm_suspend_rebind_worker(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + bool ret = false; + + mutex_lock(&xe->rebind_resume_lock); + if (!try_wait_for_completion(&vm->xe->pm_block)) { + ret = true; + list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); + } + mutex_unlock(&xe->rebind_resume_lock); + + return ret; +} + +/** + * xe_vm_resume_rebind_worker() - Resume the rebind worker. + * @vm: The vm whose preempt worker to resume. + * + * Resume a preempt worker that was previously suspended by + * vm_suspend_rebind_worker(). + */ +void xe_vm_resume_rebind_worker(struct xe_vm *vm) +{ + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); @@ -502,6 +532,11 @@ static void preempt_rebind_work_func(struct work_struct *w) } retry: + if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { + up_write(&vm->lock); + return; + } + if (xe_vm_userptr_check_repin(vm)) { err = xe_vm_userptr_pin(vm); if (err) @@ -1714,6 +1749,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) if (flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + INIT_LIST_HEAD(&vm->preempt.pm_activate_link); } if (flags & XE_VM_FLAG_FAULT_MODE) { @@ -1895,8 +1931,12 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) + if (xe_vm_in_preempt_fence_mode(vm)) { + mutex_lock(&xe->rebind_resume_lock); + list_del_init(&vm->preempt.pm_activate_link); + mutex_unlock(&xe->rebind_resume_lock); flush_work(&vm->preempt.rebind_work); + } if (xe_vm_in_fault_mode(vm)) xe_svm_close(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 2ecb417c19a2..82b112795807 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -273,6 +273,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, struct xe_exec_queue *q, u64 addr, enum xe_cache_level cache_lvl); +void xe_vm_resume_rebind_worker(struct xe_vm *vm); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 8a07feef503b..6058cf739388 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -293,6 +293,11 @@ struct xe_vm { * BOs */ struct work_struct rebind_work; + /** + * @preempt.pm_activate_link: Link to list of rebind workers to be + * kicked on resume. + */ + struct list_head pm_activate_link; } preempt; /** @um: unified memory state */ From fd99415ec8a80866e5e19f7835876e7b4f294946 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Wed, 3 Sep 2025 12:00:38 -0700 Subject: [PATCH 1245/1307] drm/xe: Extend Wa_13011645652 to PTL-H, WCL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expand workaround to additional graphics architectures. Cc: Vinay Belgaumkar Cc: Stuart Summers Cc: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: intel-xe@lists.freedesktop.org Cc: # v6.17+ Signed-off-by: Julia Filipchuk Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250903190122.1028373-2-julia.filipchuk@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6fc957185e1691bb6dfa4193698a229db537c2a2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa_oob.rules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index e990f20eccfe..710f4423726c 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -30,7 +30,8 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) - GRAPHICS_VERSION(3001) + GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) From 503f1c72c31bbee21e669a08cf65c49e96d42755 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 27 Aug 2025 14:17:36 -0700 Subject: [PATCH 1246/1307] i40e: fix Jumbo Frame support after iPXE boot The i40e hardware has multiple hardware settings which define the Maximum Frame Size (MFS) of the physical port. The firmware has an AdminQ command (0x0603) to configure the MFS, but the i40e Linux driver never issues this command. In most cases this is no problem, as the NVM default value has the device configured for its maximum value of 9728. Unfortunately, recent versions of the iPXE intelxl driver now issue the 0x0603 Set Mac Config command, modifying the MFS and reducing it from its default value of 9728. This occurred as part of iPXE commit 6871a7de705b ("[intelxl] Use admin queue to set port MAC address and maximum frame size"), a prerequisite change for supporting the E800 series hardware in iPXE. Both the E700 and E800 firmware support the AdminQ command, and the iPXE code shares much of the logic between the two device drivers. The ice E800 Linux driver already issues the 0x0603 Set Mac Config command early during probe, and is thus unaffected by the iPXE change. Since commit 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set"), the i40e driver does check the I40E_PRTGL_SAH register, but it only logs a warning message if its value is below the 9728 default. This register also only covers received packets and not transmitted packets. A warning can inform system administrators, but does not correct the issue. No interactions from userspace cause the driver to write to PRTGL_SAH or issue the 0x0603 AdminQ command. Only a GLOBR reset will restore the value to its default value. There is no obvious method to trigger a GLOBR reset from user space. To fix this, introduce the i40e_aq_set_mac_config() function, similar to the one from the ice driver. Call this during early probe to ensure that the device configuration matches driver expectation. Unlike E800, the E700 firmware also has a bit to control whether the MAC should append CRC data. It is on by default, but setting a 0 to this bit would disable CRC. The i40e implementation must set this bit to ensure CRC will be appended by the MAC. In addition to the AQ command, instead of just checking the I40E_PRTGL_SAH register, update its value to the 9728 default and write it back. This ensures that the hardware is in the expected state, regardless of whether the iPXE (or any other early boot driver) has modified this state. This is a better user experience, as we now fix the issues with larger MTU instead of merely warning. It also aligns with the way the ice E800 series driver works. A final note: The Fixes tag provided here is not strictly accurate. The issue occurs as a result of an external entity (the iPXE intelxl driver), and this is not a regression specifically caused by the mentioned change. However, I believe the original change to just warn about PRTGL_SAH being too low was an insufficient fix. Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Link: https://github.com/ipxe/ipxe/commit/6871a7de705b6f6a4046f0d19da9bcd689c3bc8e Signed-off-by: Jacob Keller Signed-off-by: Aleksandr Loktionov Reviewed-by: Michal Schmidt Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40e/i40e_common.c | 34 +++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++++---- .../net/ethernet/intel/i40e/i40e_prototype.h | 2 ++ 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 76d872b91a38..cc02a85ad42b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1561,6 +1561,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); struct i40e_aq_set_mac_config { __le16 max_frame_size; u8 params; +#define I40E_AQ_SET_MAC_CONFIG_CRC_EN BIT(2) u8 tx_timer_priority; /* bitmap */ __le16 tx_timer_value; __le16 fc_refresh_threshold; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 270e7e8cf9cf..59f5c1e810eb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1189,6 +1189,40 @@ int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, return status; } +/** + * i40e_aq_set_mac_config - Configure MAC settings + * @hw: pointer to the hw struct + * @max_frame_size: Maximum Frame Size to be supported by the port + * @cmd_details: pointer to command details structure or NULL + * + * Set MAC configuration (0x0603). Note that max_frame_size must be greater + * than zero. + * + * Return: 0 on success, or a negative error code on failure. + */ +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_set_mac_config *cmd; + struct libie_aq_desc desc; + + cmd = libie_aq_raw(&desc); + + if (max_frame_size == 0) + return -EINVAL; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_mac_config); + + cmd->max_frame_size = cpu_to_le16(max_frame_size); + cmd->params = I40E_AQ_SET_MAC_CONFIG_CRC_EN; + +#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD 0x7FFF + cmd->fc_refresh_threshold = + cpu_to_le16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD); + + return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); +} + /** * i40e_aq_clear_pxe_mode * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dd21d93d39dd..b14019d44b58 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16045,13 +16045,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n", ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); - /* make sure the MFS hasn't been set lower than the default */ #define MAX_FRAME_SIZE_DEFAULT 0x2600 - val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, - rd32(&pf->hw, I40E_PRTGL_SAH)); - if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", - pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); + + err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL); + if (err) + dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n", + ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); + + /* Make sure the MFS is set to the expected value */ + val = rd32(hw, I40E_PRTGL_SAH); + FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT); + wr32(hw, I40E_PRTGL_SAH, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index aef5de53ce3b..26bb7bffe361 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -98,6 +98,8 @@ int i40e_aq_set_mac_loopback(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask, struct i40e_asq_cmd_details *cmd_details); +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details); int i40e_aq_clear_pxe_mode(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_link_restart_an(struct i40e_hw *hw, From 7838fb5f119191403560eca2e23613380c0e425e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 4 Sep 2025 12:35:05 -0400 Subject: [PATCH 1247/1307] drm/amdgpu: fix a memory leak in fence cleanup when unloading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b61badd20b44 ("drm/amdgpu: fix usage slab after free") reordered when amdgpu_fence_driver_sw_fini() was called after that patch, amdgpu_fence_driver_sw_fini() effectively became a no-op as the sched entities we never freed because the ring pointers were already set to NULL. Remove the NULL setting. Reported-by: Lin.Cao Cc: Vitaly Prosyak Cc: Christian König Fixes: b61badd20b44 ("drm/amdgpu: fix usage slab after free") Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit a525fa37aac36c4591cc8b07ae8957862415fbd5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6379bb25bf5c..486c3646710c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -421,8 +421,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - - ring->adev->rings[ring->idx] = NULL; } /** From 1d66c3f2b8c0b5c51f3f4fe29b362c9851190c5a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Sep 2025 09:11:12 -0400 Subject: [PATCH 1248/1307] drm/amd/display: use udelay rather than fsleep This function can be called from an atomic context so we can't use fsleep(). Fixes: 01f60348d8fb ("drm/amd/display: Fix 'failed to blank crtc!'") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4549 Cc: Wen Chen Cc: Fangzhi Zuo Cc: Nicholas Kazlauskas Cc: Harry Wentland Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit 27e4dc2c0543fd1808cc52bd888ee1e0533c4a2e) --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 3207addbd4eb..b7c2d3095b25 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -955,7 +955,7 @@ enum dc_status dcn20_enable_stream_timing( return DC_ERROR_UNEXPECTED; } - fsleep(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); + udelay(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; From 857ccfc19f9be1269716f3d681650c1bd149a656 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Wed, 3 Sep 2025 16:00:24 -0400 Subject: [PATCH 1249/1307] drm/amd/amdgpu: Declare isp firmware binary file Declare isp firmware file isp_4_1_1.bin required by isp4.1.1 device. Suggested-by: Alexey Zagorodnikov Reviewed-by: Mario Limonciello Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher (cherry picked from commit d97b74a833eba1f4f69f67198fd98ef036c0e5f9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index a887df520414..4258d3e0b706 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -29,6 +29,8 @@ #include "amdgpu.h" #include "isp_v4_1_1.h" +MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin"); + #define ISP_PERFORMANCE_STATE_LOW 0 #define ISP_PERFORMANCE_STATE_HIGH 1 From 2b10cb58d7a3fd621ec9b2ba765a092e562ef998 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Mon, 18 Aug 2025 09:06:58 +0200 Subject: [PATCH 1250/1307] drm/amdgpu/vcn4: Fix IB parsing with multiple engine info packages There can be multiple engine info packages in one IB and the first one may be common engine, not decode/encode. We need to parse the entire IB instead of stopping after finding first engine info. Signed-off-by: David Rosca Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit dc8f9f0f45166a6b37864e7a031c726981d6e5fc) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 48 +++++++++++---------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 1924e075b66f..311fb595bd69 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1907,22 +1907,16 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - #define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - #define RENCODE_ENCODE_STANDARD_AV1 2 #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +/* return the offset in ib if id is found, -1 otherwise */ +static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start) { int i; - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { + for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) { if (ib->ptr[i + 1] == id) return i; } @@ -1937,33 +1931,29 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_vcn_decode_buffer *decode_buffer; uint64_t addr; uint32_t val; - int idx; + int idx = 0, sidx; /* The first instance can decode anything */ if (!ring->me) return 0; - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; + while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) { + val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; - if (!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_limit_sched(p, job); + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { + sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx); + if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1) + return vcn_v4_0_limit_sched(p, job); + } + idx += ib->ptr[idx] / 4; } return 0; } From 3318f2d20ce48849855df5e190813826d0bc3653 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Mon, 18 Aug 2025 09:18:37 +0200 Subject: [PATCH 1251/1307] drm/amdgpu/vcn: Allow limiting ctx to instance 0 for AV1 at any time There is no reason to require this to happen on first submitted IB only. We need to wait for the queue to be idle, but it can be done at any time (including when there are multiple video sessions active). Signed-off-by: David Rosca Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 8908fdce0634a623404e9923ed2f536101a39db5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 4b8f4407047f..2811226b0ea5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1888,15 +1888,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 311fb595bd69..706f3b2f484f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1808,15 +1808,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] [AMDGPU_RING_PRIO_0].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); From f9bb6ffa7f5ad0f8ee0f53fc4a10655872ee4a14 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Aug 2025 16:36:56 +0200 Subject: [PATCH 1252/1307] bpf: Fix out-of-bounds dynptr write in bpf_crypto_crypt Stanislav reported that in bpf_crypto_crypt() the destination dynptr's size is not validated to be at least as large as the source dynptr's size before calling into the crypto backend with 'len = src_len'. This can result in an OOB write when the destination is smaller than the source. Concretely, in mentioned function, psrc and pdst are both linear buffers fetched from each dynptr: psrc = __bpf_dynptr_data(src, src_len); [...] pdst = __bpf_dynptr_data_rw(dst, dst_len); [...] err = decrypt ? ctx->type->decrypt(ctx->tfm, psrc, pdst, src_len, piv) : ctx->type->encrypt(ctx->tfm, psrc, pdst, src_len, piv); The crypto backend expects pdst to be large enough with a src_len length that can be written. Add an additional src_len > dst_len check and bail out if it's the case. Note that these kfuncs are accessible under root privileges only. Fixes: 3e1c6f35409f ("bpf: make common crypto API for TC/XDP programs") Reported-by: Stanislav Fort Signed-off-by: Daniel Borkmann Cc: Vadim Fedorenko Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20250829143657.318524-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c index 94854cd9c4cc..83c4d9943084 100644 --- a/kernel/bpf/crypto.c +++ b/kernel/bpf/crypto.c @@ -278,7 +278,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, siv_len = siv ? __bpf_dynptr_size(siv) : 0; src_len = __bpf_dynptr_size(src); dst_len = __bpf_dynptr_size(dst); - if (!src_len || !dst_len) + if (!src_len || !dst_len || src_len > dst_len) return -EINVAL; if (siv_len != ctx->siv_len) From 3aa9b9a165d5e9afc7d8b5dbcd508810c05c8e89 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Aug 2025 16:36:57 +0200 Subject: [PATCH 1253/1307] selftests/bpf: Extend crypto_sanity selftest with invalid dst buffer Small cleanup and test extension to probe the bpf_crypto_{encrypt,decrypt}() kfunc when a bad dst buffer is passed in to assert that an error is returned. Also, encrypt_sanity() and skb_crypto_setup() were explicit to set the global status variable to zero before any test, so do the same for decrypt_sanity(). Do not explicitly zero the on-stack err before bpf_crypto_ctx_create() given the kfunc is expected to do it internally for the success case. Before kernel fix: # ./vmtest.sh -- ./test_progs -t crypto [...] [ 1.531200] bpf_testmod: loading out-of-tree module taints kernel. [ 1.533388] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #87/1 crypto_basic/crypto_release:OK #87/2 crypto_basic/crypto_acquire:OK #87 crypto_basic:OK test_crypto_sanity:PASS:skel open 0 nsec test_crypto_sanity:PASS:ip netns add crypto_sanity_ns 0 nsec test_crypto_sanity:PASS:ip -net crypto_sanity_ns -6 addr add face::1/128 dev lo nodad 0 nsec test_crypto_sanity:PASS:ip -net crypto_sanity_ns link set dev lo up 0 nsec test_crypto_sanity:PASS:open_netns 0 nsec test_crypto_sanity:PASS:AF_ALG init fail 0 nsec test_crypto_sanity:PASS:if_nametoindex lo 0 nsec test_crypto_sanity:PASS:skb_crypto_setup fd 0 nsec test_crypto_sanity:PASS:skb_crypto_setup 0 nsec test_crypto_sanity:PASS:skb_crypto_setup retval 0 nsec test_crypto_sanity:PASS:skb_crypto_setup status 0 nsec test_crypto_sanity:PASS:create qdisc hook 0 nsec test_crypto_sanity:PASS:make_sockaddr 0 nsec test_crypto_sanity:PASS:attach encrypt filter 0 nsec test_crypto_sanity:PASS:encrypt socket 0 nsec test_crypto_sanity:PASS:encrypt send 0 nsec test_crypto_sanity:FAIL:encrypt status unexpected error: -5 (errno 95) #88 crypto_sanity:FAIL Summary: 1/2 PASSED, 0 SKIPPED, 1 FAILED After kernel fix: # ./vmtest.sh -- ./test_progs -t crypto [...] [ 1.540963] bpf_testmod: loading out-of-tree module taints kernel. [ 1.542404] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel #87/1 crypto_basic/crypto_release:OK #87/2 crypto_basic/crypto_acquire:OK #87 crypto_basic:OK #88 crypto_sanity:OK Summary: 2/2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Cc: Vadim Fedorenko Link: https://lore.kernel.org/r/20250829143657.318524-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/crypto_sanity.c | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index 645be6cddf36..dfd8a258f14a 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -14,7 +14,7 @@ unsigned char key[256] = {}; u16 udp_test_port = 7777; u32 authsize, key_len; char algo[128] = {}; -char dst[16] = {}; +char dst[16] = {}, dst_bad[8] = {}; int status; static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) @@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx) .authsize = authsize, }; struct bpf_crypto_ctx *cctx; - int err = 0; + int err; status = 0; - if (key_len > 256) { status = -EINVAL; return 0; @@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx) __builtin_memcpy(¶ms.algo, algo, sizeof(algo)); __builtin_memcpy(¶ms.key, key, sizeof(key)); - cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); if (!cctx) { status = err; return 0; @@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx) err = crypto_ctx_insert(cctx); if (err && err != -EEXIST) status = err; - return 0; } @@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb) struct bpf_dynptr psrc, pdst; int err; + status = 0; err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } @@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb) int err; status = 0; - err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } From 7edfc024708258d75f65fadffd7e5f6ac46810b6 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sat, 30 Aug 2025 00:31:58 +0800 Subject: [PATCH 1254/1307] bpf: Fix bpf_strnstr() to handle suffix match cases better bpf_strnstr() should not treat the ending '\0' of s2 as a matching character if the parameter 'len' equal to s2 string length, for example: 1. bpf_strnstr("openat", "open", 4) = -ENOENT 2. bpf_strnstr("openat", "open", 5) = 0 This patch makes (1) return 0, fix just the `len == strlen(s2)` case. And fix a more general case when s2 is a suffix of the first len characters of s1. Fixes: e91370550f1f ("bpf: Add kfuncs for read-only string operations") Signed-off-by: Rong Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/tencent_17DC57B9D16BC443837021BEACE84B7C1507@qq.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6b4877e85a68..b9b0c5fe33f6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3664,10 +3664,17 @@ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { - for (j = 0; i + j < len && j < XATTR_SIZE_MAX; j++) { + for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) { __get_kernel_nofault(&c2, s2__ign + j, char, err_out); if (c2 == '\0') return i; + /* + * We allow reading an extra byte from s2 (note the + * `i + j <= len` above) to cover the case when s2 is + * a suffix of the first len chars of s1. + */ + if (i + j == len) + break; __get_kernel_nofault(&c1, s1__ign + j, char, err_out); if (c1 == '\0') return -ENOENT; From 5d40c038c879eeb910039adeaf6102e1c4dda807 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 29 Aug 2025 04:53:57 +0200 Subject: [PATCH 1255/1307] selftests/bpf: Fix "expression result unused" warnings with icecc icecc is a compiler wrapper that distributes compile jobs over a build farm [1]. It works by sending toolchain binaries and preprocessed source code to remote machines. Unfortunately using it with BPF selftests causes build failures due to a clang bug [2]. The problem is that clang suppresses the -Wunused-value warning if the unused expression comes from a macro expansion. Since icecc compiles preprocessed source code, this information is not available. This leads to -Wunused-value false positives. obj_new_no_struct() and obj_new_acq() use the bpf_obj_new() macro and discard the result. arena_spin_lock_slowpath() uses two macros that produce values and ignores the results. Add (void) casts to explicitly indicate that this is intentional and suppress the warning. An alternative solution is to change the macros to not produce values. This would work today for the arena_spin_lock_slowpath() issue, but in the future there may appear users who need them. Another potential solution is to replace these macros with functions. Unfortunately this would not work, because these macros work with unknown types and control flow. [1] https://github.com/icecc/icecream [2] https://github.com/llvm/llvm-project/issues/142614 Signed-off-by: Ilya Leoshkevich Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250829030017.102615-2-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h | 4 ++-- tools/testing/selftests/bpf/progs/linked_list_fail.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h index d67466c1ff77..f90531cf3ee5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h @@ -302,7 +302,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val * barriers. */ if (val & _Q_LOCKED_MASK) - smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); + (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); /* * take ownership and clear the pending bit. @@ -380,7 +380,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); - arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); + (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); /* * While waiting for the MCS lock, the next pointer may have diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 6438982b928b..ddd26d1a083f 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx) SEC("?tc") int obj_new_no_struct(void *ctx) { - - bpf_obj_new(union { int data; unsigned udata; }); + (void)bpf_obj_new(union { int data; unsigned udata; }); return 0; } @@ -252,7 +251,7 @@ int new_null_ret(void *ctx) SEC("?tc") int obj_new_acq(void *ctx) { - bpf_obj_new(struct foo); + (void)bpf_obj_new(struct foo); return 0; } From 6624fb2f3382271953f951d46f2ea30415a0917e Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sat, 30 Aug 2025 00:32:13 +0800 Subject: [PATCH 1256/1307] selftests/bpf: Add tests for bpf_strnstr Add tests for bpf_strnstr(): bpf_strnstr("", "", 0) = 0 bpf_strnstr("hello world", "hello", 5) = 0 bpf_strnstr(str, "hello", 4) = -ENOENT bpf_strnstr("", "a", 0) = -ENOENT Signed-off-by: Rong Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/tencent_2ED218F8082565C95D86A804BDDA8DBA200A@qq.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..a47690174e0e 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -30,8 +30,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } -__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } -__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); } +__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); } +__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); } +__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); } __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } char _license[] SEC("license") = "GPL"; From 6c6f5c19e67c89e974ef0dd8601804eaa4ae868d Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 9 Sep 2025 10:16:38 -0700 Subject: [PATCH 1257/1307] bpf: Update the list of BPF selftests maintainers Unfortunately Mykola won't participate in BPF selftests maintenance anymore. Remove the entry on his behalf. Acked-by: Mykola Lysenko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250909171638.2417272-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..6056ad6f1afa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4682,7 +4682,6 @@ F: security/bpf/ BPF [SELFTESTS] (Test Runners & Infrastructure) M: Andrii Nakryiko M: Eduard Zingerman -R: Mykola Lysenko L: bpf@vger.kernel.org S: Maintained F: tools/testing/selftests/bpf/ From 30f241fcf52aaaef7ac16e66530faa11be78a865 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 4 Sep 2025 21:49:07 +0200 Subject: [PATCH 1258/1307] xsk: Fix immature cq descriptor production Eryk reported an issue that I have put under Closes: tag, related to umem addrs being prematurely produced onto pool's completion queue. Let us make the skb's destructor responsible for producing all addrs that given skb used. Commit from fixes tag introduced the buggy behavior, it was not broken from day 1, but rather when xsk multi-buffer got introduced. In order to mitigate performance impact as much as possible, mimic the linear and frag parts within skb by storing the first address from XSK descriptor at sk_buff::destructor_arg. For fragments, store them at ::cb via list. The nodes that will go onto list will be allocated via kmem_cache. xsk_destruct_skb() will consume address stored at ::destructor_arg and optionally go through list from ::cb, if count of descriptors associated with this particular skb is bigger than 1. Previous approach where whole array for storing UMEM addresses from XSK descriptors was pre-allocated during first fragment processing yielded too big performance regression for 64b traffic. In current approach impact is much reduced on my tests and for jumbo frames I observed traffic being slower by at most 9%. Magnus suggested to have this way of processing special cased for XDP_SHARED_UMEM, so we would identify this during bind and set different hooks for 'backpressure mechanism' on CQ and for skb destructor, but given that results looked promising on my side I decided to have a single data path for XSK generic Tx. I suppose other auxiliary stuff would have to land as well in order to make it work. Fixes: b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path") Reported-by: Eryk Kubanski Closes: https://lore.kernel.org/netdev/20250530103456.53564-1-e.kubanski@partner.samsung.com/ Acked-by: Stanislav Fomichev Signed-off-by: Maciej Fijalkowski Tested-by: Jason Xing Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20250904194907.2342177-1-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- net/xdp/xsk.c | 113 ++++++++++++++++++++++++++++++++++++++------ net/xdp/xsk_queue.h | 12 +++++ 2 files changed, 111 insertions(+), 14 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 9c3acecc14b1..72e34bd2d925 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -36,6 +36,20 @@ #define TX_BATCH_SIZE 32 #define MAX_PER_SOCKET_BUDGET 32 +struct xsk_addr_node { + u64 addr; + struct list_head addr_node; +}; + +struct xsk_addr_head { + u32 num_descs; + struct list_head addrs_list; +}; + +static struct kmem_cache *xsk_tx_generic_cache; + +#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb)) + void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -532,24 +546,43 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags) return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); } -static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr) +static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool) { unsigned long flags; int ret; spin_lock_irqsave(&pool->cq_lock, flags); - ret = xskq_prod_reserve_addr(pool->cq, addr); + ret = xskq_prod_reserve(pool->cq); spin_unlock_irqrestore(&pool->cq_lock, flags); return ret; } -static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n) +static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, + struct sk_buff *skb) { + struct xsk_addr_node *pos, *tmp; + u32 descs_processed = 0; unsigned long flags; + u32 idx; spin_lock_irqsave(&pool->cq_lock, flags); - xskq_prod_submit_n(pool->cq, n); + idx = xskq_get_prod(pool->cq); + + xskq_prod_write_addr(pool->cq, idx, + (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg); + descs_processed++; + + if (unlikely(XSKCB(skb)->num_descs > 1)) { + list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { + xskq_prod_write_addr(pool->cq, idx + descs_processed, + pos->addr); + descs_processed++; + list_del(&pos->addr_node); + kmem_cache_free(xsk_tx_generic_cache, pos); + } + } + xskq_prod_submit_n(pool->cq, descs_processed); spin_unlock_irqrestore(&pool->cq_lock, flags); } @@ -562,9 +595,14 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) spin_unlock_irqrestore(&pool->cq_lock, flags); } +static void xsk_inc_num_desc(struct sk_buff *skb) +{ + XSKCB(skb)->num_descs++; +} + static u32 xsk_get_num_desc(struct sk_buff *skb) { - return skb ? (long)skb_shinfo(skb)->destructor_arg : 0; + return XSKCB(skb)->num_descs; } static void xsk_destruct_skb(struct sk_buff *skb) @@ -576,23 +614,33 @@ static void xsk_destruct_skb(struct sk_buff *skb) *compl->tx_timestamp = ktime_get_tai_fast_ns(); } - xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb)); + xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb); sock_wfree(skb); } -static void xsk_set_destructor_arg(struct sk_buff *skb) +static void xsk_set_destructor_arg(struct sk_buff *skb, u64 addr) { - long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1; - - skb_shinfo(skb)->destructor_arg = (void *)num; + BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb)); + INIT_LIST_HEAD(&XSKCB(skb)->addrs_list); + XSKCB(skb)->num_descs = 0; + skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr; } static void xsk_consume_skb(struct sk_buff *skb) { struct xdp_sock *xs = xdp_sk(skb->sk); + u32 num_descs = xsk_get_num_desc(skb); + struct xsk_addr_node *pos, *tmp; + + if (unlikely(num_descs > 1)) { + list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { + list_del(&pos->addr_node); + kmem_cache_free(xsk_tx_generic_cache, pos); + } + } skb->destructor = sock_wfree; - xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb)); + xsk_cq_cancel_locked(xs->pool, num_descs); /* Free skb without triggering the perf drop trace */ consume_skb(skb); xs->skb = NULL; @@ -609,6 +657,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, { struct xsk_buff_pool *pool = xs->pool; u32 hr, len, ts, offset, copy, copied; + struct xsk_addr_node *xsk_addr; struct sk_buff *skb = xs->skb; struct page *page; void *buffer; @@ -623,6 +672,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); + + xsk_set_destructor_arg(skb, desc->addr); + } else { + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); + + /* in case of -EOVERFLOW that could happen below, + * xsk_consume_skb() will release this node as whole skb + * would be dropped, which implies freeing all list elements + */ + xsk_addr->addr = desc->addr; + list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); } addr = desc->addr; @@ -694,8 +756,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, err = skb_store_bits(skb, 0, buffer, len); if (unlikely(err)) goto free_err; + + xsk_set_destructor_arg(skb, desc->addr); } else { int nr_frags = skb_shinfo(skb)->nr_frags; + struct xsk_addr_node *xsk_addr; struct page *page; u8 *vaddr; @@ -710,12 +775,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, goto free_err; } + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (!xsk_addr) { + __free_page(page); + err = -ENOMEM; + goto free_err; + } + vaddr = kmap_local_page(page); memcpy(vaddr, buffer, len); kunmap_local(vaddr); skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); + + xsk_addr->addr = desc->addr; + list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); } if (first_frag && desc->options & XDP_TX_METADATA) { @@ -759,7 +834,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); - xsk_set_destructor_arg(skb); + xsk_inc_num_desc(skb); return skb; @@ -769,7 +844,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (err == -EOVERFLOW) { /* Drop the packet */ - xsk_set_destructor_arg(xs->skb); + xsk_inc_num_desc(xs->skb); xsk_drop_skb(xs->skb); xskq_cons_release(xs->tx); } else { @@ -812,7 +887,7 @@ static int __xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ - err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr); + err = xsk_cq_reserve_locked(xs->pool); if (err) { err = -EAGAIN; goto out; @@ -1815,8 +1890,18 @@ static int __init xsk_init(void) if (err) goto out_pernet; + xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache", + sizeof(struct xsk_addr_node), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!xsk_tx_generic_cache) { + err = -ENOMEM; + goto out_unreg_notif; + } + return 0; +out_unreg_notif: + unregister_netdevice_notifier(&xsk_netdev_notifier); out_pernet: unregister_pernet_subsys(&xsk_net_ops); out_sk: diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 46d87e961ad6..f16f390370dc 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q) /* Functions for producers */ +static inline u32 xskq_get_prod(struct xsk_queue *q) +{ + return READ_ONCE(q->ring->producer); +} + static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max) { u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); @@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) return 0; } +static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr) +{ + struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + + ring->desc[idx & q->ring_mask] = addr; +} + static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs, u32 nb_entries) { From 0d80e7f951be1bdd08d328fd87694be0d6e8aaa8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 9 Sep 2025 18:49:59 +0000 Subject: [PATCH 1259/1307] rqspinlock: Choose trylock fallback for NMI waiters Currently, out of all 3 types of waiters in the rqspinlock slow path (i.e., pending bit waiter, wait queue head waiter, and wait queue non-head waiter), only the pending bit waiter and wait queue head waiters apply deadlock checks and a timeout on their waiting loop. The assumption here was that the wait queue head's forward progress would be sufficient to identify cases where the lock owner or pending bit waiter is stuck, and non-head waiters relying on the head waiter would prove to be sufficient for their own forward progress. However, the head waiter itself can be preempted by a non-head waiter for the same lock (AA) or a different lock (ABBA) in a manner that impedes its forward progress. In such a case, non-head waiters not performing deadlock and timeout checks becomes insufficient, and the system can enter a state of lockup. This is typically not a concern with non-NMI lock acquisitions, as lock holders which in run in different contexts (IRQ, non-IRQ) use "irqsave" variants of the lock APIs, which naturally excludes such lock holders from preempting one another on the same CPU. It might seem likely that a similar case may occur for rqspinlock when programs are attached to contention tracepoints (begin, end), however, these tracepoints either precede the enqueue into the wait queue, or succeed it, therefore cannot be used to preempt a head waiter's waiting loop. We must still be careful against nested kprobe and fentry programs that may attach to the middle of the head's waiting loop to stall forward progress and invoke another rqspinlock acquisition that proceeds as a non-head waiter. To this end, drop CC_FLAGS_FTRACE from the rqspinlock.o object file. For now, this issue is resolved by falling back to a repeated trylock on the lock word from NMI context, while performing the deadlock checks to break out early in case forward progress is impossible, and use the timeout as a final fallback. A more involved fix to terminate the queue when such a condition occurs will be made as a follow up. A selftest to stress this aspect of nested NMI/non-NMI locking attempts will be added in a subsequent patch to the bpf-next tree when this fix lands and trees are synchronized. Reported-by: Josef Bacik Fixes: 164c246571e9 ("rqspinlock: Protect waiters in queue from stalls") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250909184959.3509085-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/Makefile | 1 + kernel/bpf/rqspinlock.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 269c04a24664..f6cf8c2af5f7 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -62,3 +62,4 @@ CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_queue_stack_maps.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_lpm_trie.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ringbuf.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rqspinlock.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index 5ab354d55d82..a00561b1d3e5 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -471,7 +471,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) * any MCS node. This is not the most elegant solution, but is * simple enough. */ - if (unlikely(idx >= _Q_MAX_NODES)) { + if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) { lockevent_inc(lock_no_node); RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); while (!queued_spin_trylock(lock)) { From df0cb5cb50bd54d3cd4d0d83417ceec6a66404aa Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Tue, 9 Sep 2025 22:46:14 +0800 Subject: [PATCH 1260/1307] bpf: Allow fall back to interpreter for programs with stack size <= 512 OpenWRT users reported regression on ARMv6 devices after updating to latest HEAD, where tcpdump filter: tcpdump "not ether host 3c37121a2b3c and not ether host 184ecbca2a3a \ and not ether host 14130b4d3f47 and not ether host f0f61cf440b7 \ and not ether host a84b4dedf471 and not ether host d022be17e1d7 \ and not ether host 5c497967208b and not ether host 706655784d5b" fails with warning: "Kernel filter failed: No error information" when using config: # CONFIG_BPF_JIT_ALWAYS_ON is not set CONFIG_BPF_JIT_DEFAULT_ON=y The issue arises because commits: 1. "bpf: Fix array bounds error with may_goto" changed default runtime to __bpf_prog_ret0_warn when jit_requested = 1 2. "bpf: Avoid __bpf_prog_ret0_warn when jit fails" returns error when jit_requested = 1 but jit fails This change restores interpreter fallback capability for BPF programs with stack size <= 512 bytes when jit fails. Reported-by: Felix Fietkau Closes: https://lore.kernel.org/bpf/2e267b4b-0540-45d8-9310-e127bf95fc63@nbd.name/ Fixes: 6ebc5030e0c5 ("bpf: Fix array bounds error with may_goto") Signed-off-by: KaFai Wan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250909144614.2991253-1-kafai.wan@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f8ac77d08ca7..e4568d44e827 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2366,8 +2366,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, const struct bpf_insn *insn) { /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON - * is not working properly, or interpreter is being used when - * prog->jit_requested is not 0, so warn about it! + * is not working properly, so warn about it! */ WARN_ON_ONCE(1); return 0; @@ -2468,8 +2467,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) return ret; } -static void bpf_prog_select_func(struct bpf_prog *fp) +static bool bpf_prog_select_interpreter(struct bpf_prog *fp) { + bool select_interpreter = false; #ifndef CONFIG_BPF_JIT_ALWAYS_ON u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); u32 idx = (round_up(stack_depth, 32) / 32) - 1; @@ -2478,15 +2478,16 @@ static void bpf_prog_select_func(struct bpf_prog *fp) * But for non-JITed programs, we don't need bpf_func, so no bounds * check needed. */ - if (!fp->jit_requested && - !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) { + if (idx < ARRAY_SIZE(interpreters)) { fp->bpf_func = interpreters[idx]; + select_interpreter = true; } else { fp->bpf_func = __bpf_prog_ret0_warn; } #else fp->bpf_func = __bpf_prog_ret0_warn; #endif + return select_interpreter; } /** @@ -2505,7 +2506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. */ - bool jit_needed = fp->jit_requested; + bool jit_needed = false; if (fp->bpf_func) goto finalize; @@ -2514,7 +2515,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) bpf_prog_has_kfunc_call(fp)) jit_needed = true; - bpf_prog_select_func(fp); + if (!bpf_prog_select_interpreter(fp)) + jit_needed = true; /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during From 6d78b4473cdb08b74662355a9e8510bde09c511e Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 9 Sep 2025 09:52:20 +0000 Subject: [PATCH 1261/1307] bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init() Currently, calling bpf_map_kmalloc_node() from __bpf_async_init() can cause various locking issues; see the following stack trace (edited for style) as one example: ... [10.011566] do_raw_spin_lock.cold [10.011570] try_to_wake_up (5) double-acquiring the same [10.011575] kick_pool rq_lock, causing a hardlockup [10.011579] __queue_work [10.011582] queue_work_on [10.011585] kernfs_notify [10.011589] cgroup_file_notify [10.011593] try_charge_memcg (4) memcg accounting raises an [10.011597] obj_cgroup_charge_pages MEMCG_MAX event [10.011599] obj_cgroup_charge_account [10.011600] __memcg_slab_post_alloc_hook [10.011603] __kmalloc_node_noprof ... [10.011611] bpf_map_kmalloc_node [10.011612] __bpf_async_init [10.011615] bpf_timer_init (3) BPF calls bpf_timer_init() [10.011617] bpf_prog_xxxxxxxxxxxxxxxx_fcg_runnable [10.011619] bpf__sched_ext_ops_runnable [10.011620] enqueue_task_scx (2) BPF runs with rq_lock held [10.011622] enqueue_task [10.011626] ttwu_do_activate [10.011629] sched_ttwu_pending (1) grabs rq_lock ... The above was reproduced on bpf-next (b338cf849ec8) by modifying ./tools/sched_ext/scx_flatcg.bpf.c to call bpf_timer_init() during ops.runnable(), and hacking the memcg accounting code a bit to make a bpf_timer_init() call more likely to raise an MEMCG_MAX event. We have also run into other similar variants (both internally and on bpf-next), including double-acquiring cgroup_file_kn_lock, the same worker_pool::lock, etc. As suggested by Shakeel, fix this by using __GFP_HIGH instead of GFP_ATOMIC in __bpf_async_init(), so that e.g. if try_charge_memcg() raises an MEMCG_MAX event, we call __memcg_memory_event() with @allow_spinning=false and avoid calling cgroup_file_notify() there. Depends on mm patch "memcg: skip cgroup_file_notify if spinning is not allowed": https://lore.kernel.org/bpf/20250905201606.66198-1-shakeel.butt@linux.dev/ v0 approach s/bpf_map_kmalloc_node/bpf_mem_alloc/ https://lore.kernel.org/bpf/20250905061919.439648-1-yepeilin@google.com/ v1 approach: https://lore.kernel.org/bpf/20250905234547.862249-1-yepeilin@google.com/ Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Shakeel Butt Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/20250909095222.2121438-1-yepeilin@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b9b0c5fe33f6..8af62cb243d9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1274,8 +1274,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u goto out; } - /* allocate hrtimer via map_kmalloc to use memcg accounting */ - cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node); + /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until + * kmalloc_nolock() is available, avoid locking issues by using + * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). + */ + cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); if (!cb) { ret = -ENOMEM; goto out; From 90f7c100d2dd99d5cd5be950d553edd2647e6cc8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 6 Sep 2025 21:19:29 -0300 Subject: [PATCH 1262/1307] smb: client: fix compound alignment with encryption The encryption layer can't handle the padding iovs, so flatten the compound request into a single buffer with required padding to prevent the server from dropping the connection when finding unaligned compound requests. Fixes: bc925c1216f0 ("smb: client: improve compound padding in encryption") Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: linux-cifs@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 94b1d7a395d5..9f9955d274cf 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2640,13 +2640,35 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) } /* SMB headers in a compound are 8 byte aligned. */ - if (!IS_ALIGNED(len, 8)) { - num_padding = 8 - (len & 7); + if (IS_ALIGNED(len, 8)) + goto out; + + num_padding = 8 - (len & 7); + if (smb3_encryption_required(tcon)) { + int i; + + /* + * Flatten request into a single buffer with required padding as + * the encryption layer can't handle the padding iovs. + */ + for (i = 1; i < rqst->rq_nvec; i++) { + memcpy(rqst->rq_iov[0].iov_base + + rqst->rq_iov[0].iov_len, + rqst->rq_iov[i].iov_base, + rqst->rq_iov[i].iov_len); + rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len; + } + memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len, + 0, num_padding); + rqst->rq_iov[0].iov_len += num_padding; + rqst->rq_nvec = 1; + } else { rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding; rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding; rqst->rq_nvec++; - len += num_padding; } + len += num_padding; +out: shdr->NextCommand = cpu_to_le32(len); } From e0d1c55501d377163eb57feed863777ed1c973ad Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 7 Sep 2025 21:44:01 +0100 Subject: [PATCH 1263/1307] net: phy: fix phy_uses_state_machine() The blamed commit changed the conditions which phylib uses to stop and start the state machine in the suspend and resume paths, and while improving it, has caused two issues. The original code used this test: phydev->attached_dev && phydev->adjust_link and if true, the paths would handle the PHY state machine. This test evaluates true for normal drivers that are using phylib directly while the PHY is attached to the network device, but false in all other cases, which include the following cases: - when the PHY has never been attached to a network device. - when the PHY has been detached from a network device (as phy_detach() sets phydev->attached_dev to NULL, phy_disconnect() calls phy_detach() and additionally sets phydev->adjust_link NULL.) - when phylink is using the driver (as phydev->adjust_link is NULL.) Only the third case was incorrect, and the blamed commit attempted to fix this by changing this test to (simplified for brevity, see phy_uses_state_machine()): phydev->phy_link_change == phy_link_change ? phydev->attached_dev && phydev->adjust_link : true However, this also incorrectly evaluates true in the first two cases. Fix the first case by ensuring that phy_uses_state_machine() returns false when phydev->phy_link_change is NULL. Fix the second case by ensuring that phydev->phy_link_change is set to NULL when phy_detach() is called. Reported-by: Xu Yang Link: https://lore.kernel.org/r/20250806082931.3289134-1-xu.yang_2@nxp.com Fixes: fc75ea20ffb4 ("net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY") Signed-off-by: Russell King (Oracle) Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1uvMEz-00000003Aoe-3qWe@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7556aa3dd7ee..c82c1997147b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -287,8 +287,7 @@ static bool phy_uses_state_machine(struct phy_device *phydev) if (phydev->phy_link_change == phy_link_change) return phydev->attached_dev && phydev->adjust_link; - /* phydev->phy_link_change is implicitly phylink_phy_change() */ - return true; + return !!phydev->phy_link_change; } static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) @@ -1864,6 +1863,8 @@ void phy_detach(struct phy_device *phydev) phydev->attached_dev = NULL; phy_link_topo_del_phy(dev, phydev); } + + phydev->phy_link_change = NULL; phydev->phylink = NULL; if (!phydev->is_on_sfp_module) From c5ea3065586d790ea5193a679b85585173d59866 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 7 Sep 2025 21:24:06 -0300 Subject: [PATCH 1264/1307] smb: client: fix data loss due to broken rename(2) Rename of open files in SMB2+ has been broken for a very long time, resulting in data loss as the CIFS client would fail the rename(2) call with -ENOENT and then removing the target file. Fix this by implementing ->rename_pending_delete() for SMB2+, which will rename busy files to random filenames (e.g. silly rename) during unlink(2) or rename(2), and then marking them to delete-on-close. Besides, introduce a FIND_WR_NO_PENDING_DELETE flag to prevent open(2) from reusing open handles that had been marked as delete pending. Handle it in cifs_get_readable_path() as well. Reported-by: Jean-Baptiste Denis Closes: https://marc.info/?i=16aeb380-30d4-4551-9134-4e7d1dc833c0@pasteur.fr Reviewed-by: David Howells Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Cc: Frank Sorenson Cc: Olga Kornievskaia Cc: Benjamin Coddington Cc: Scott Mayhew Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 13 ++- fs/smb/client/file.c | 18 +++- fs/smb/client/inode.c | 86 ++++++++++++---- fs/smb/client/smb2glob.h | 3 +- fs/smb/client/smb2inode.c | 204 ++++++++++++++++++++++++++++++-------- fs/smb/client/smb2ops.c | 4 + fs/smb/client/smb2proto.h | 3 + fs/smb/client/trace.h | 9 +- 8 files changed, 268 insertions(+), 72 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 1e64a4fb6af0..0fae95cf81c4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -87,7 +87,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 7 +#define MAX_COMPOUND 10 /* * Default number of credits to keep available for SMB3. @@ -1882,9 +1882,12 @@ static inline bool is_replayable_error(int error) /* cifs_get_writable_file() flags */ -#define FIND_WR_ANY 0 -#define FIND_WR_FSUID_ONLY 1 -#define FIND_WR_WITH_DELETE 2 +enum cifs_writable_file_flags { + FIND_WR_ANY = 0U, + FIND_WR_FSUID_ONLY = (1U << 0), + FIND_WR_WITH_DELETE = (1U << 1), + FIND_WR_NO_PENDING_DELETE = (1U << 2), +}; #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 @@ -2343,6 +2346,8 @@ struct smb2_compound_vars { struct kvec qi_iov; struct kvec io_iov[SMB2_IOCTL_IOV_SIZE]; struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE]; + struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE]; + struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec close_iov; struct smb2_file_rename_info_hdr rename_info; struct smb2_file_link_info_hdr link_info; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 186e061068be..cb907e18cc35 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -998,7 +998,10 @@ int cifs_open(struct inode *inode, struct file *file) /* Get the cached handle as SMB2 close is deferred */ if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) { - rc = cifs_get_writable_path(tcon, full_path, FIND_WR_FSUID_ONLY, &cfile); + rc = cifs_get_writable_path(tcon, full_path, + FIND_WR_FSUID_ONLY | + FIND_WR_NO_PENDING_DELETE, + &cfile); } else { rc = cifs_get_readable_path(tcon, full_path, &cfile); } @@ -2530,6 +2533,9 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, continue; if (with_delete && !(open_file->fid.access & DELETE)) continue; + if ((flags & FIND_WR_NO_PENDING_DELETE) && + open_file->status_file_deleted) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2647,6 +2653,16 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, spin_unlock(&tcon->open_file_lock); free_dentry_path(page); *ret_file = find_readable_file(cinode, 0); + if (*ret_file) { + spin_lock(&cinode->open_file_lock); + if ((*ret_file)->status_file_deleted) { + spin_unlock(&cinode->open_file_lock); + cifsFileInfo_put(*ret_file); + *ret_file = NULL; + } else { + spin_unlock(&cinode->open_file_lock); + } + } return *ret_file ? 0 : -ENOENT; } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index fe453a4b3dc8..11d442e8b3d6 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1931,7 +1931,7 @@ cifs_drop_nlink(struct inode *inode) * but will return the EACCES to the caller. Note that the VFS does not call * unlink on negative dentries currently. */ -int cifs_unlink(struct inode *dir, struct dentry *dentry) +static int __cifs_unlink(struct inode *dir, struct dentry *dentry, bool sillyrename) { int rc = 0; unsigned int xid; @@ -2003,7 +2003,11 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto psx_del_no_retry; } - rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry); + if (sillyrename || (server->vals->protocol_id > SMB10_PROT_ID && + d_is_positive(dentry) && d_count(dentry) > 2)) + rc = -EBUSY; + else + rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry); psx_del_no_retry: if (!rc) { @@ -2071,6 +2075,11 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) return rc; } +int cifs_unlink(struct inode *dir, struct dentry *dentry) +{ + return __cifs_unlink(dir, dentry, false); +} + static int cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode, const char *full_path, struct cifs_sb_info *cifs_sb, @@ -2358,14 +2367,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb); cifs_put_tlink(tlink); + cifsInode = CIFS_I(d_inode(direntry)); + if (!rc) { + set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags); spin_lock(&d_inode(direntry)->i_lock); i_size_write(d_inode(direntry), 0); clear_nlink(d_inode(direntry)); spin_unlock(&d_inode(direntry)->i_lock); } - cifsInode = CIFS_I(d_inode(direntry)); /* force revalidate to go get info when needed */ cifsInode->time = 0; @@ -2458,8 +2469,11 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ do_rename_exit: - if (rc == 0) + if (rc == 0) { d_move(from_dentry, to_dentry); + /* Force a new lookup */ + d_drop(from_dentry); + } cifs_put_tlink(tlink); return rc; } @@ -2470,6 +2484,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, struct dentry *target_dentry, unsigned int flags) { const char *from_name, *to_name; + struct TCP_Server_Info *server; void *page1, *page2; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; @@ -2505,6 +2520,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, if (IS_ERR(tlink)) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); + server = tcon->ses->server; page1 = alloc_dentry_path(); page2 = alloc_dentry_path(); @@ -2591,19 +2607,53 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, unlink_target: #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ - - /* Try unlinking the target dentry if it's not negative */ - if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) { - if (d_is_dir(target_dentry)) - tmprc = cifs_rmdir(target_dir, target_dentry); - else - tmprc = cifs_unlink(target_dir, target_dentry); - if (tmprc) - goto cifs_rename_exit; - rc = cifs_do_rename(xid, source_dentry, from_name, - target_dentry, to_name); - if (!rc) - rehash = false; + if (d_really_is_positive(target_dentry)) { + if (!rc) { + struct inode *inode = d_inode(target_dentry); + /* + * Samba and ksmbd servers allow renaming a target + * directory that is open, so make sure to update + * ->i_nlink and then mark it as delete pending. + */ + if (S_ISDIR(inode->i_mode)) { + drop_cached_dir_by_name(xid, tcon, to_name, cifs_sb); + spin_lock(&inode->i_lock); + i_size_write(inode, 0); + clear_nlink(inode); + spin_unlock(&inode->i_lock); + set_bit(CIFS_INO_DELETE_PENDING, &CIFS_I(inode)->flags); + CIFS_I(inode)->time = 0; /* force reval */ + inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + } + } else if (rc == -EACCES || rc == -EEXIST) { + /* + * Rename failed, possibly due to a busy target. + * Retry it by unliking the target first. + */ + if (d_is_dir(target_dentry)) { + tmprc = cifs_rmdir(target_dir, target_dentry); + } else { + tmprc = __cifs_unlink(target_dir, target_dentry, + server->vals->protocol_id > SMB10_PROT_ID); + } + if (tmprc) { + /* + * Some servers will return STATUS_ACCESS_DENIED + * or STATUS_DIRECTORY_NOT_EMPTY when failing to + * rename a non-empty directory. Make sure to + * propagate the appropriate error back to + * userspace. + */ + if (tmprc == -EEXIST || tmprc == -ENOTEMPTY) + rc = tmprc; + goto cifs_rename_exit; + } + rc = cifs_do_rename(xid, source_dentry, from_name, + target_dentry, to_name); + if (!rc) + rehash = false; + } } /* force revalidate to go get info when needed */ @@ -2629,6 +2679,8 @@ cifs_dentry_needs_reval(struct dentry *dentry) struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct cached_fid *cfid = NULL; + if (test_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags)) + return false; if (cifs_i->time == 0) return true; diff --git a/fs/smb/client/smb2glob.h b/fs/smb/client/smb2glob.h index 224495322a05..e56e4d402f13 100644 --- a/fs/smb/client/smb2glob.h +++ b/fs/smb/client/smb2glob.h @@ -30,10 +30,9 @@ enum smb2_compound_ops { SMB2_OP_QUERY_DIR, SMB2_OP_MKDIR, SMB2_OP_RENAME, - SMB2_OP_DELETE, SMB2_OP_HARDLINK, SMB2_OP_SET_EOF, - SMB2_OP_RMDIR, + SMB2_OP_UNLINK, SMB2_OP_POSIX_QUERY_INFO, SMB2_OP_SET_REPARSE, SMB2_OP_GET_REPARSE, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 31c13fb5b85b..7cadc8ca4f55 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -346,9 +346,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_posix_query_info_compound_enter(xid, tcon->tid, ses->Suid, full_path); break; - case SMB2_OP_DELETE: - trace_smb3_delete_enter(xid, tcon->tid, ses->Suid, full_path); - break; case SMB2_OP_MKDIR: /* * Directories are created through parameters in the @@ -356,23 +353,40 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, */ trace_smb3_mkdir_enter(xid, tcon->tid, ses->Suid, full_path); break; - case SMB2_OP_RMDIR: - rqst[num_rqst].rq_iov = &vars->si_iov[0]; + case SMB2_OP_UNLINK: + rqst[num_rqst].rq_iov = vars->unlink_iov; rqst[num_rqst].rq_nvec = 1; size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */ data[0] = &delete_pending[0]; - rc = SMB2_set_info_init(tcon, server, - &rqst[num_rqst], COMPOUND_FID, - COMPOUND_FID, current->tgid, - FILE_DISPOSITION_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); - if (rc) + if (cfile) { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, + FILE_DISPOSITION_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + } else { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + COMPOUND_FID, + COMPOUND_FID, + current->tgid, + FILE_DISPOSITION_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + } + if (!rc && (!cfile || num_rqst > 1)) { + smb2_set_next_command(tcon, &rqst[num_rqst]); + smb2_set_related(&rqst[num_rqst]); + } else if (rc) { goto finished; - smb2_set_next_command(tcon, &rqst[num_rqst]); - smb2_set_related(&rqst[num_rqst++]); - trace_smb3_rmdir_enter(xid, tcon->tid, ses->Suid, full_path); + } + num_rqst++; + trace_smb3_unlink_enter(xid, tcon->tid, ses->Suid, full_path); break; case SMB2_OP_SET_EOF: rqst[num_rqst].rq_iov = &vars->si_iov[0]; @@ -442,7 +456,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, ses->Suid, full_path); break; case SMB2_OP_RENAME: - rqst[num_rqst].rq_iov = &vars->si_iov[0]; + rqst[num_rqst].rq_iov = vars->rename_iov; rqst[num_rqst].rq_nvec = 2; len = in_iov[i].iov_len; @@ -732,19 +746,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_posix_query_info_compound_done(xid, tcon->tid, ses->Suid); break; - case SMB2_OP_DELETE: - if (rc) - trace_smb3_delete_err(xid, tcon->tid, ses->Suid, rc); - else { - /* - * If dentry (hence, inode) is NULL, lease break is going to - * take care of degrading leases on handles for deleted files. - */ - if (inode) - cifs_mark_open_handles_for_deleted_file(inode, full_path); - trace_smb3_delete_done(xid, tcon->tid, ses->Suid); - } - break; case SMB2_OP_MKDIR: if (rc) trace_smb3_mkdir_err(xid, tcon->tid, ses->Suid, rc); @@ -765,11 +766,11 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_rename_done(xid, tcon->tid, ses->Suid); SMB2_set_info_free(&rqst[num_rqst++]); break; - case SMB2_OP_RMDIR: - if (rc) - trace_smb3_rmdir_err(xid, tcon->tid, ses->Suid, rc); + case SMB2_OP_UNLINK: + if (!rc) + trace_smb3_unlink_done(xid, tcon->tid, ses->Suid); else - trace_smb3_rmdir_done(xid, tcon->tid, ses->Suid); + trace_smb3_unlink_err(xid, tcon->tid, ses->Suid, rc); SMB2_set_info_free(&rqst[num_rqst++]); break; case SMB2_OP_SET_EOF: @@ -1166,7 +1167,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE); return smb2_compound_op(xid, tcon, cifs_sb, name, &oparms, NULL, - &(int){SMB2_OP_RMDIR}, 1, + &(int){SMB2_OP_UNLINK}, 1, NULL, NULL, NULL, NULL); } @@ -1175,20 +1176,29 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb, struct dentry *dentry) { struct cifs_open_parms oparms; + struct inode *inode = NULL; + int rc; - oparms = CIFS_OPARMS(cifs_sb, tcon, name, - DELETE, FILE_OPEN, - CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - ACL_NO_MODE); - int rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms, - NULL, &(int){SMB2_OP_DELETE}, 1, - NULL, NULL, NULL, dentry); + if (dentry) + inode = d_inode(dentry); + + oparms = CIFS_OPARMS(cifs_sb, tcon, name, DELETE, + FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); + rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms, + NULL, &(int){SMB2_OP_UNLINK}, + 1, NULL, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms, - NULL, &(int){SMB2_OP_DELETE}, 1, - NULL, NULL, NULL, NULL); + NULL, &(int){SMB2_OP_UNLINK}, + 1, NULL, NULL, NULL, NULL); } + /* + * If dentry (hence, inode) is NULL, lease break is going to + * take care of degrading leases on handles for deleted files. + */ + if (!rc && inode) + cifs_mark_open_handles_for_deleted_file(inode, name); return rc; } @@ -1441,3 +1451,113 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_free_open_info(&data); return rc; } + +static inline __le16 *utf16_smb2_path(struct cifs_sb_info *cifs_sb, + const char *name, size_t namelen) +{ + int len; + + if (*name == '\\' || + (cifs_sb_master_tlink(cifs_sb) && + cifs_sb_master_tcon(cifs_sb)->posix_extensions && *name == '/')) + name++; + return cifs_strndup_to_utf16(name, namelen, &len, + cifs_sb->local_nls, + cifs_remap(cifs_sb)); +} + +int smb2_rename_pending_delete(const char *full_path, + struct dentry *dentry, + const unsigned int xid) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(d_inode(dentry)->i_sb); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(dentry)); + __le16 *utf16_path __free(kfree) = NULL; + __u32 co = file_create_options(dentry); + int cmds[] = { + SMB2_OP_SET_INFO, + SMB2_OP_RENAME, + SMB2_OP_UNLINK, + }; + const int num_cmds = ARRAY_SIZE(cmds); + char *to_name __free(kfree) = NULL; + __u32 attrs = cinode->cifsAttrs; + struct cifs_open_parms oparms; + static atomic_t sillycounter; + struct cifsFileInfo *cfile; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + struct kvec iov[2]; + const char *ppath; + void *page; + size_t len; + int rc; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + page = alloc_dentry_path(); + + ppath = build_path_from_dentry(dentry->d_parent, page); + if (IS_ERR(ppath)) { + rc = PTR_ERR(ppath); + goto out; + } + + len = strlen(ppath) + strlen("/.__smb1234") + 1; + to_name = kmalloc(len, GFP_KERNEL); + if (!to_name) { + rc = -ENOMEM; + goto out; + } + + scnprintf(to_name, len, "%s%c.__smb%04X", ppath, CIFS_DIR_SEP(cifs_sb), + atomic_inc_return(&sillycounter) & 0xffff); + + utf16_path = utf16_smb2_path(cifs_sb, to_name, len); + if (!utf16_path) { + rc = -ENOMEM; + goto out; + } + + drop_cached_dir_by_name(xid, tcon, full_path, cifs_sb); + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + DELETE | FILE_WRITE_ATTRIBUTES, + FILE_OPEN, co, ACL_NO_MODE); + + attrs &= ~ATTR_READONLY; + if (!attrs) + attrs = ATTR_NORMAL; + if (d_inode(dentry)->i_nlink <= 1) + attrs |= ATTR_HIDDEN; + iov[0].iov_base = &(FILE_BASIC_INFO) { + .Attributes = cpu_to_le32(attrs), + }; + iov[0].iov_len = sizeof(FILE_BASIC_INFO); + iov[1].iov_base = utf16_path; + iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path); + + cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, + cmds, num_cmds, cfile, NULL, NULL, dentry); + if (rc == -EINVAL) { + cifs_dbg(FYI, "invalid lease key, resending request without lease\n"); + cifs_get_writable_path(tcon, full_path, + FIND_WR_WITH_DELETE, &cfile); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov, + cmds, num_cmds, cfile, NULL, NULL, NULL); + } + if (!rc) { + set_bit(CIFS_INO_DELETE_PENDING, &cinode->flags); + } else { + cifs_tcon_dbg(FYI, "%s: failed to rename '%s' to '%s': %d\n", + __func__, full_path, to_name, rc); + rc = -EIO; + } +out: + cifs_put_tlink(tlink); + free_dentry_path(page); + return rc; +} diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 9f9955d274cf..e586f3f4b5c9 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5398,6 +5398,7 @@ struct smb_version_operations smb20_operations = { .llseek = smb3_llseek, .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, + .rename_pending_delete = smb2_rename_pending_delete, }; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ @@ -5503,6 +5504,7 @@ struct smb_version_operations smb21_operations = { .llseek = smb3_llseek, .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, + .rename_pending_delete = smb2_rename_pending_delete, }; struct smb_version_operations smb30_operations = { @@ -5619,6 +5621,7 @@ struct smb_version_operations smb30_operations = { .llseek = smb3_llseek, .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, + .rename_pending_delete = smb2_rename_pending_delete, }; struct smb_version_operations smb311_operations = { @@ -5735,6 +5738,7 @@ struct smb_version_operations smb311_operations = { .llseek = smb3_llseek, .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, + .rename_pending_delete = smb2_rename_pending_delete, }; #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 6e805ece6a7b..b3f1398c9f79 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -317,5 +317,8 @@ int posix_info_sid_size(const void *beg, const void *end); int smb2_make_nfs_node(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); +int smb2_rename_pending_delete(const char *full_path, + struct dentry *dentry, + const unsigned int xid); #endif /* _SMB2PROTO_H */ diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index fe0e075bc63c..fd650e2afc76 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -669,13 +669,12 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_info_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(posix_query_info_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(hardlink_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rename_enter); -DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rmdir_enter); +DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(unlink_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_reparse_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_wsl_ea_compound_enter); -DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter); DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter); @@ -710,13 +709,12 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_info_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(posix_query_info_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(hardlink_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rename_done); -DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rmdir_done); +DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(unlink_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_reparse_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(get_reparse_compound_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done); -DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done); DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done); @@ -756,14 +754,13 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_info_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(posix_query_info_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(hardlink_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rename_err); -DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rmdir_err); +DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(unlink_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_reparse_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(get_reparse_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err); -DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err); DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err); From 686cab5a18e443e1d5f2abb17bed45837836425f Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 7 Sep 2025 11:08:21 +0300 Subject: [PATCH 1265/1307] net: dev_ioctl: take ops lock in hwtstamp lower paths ndo hwtstamp callbacks are expected to run under the per-device ops lock. Make the lower get/set paths consistent with the rest of ndo invocations. Kernel log: WARNING: CPU: 13 PID: 51364 at ./include/net/netdev_lock.h:70 __netdev_update_features+0x4bd/0xe60 ... RIP: 0010:__netdev_update_features+0x4bd/0xe60 ... Call Trace: netdev_update_features+0x1f/0x60 mlx5_hwtstamp_set+0x181/0x290 [mlx5_core] mlx5e_hwtstamp_set+0x19/0x30 [mlx5_core] dev_set_hwtstamp_phylib+0x9f/0x220 dev_set_hwtstamp_phylib+0x9f/0x220 dev_set_hwtstamp+0x13d/0x240 dev_ioctl+0x12f/0x4b0 sock_ioctl+0x171/0x370 __x64_sys_ioctl+0x3f7/0x900 ? __sys_setsockopt+0x69/0xb0 do_syscall_64+0x6f/0x2e0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 ... .... ---[ end trace 0000000000000000 ]--- Note that the mlx5_hwtstamp_set and mlx5e_hwtstamp_set functions shown in the trace come from an in progress patch converting the legacy ioctl to ndo_hwtstamp_get/set and are not present in mainline. Fixes: ffb7ed19ac0a ("net: hold netdev instance lock during ioctl operations") Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Link: https://patch.msgid.link/20250907080821.2353388-1-cjubran@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev_ioctl.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 9c0ad7f4b5d8..ad54b12d4b4c 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -464,8 +464,15 @@ int generic_hwtstamp_get_lower(struct net_device *dev, if (!netif_device_present(dev)) return -ENODEV; - if (ops->ndo_hwtstamp_get) - return dev_get_hwtstamp_phylib(dev, kernel_cfg); + if (ops->ndo_hwtstamp_get) { + int err; + + netdev_lock_ops(dev); + err = dev_get_hwtstamp_phylib(dev, kernel_cfg); + netdev_unlock_ops(dev); + + return err; + } /* Legacy path: unconverted lower driver */ return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg); @@ -481,8 +488,15 @@ int generic_hwtstamp_set_lower(struct net_device *dev, if (!netif_device_present(dev)) return -ENODEV; - if (ops->ndo_hwtstamp_set) - return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack); + if (ops->ndo_hwtstamp_set) { + int err; + + netdev_lock_ops(dev); + err = dev_set_hwtstamp_phylib(dev, kernel_cfg, extack); + netdev_unlock_ops(dev); + + return err; + } /* Legacy path: unconverted lower driver */ return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg); From 0f82c3ba66c6b2e3cde0f255156a753b108ee9dc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 8 Sep 2025 10:36:14 -0700 Subject: [PATCH 1266/1307] macsec: sync features on RTM_NEWLINK Syzkaller managed to lock the lower device via ETHTOOL_SFEATURES: netdev_lock include/linux/netdevice.h:2761 [inline] netdev_lock_ops include/net/netdev_lock.h:42 [inline] netdev_sync_lower_features net/core/dev.c:10649 [inline] __netdev_update_features+0xcb1/0x1be0 net/core/dev.c:10819 netdev_update_features+0x6d/0xe0 net/core/dev.c:10876 macsec_notify+0x2f5/0x660 drivers/net/macsec.c:4533 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] netdev_features_change+0x85/0xc0 net/core/dev.c:1570 __dev_ethtool net/ethtool/ioctl.c:3469 [inline] dev_ethtool+0x1536/0x19b0 net/ethtool/ioctl.c:3502 dev_ioctl+0x392/0x1150 net/core/dev_ioctl.c:759 It happens because lower features are out of sync with the upper: __dev_ethtool (real_dev) netdev_lock_ops(real_dev) ETHTOOL_SFEATURES __netdev_features_change netdev_sync_upper_features disable LRO on the lower if (old_features != dev->features) netdev_features_change fires NETDEV_FEAT_CHANGE macsec_notify NETDEV_FEAT_CHANGE netdev_update_features (for each macsec dev) netdev_sync_lower_features if (upper_features != lower_features) netdev_lock_ops(lower) # lower == real_dev stuck ... netdev_unlock_ops(real_dev) Per commit af5f54b0ef9e ("net: Lock lower level devices when updating features"), we elide the lock/unlock when the upper and lower features are synced. Makes sure the lower (real_dev) has proper features after the macsec link has been created. This makes sure we never hit the situation where we need to sync upper flags to the lower. Reported-by: syzbot+7e0f89fb6cae5d002de0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7e0f89fb6cae5d002de0 Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Signed-off-by: Stanislav Fomichev Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250908173614.3358264-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 01329fe7451a..0eca96eeed58 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -4286,6 +4286,7 @@ static int macsec_newlink(struct net_device *dev, if (err < 0) goto del_dev; + netdev_update_features(dev); netif_stacked_transfer_operstate(real_dev, dev); linkwatch_fire_event(dev); From 648de37416b301f046f62f1b65715c7fa8ebaa67 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 8 Sep 2025 11:16:01 -0700 Subject: [PATCH 1267/1307] mptcp: sockopt: make sync_socket_options propagate SOCK_KEEPOPEN Users reported a scenario where MPTCP connections that were configured with SO_KEEPALIVE prior to connect would fail to enable their keepalives if MTPCP fell back to TCP mode. After investigating, this affects keepalives for any connection where sync_socket_options is called on a socket that is in the closed or listening state. Joins are handled properly. For connects, sync_socket_options is called when the socket is still in the closed state. The tcp_set_keepalive() function does not act on sockets that are closed or listening, hence keepalive is not immediately enabled. Since the SO_KEEPOPEN flag is absent, it is not enabled later in the connect sequence via tcp_finish_connect. Setting the keepalive via sockopt after connect does work, but would not address any subsequently created flows. Fortunately, the fix here is straight-forward: set SOCK_KEEPOPEN on the subflow when calling sync_socket_options. The fix was valdidated both by using tcpdump to observe keepalive packets not being sent before the fix, and being sent after the fix. It was also possible to observe via ss that the keepalive timer was not enabled on these sockets before the fix, but was enabled afterwards. Fixes: 1b3e7ede1365 ("mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY") Cc: stable@vger.kernel.org Signed-off-by: Krister Johansen Reviewed-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/aL8dYfPZrwedCIh9@templeofstupid.com Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 2c267aff95be..2abe6f1e9940 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1532,13 +1532,12 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) { static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; struct sock *sk = (struct sock *)msk; + bool keep_open; - if (ssk->sk_prot->keepalive) { - if (sock_flag(sk, SOCK_KEEPOPEN)) - ssk->sk_prot->keepalive(ssk, 1); - else - ssk->sk_prot->keepalive(ssk, 0); - } + keep_open = sock_flag(sk, SOCK_KEEPOPEN); + if (ssk->sk_prot->keepalive) + ssk->sk_prot->keepalive(ssk, keep_open); + sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open); ssk->sk_priority = sk->sk_priority; ssk->sk_bound_dev_if = sk->sk_bound_dev_if; From 7094b84863e5832cb1cd9c4b9d648904775b6bd9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 8 Sep 2025 23:27:27 +0200 Subject: [PATCH 1268/1307] netlink: specs: mptcp: fix if-idx attribute type This attribute is used as a signed number in the code in pm_netlink.c: nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) The specs should then reflect that. Note that other 'if-idx' attributes from the same .yaml file use a signed number as well. Fixes: bc8aeb2045e2 ("Documentation: netlink: add a YAML spec for mptcp") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250908-net-mptcp-misc-fixes-6-17-rc5-v1-1-5f2168a66079@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/mptcp_pm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index 02f1ddcfbf1c..d15335684ec3 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -256,7 +256,7 @@ attribute-sets: type: u32 - name: if-idx - type: u32 + type: s32 - name: reset-reason type: u32 From 6f021e95d0828edc8ed104a294594c2f9569383a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 8 Sep 2025 23:27:28 +0200 Subject: [PATCH 1269/1307] doc: mptcp: net.mptcp.pm_type is deprecated The net.mptcp.pm_type sysctl knob has been deprecated in v6.15, net.mptcp.path_manager should be used instead. Adapt the section about path managers to suggest using the new sysctl knob instead of the deprecated one. Fixes: 595c26d122d1 ("mptcp: sysctl: set path manager by name") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250908-net-mptcp-misc-fixes-6-17-rc5-v1-2-5f2168a66079@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/mptcp.rst b/Documentation/networking/mptcp.rst index 17f2bab61164..2e31038d6462 100644 --- a/Documentation/networking/mptcp.rst +++ b/Documentation/networking/mptcp.rst @@ -60,10 +60,10 @@ address announcements. Typically, it is the client side that initiates subflows, and the server side that announces additional addresses via the ``ADD_ADDR`` and ``REMOVE_ADDR`` options. -Path managers are controlled by the ``net.mptcp.pm_type`` sysctl knob -- see -mptcp-sysctl.rst. There are two types: the in-kernel one (type ``0``) where the -same rules are applied for all the connections (see: ``ip mptcp``) ; and the -userspace one (type ``1``), controlled by a userspace daemon (i.e. `mptcpd +Path managers are controlled by the ``net.mptcp.path_manager`` sysctl knob -- +see mptcp-sysctl.rst. There are two types: the in-kernel one (``kernel``) where +the same rules are applied for all the connections (see: ``ip mptcp``) ; and the +userspace one (``userspace``), controlled by a userspace daemon (i.e. `mptcpd `_) where different rules can be applied for each connection. The path managers can be controlled via a Netlink API; see netlink_spec/mptcp_pm.rst. From ef1bd93b3b924086088b7818d9e5d89ede944f1f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 8 Sep 2025 23:27:29 +0200 Subject: [PATCH 1270/1307] selftests: mptcp: shellcheck: support v0.11.0 This v0.11.0 version introduces SC2329: Warn when (non-escaping) functions are never invoked. Except that, similar to SC2317, ShellCheck is currently unable to figure out functions that are invoked via trap, or indirectly, when calling functions via variables. It is then needed to disable this new SC2329. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250908-net-mptcp-misc-fixes-6-17-rc5-v1-3-5f2168a66079@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 2 +- tools/testing/selftests/net/mptcp/pm_netlink.sh | 5 +++-- tools/testing/selftests/net/mptcp/simult_flows.sh | 2 +- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 7a3cb4c09e45..d847ff1737c3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -28,7 +28,7 @@ flush_pids() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 5e3c56253274..c2ab9f7f0d21 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -134,7 +134,7 @@ ns4="" TEST_GROUP="" # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cin_disconnect" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 82cae37d9c20..7fd555b123b9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,7 @@ # ShellCheck incorrectly believes that most of the code here is unreachable # because it's invoked by variable name, see how the "tests" array is used -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 . "$(dirname "${0}")/mptcp_lib.sh" diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 418a903c3a4d..f01989be6e9b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -95,7 +95,7 @@ init() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ac7ec6f94023..ec6a87588191 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -32,7 +32,7 @@ ns1="" err=$(mktemp) # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "${err}" @@ -70,8 +70,9 @@ format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } +# This function is invoked indirectly +#shellcheck disable=SC2317,SC2329 get_endpoint() { - # shellcheck disable=SC2317 # invoked indirectly mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2329c2f8519b..1903e8e84a31 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -35,7 +35,7 @@ usage() { } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cout" "$sout" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 333064b0b5ac..970c329735ff 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -94,7 +94,7 @@ test_fail() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { print_title "Cleanup" From 181993bb0d626cf88cc803f4356ce5c5abe86278 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Wed, 10 Sep 2025 13:33:40 +0800 Subject: [PATCH 1271/1307] erofs: fix runtime warning on truncate_folio_batch_exceptionals() Commit 0e2f80afcfa6("fs/dax: ensure all pages are idle prior to filesystem unmount") introduced the WARN_ON_ONCE to capture whether the filesystem has removed all DAX entries or not and applied the fix to xfs and ext4. Apply the missed fix on erofs to fix the runtime warning: [ 5.266254] ------------[ cut here ]------------ [ 5.266274] WARNING: CPU: 6 PID: 3109 at mm/truncate.c:89 truncate_folio_batch_exceptionals+0xff/0x260 [ 5.266294] Modules linked in: [ 5.266999] CPU: 6 UID: 0 PID: 3109 Comm: umount Tainted: G S 6.16.0+ #6 PREEMPT(voluntary) [ 5.267012] Tainted: [S]=CPU_OUT_OF_SPEC [ 5.267017] Hardware name: Dell Inc. OptiPlex 5000/05WXFV, BIOS 1.5.1 08/24/2022 [ 5.267024] RIP: 0010:truncate_folio_batch_exceptionals+0xff/0x260 [ 5.267076] Code: 00 00 41 39 df 7f 11 eb 78 83 c3 01 49 83 c4 08 41 39 df 74 6c 48 63 f3 48 83 fe 1f 0f 83 3c 01 00 00 43 f6 44 26 08 01 74 df <0f> 0b 4a 8b 34 22 4c 89 ef 48 89 55 90 e8 ff 54 1f 00 48 8b 55 90 [ 5.267083] RSP: 0018:ffffc900013f36c8 EFLAGS: 00010202 [ 5.267095] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 5.267101] RDX: ffffc900013f3790 RSI: 0000000000000000 RDI: ffff8882a1407898 [ 5.267108] RBP: ffffc900013f3740 R08: 0000000000000000 R09: 0000000000000000 [ 5.267113] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 5.267119] R13: ffff8882a1407ab8 R14: ffffc900013f3888 R15: 0000000000000001 [ 5.267125] FS: 00007aaa8b437800(0000) GS:ffff88850025b000(0000) knlGS:0000000000000000 [ 5.267132] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.267138] CR2: 00007aaa8b3aac10 CR3: 000000024f764000 CR4: 0000000000f52ef0 [ 5.267144] PKRU: 55555554 [ 5.267150] Call Trace: [ 5.267154] [ 5.267181] truncate_inode_pages_range+0x118/0x5e0 [ 5.267193] ? save_trace+0x54/0x390 [ 5.267296] truncate_inode_pages_final+0x43/0x60 [ 5.267309] evict+0x2a4/0x2c0 [ 5.267339] dispose_list+0x39/0x80 [ 5.267352] evict_inodes+0x150/0x1b0 [ 5.267376] generic_shutdown_super+0x41/0x180 [ 5.267390] kill_block_super+0x1b/0x50 [ 5.267402] erofs_kill_sb+0x81/0x90 [erofs] [ 5.267436] deactivate_locked_super+0x32/0xb0 [ 5.267450] deactivate_super+0x46/0x60 [ 5.267460] cleanup_mnt+0xc3/0x170 [ 5.267475] __cleanup_mnt+0x12/0x20 [ 5.267485] task_work_run+0x5d/0xb0 [ 5.267499] exit_to_user_mode_loop+0x144/0x170 [ 5.267512] do_syscall_64+0x2b9/0x7c0 [ 5.267523] ? __lock_acquire+0x665/0x2ce0 [ 5.267535] ? __lock_acquire+0x665/0x2ce0 [ 5.267560] ? lock_acquire+0xcd/0x300 [ 5.267573] ? find_held_lock+0x31/0x90 [ 5.267582] ? mntput_no_expire+0x97/0x4e0 [ 5.267606] ? mntput_no_expire+0xa1/0x4e0 [ 5.267625] ? mntput+0x24/0x50 [ 5.267634] ? path_put+0x1e/0x30 [ 5.267647] ? do_faccessat+0x120/0x2f0 [ 5.267677] ? do_syscall_64+0x1a2/0x7c0 [ 5.267686] ? from_kgid_munged+0x17/0x30 [ 5.267703] ? from_kuid_munged+0x13/0x30 [ 5.267711] ? __do_sys_getuid+0x3d/0x50 [ 5.267724] ? do_syscall_64+0x1a2/0x7c0 [ 5.267732] ? irqentry_exit+0x77/0xb0 [ 5.267743] ? clear_bhb_loop+0x30/0x80 [ 5.267752] ? clear_bhb_loop+0x30/0x80 [ 5.267765] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 5.267772] RIP: 0033:0x7aaa8b32a9fb [ 5.267781] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 e9 83 0d 00 f7 d8 [ 5.267787] RSP: 002b:00007ffd7c4c9468 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 5.267796] RAX: 0000000000000000 RBX: 00005a61592a8b00 RCX: 00007aaa8b32a9fb [ 5.267802] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00005a61592b2080 [ 5.267806] RBP: 00007ffd7c4c9540 R08: 00007aaa8b403b20 R09: 0000000000000020 [ 5.267812] R10: 0000000000000001 R11: 0000000000000246 R12: 00005a61592a8c00 [ 5.267817] R13: 0000000000000000 R14: 00005a61592b2080 R15: 00005a61592a8f10 [ 5.267849] [ 5.267854] irq event stamp: 4721 [ 5.267859] hardirqs last enabled at (4727): [] __up_console_sem+0x90/0xa0 [ 5.267873] hardirqs last disabled at (4732): [] __up_console_sem+0x75/0xa0 [ 5.267884] softirqs last enabled at (3044): [] kernel_fpu_end+0x53/0x70 [ 5.267895] softirqs last disabled at (3042): [] kernel_fpu_begin_mask+0xc4/0x120 [ 5.267905] ---[ end trace 0000000000000000 ]--- Fixes: bde708f1a65d ("fs/dax: always remove DAX page-cache entries when breaking layouts") Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang --- fs/erofs/super.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 1b529ace4db0..db13b40a78e0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -1018,10 +1018,22 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static void erofs_evict_inode(struct inode *inode) +{ +#ifdef CONFIG_FS_DAX + if (IS_DAX(inode)) + dax_break_layout_final(inode); +#endif + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); +} + const struct super_operations erofs_sops = { .put_super = erofs_put_super, .alloc_inode = erofs_alloc_inode, .free_inode = erofs_free_inode, + .evict_inode = erofs_evict_inode, .statfs = erofs_statfs, .show_options = erofs_show_options, }; From 8cc71fc3b82b51e155fbe20876b1aa17a315ac4c Mon Sep 17 00:00:00 2001 From: Nithyanantham Paramasivam Date: Fri, 5 Sep 2025 18:18:00 +0530 Subject: [PATCH 1272/1307] wifi: cfg80211: Fix "no buffer space available" error in nl80211_get_station() for MLO Currently, nl80211_get_station() allocates a fixed buffer size using NLMSG_DEFAULT_SIZE. In multi-link scenarios - particularly when the number of links exceeds two - this buffer size is often insufficient to accommodate complete station statistics, resulting in "no buffer space available" errors. To address this, modify nl80211_get_station() to return only accumulated station statistics and exclude per link stats. Pass a new flag (link_stats) to nl80211_send_station() to control the inclusion of per link statistics. This allows retaining detailed output with per link data in dump commands, while excluding it from other commands where it is not needed. This change modifies the handling of per link stats introduced in commit 82d7f841d9bd ("wifi: cfg80211: extend to embed link level statistics in NL message") to enable them only for nl80211_dump_station(). Apply the same fix to cfg80211_del_sta_sinfo() by skipping per link stats to avoid buffer issues. cfg80211_new_sta() doesn't include stats and is therefore not impacted. Fixes: 82d7f841d9bd ("wifi: cfg80211: extend to embed link level statistics in NL message") Signed-off-by: Nithyanantham Paramasivam Link: https://patch.msgid.link/20250905124800.1448493-1-nithyanantham.paramasivam@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 89519aa52893..f2f7424e930c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7062,7 +7062,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, - const u8 *mac_addr, struct station_info *sinfo) + const u8 *mac_addr, struct station_info *sinfo, + bool link_stats) { void *hdr; struct nlattr *sinfoattr, *bss_param; @@ -7283,7 +7284,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, goto nla_put_failure; } - if (sinfo->valid_links) { + if (link_stats && sinfo->valid_links) { links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!links) goto nla_put_failure; @@ -7574,7 +7575,7 @@ static int nl80211_dump_station(struct sk_buff *skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, - &sinfo) < 0) + &sinfo, true) < 0) goto out; sta_idx++; @@ -7635,7 +7636,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, info->snd_portid, info->snd_seq, 0, - rdev, dev, mac_addr, &sinfo) < 0) { + rdev, dev, mac_addr, &sinfo, false) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -19680,7 +19681,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, return; if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo) < 0) { + rdev, dev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } @@ -19710,7 +19711,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, } if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo) < 0) { + rdev, dev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } From e0423541477dfb684fbc6e6b5386054bc650f264 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:44:45 +0200 Subject: [PATCH 1273/1307] PM: EM: Add function for registering a PD without capacity update The intel_pstate driver manages CPU capacity changes itself and it does not need an update of the capacity of all CPUs in the system to be carried out after registering a PD. Moreover, in some configurations (for instance, an SMT-capable hybrid x86 system booted with nosmt in the kernel command line) the em_check_capacity_update() call at the end of em_dev_register_perf_domain() always fails and reschedules itself to run once again in 1 s, so effectively it runs in vain every 1 s forever. To address this, introduce a new variant of em_dev_register_perf_domain(), called em_dev_register_pd_no_update(), that does not invoke em_check_capacity_update(), and make intel_pstate use it instead of the original. Fixes: 7b010f9b9061 ("cpufreq: intel_pstate: EAS support for hybrid platforms") Closes: https://lore.kernel.org/linux-pm/40212796-734c-4140-8a85-854f72b8144d@panix.com/ Reported-by: Kenneth R. Crudup Tested-by: Kenneth R. Crudup Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- include/linux/energy_model.h | 10 ++++++++++ kernel/power/energy_model.c | 29 +++++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f366d35c5840..0d5d283a5429 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1034,8 +1034,8 @@ static bool hybrid_register_perf_domain(unsigned int cpu) if (!cpu_dev) return false; - if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb, - cpumask_of(cpu), false)) + if (em_dev_register_pd_no_update(cpu_dev, HYBRID_EM_STATE_COUNT, &cb, + cpumask_of(cpu), false)) return false; cpudata->pd_registered = true; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 7fa1eb3cc823..61d50571ad88 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -171,6 +171,9 @@ int em_dev_update_perf_domain(struct device *dev, int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts); +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); void em_table_free(struct em_perf_table *table); @@ -350,6 +353,13 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, { return -EINVAL; } +static inline +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) +{ + return -EINVAL; +} static inline void em_dev_unregister_perf_domain(struct device *dev) { } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index ea7995a25780..8df55397414a 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -552,6 +552,30 @@ EXPORT_SYMBOL_GPL(em_cpu_get); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts) +{ + int ret = em_dev_register_pd_no_update(dev, nr_states, cb, cpus, microwatts); + + if (_is_cpu_device(dev)) + em_check_capacity_update(); + + return ret; +} +EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); + +/** + * em_dev_register_pd_no_update() - Register a perf domain for a device + * @dev : Device to register the PD for + * @nr_states : Number of performance states in the new PD + * @cb : Callback functions for populating the energy model + * @cpus : CPUs to include in the new PD (mandatory if @dev is a CPU device) + * @microwatts : Whether or not the power values in the EM will be in uW + * + * Like em_dev_register_perf_domain(), but does not trigger a CPU capacity + * update after registering the PD, even if @dev is a CPU device. + */ +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { struct em_perf_table *em_table; unsigned long cap, prev_cap = 0; @@ -636,12 +660,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, unlock: mutex_unlock(&em_pd_mutex); - if (_is_cpu_device(dev)) - em_check_capacity_update(); - return ret; } -EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); +EXPORT_SYMBOL_GPL(em_dev_register_pd_no_update); /** * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device From 4de37a48b6b58faaded9eb765047cf0d8785ea18 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Aug 2025 11:03:44 +0200 Subject: [PATCH 1274/1307] drm/mediatek: fix potential OF node use-after-free The for_each_child_of_node() helper drops the reference it takes to each node as it iterates over children and an explicit of_node_put() is only needed when exiting the loop early. Drop the recently introduced bogus additional reference count decrement at each iteration that could potentially lead to a use-after-free. Fixes: 1f403699c40f ("drm/mediatek: Fix device/node reference count leaks in mtk_drm_get_all_drm_priv") Cc: Ma Ke Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250829090345.21075-2-johan@kernel.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index f8a817689e16..76719eb5db09 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -387,11 +387,11 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) of_id = of_match_node(mtk_drm_of_ids, node); if (!of_id) - goto next_put_node; + continue; pdev = of_find_device_by_node(node); if (!pdev) - goto next_put_node; + continue; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); if (!drm_dev) @@ -417,11 +417,10 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) next_put_device_pdev_dev: put_device(&pdev->dev); -next_put_node: - of_node_put(node); - - if (cnt == MAX_CRTC) + if (cnt == MAX_CRTC) { + of_node_put(node); break; + } } if (drm_priv->data->mmsys_dev_num == cnt) { From 9ba2556cef1df746fad4d691c8290e235b23c7d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 29 Aug 2025 11:03:45 +0200 Subject: [PATCH 1275/1307] drm/mediatek: clean up driver data initialisation The platform and drm devices are only used to look up the drm device and its driver data respectively when initialising the driver data during bind(). Drop the reference counts as soon as they have been used to make the code more readable. Note that the crtc count is never incremented on lookup failures. Signed-off-by: Johan Hovold Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250829090345.21075-3-johan@kernel.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 76719eb5db09..eb5537f0ac90 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -394,12 +394,14 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) continue; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); + put_device(&pdev->dev); if (!drm_dev) - goto next_put_device_pdev_dev; + continue; temp_drm_priv = dev_get_drvdata(drm_dev); + put_device(drm_dev); if (!temp_drm_priv) - goto next_put_device_drm_dev; + continue; if (temp_drm_priv->data->main_len) all_drm_priv[CRTC_MAIN] = temp_drm_priv; @@ -411,12 +413,6 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) if (temp_drm_priv->mtk_drm_bound) cnt++; -next_put_device_drm_dev: - put_device(drm_dev); - -next_put_device_pdev_dev: - put_device(&pdev->dev); - if (cnt == MAX_CRTC) { of_node_put(node); break; From 9c600589e14f5fc01b8be9a5d0ad1f094b8b304b Mon Sep 17 00:00:00 2001 From: James Guan Date: Wed, 10 Sep 2025 19:19:29 +0800 Subject: [PATCH 1276/1307] wifi: virt_wifi: Fix page fault on connect This patch prevents page fault in __cfg80211_connect_result()[1] when connecting a virt_wifi device, while ensuring that virt_wifi can connect properly. [1] https://lore.kernel.org/linux-wireless/20250909063213.1055024-1-guan_yufei@163.com/ Closes: https://lore.kernel.org/linux-wireless/20250909063213.1055024-1-guan_yufei@163.com/ Signed-off-by: James Guan Link: https://patch.msgid.link/20250910111929.137049-1-guan_yufei@163.com [remove irrelevant network-manager instructions] Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/virt_wifi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index 1fffeff2190c..4eae89376feb 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c @@ -277,7 +277,9 @@ static void virt_wifi_connect_complete(struct work_struct *work) priv->is_connected = true; /* Schedules an event that acquires the rtnl lock. */ - cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0, + cfg80211_connect_result(priv->upperdev, + priv->is_connected ? fake_router_bssid : NULL, + NULL, 0, NULL, 0, status, GFP_KERNEL); netif_carrier_on(priv->upperdev); } From 87b90cee22d8658a69c0fbd43633839b75f8f05f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 2 Sep 2025 21:02:22 +0200 Subject: [PATCH 1277/1307] MAINTAINERS: drm-misc: fix X: entries for nova/nouveau Nouveau patches usually flow through the drm-misc tree, while nova (and nova-core) are maintained through a dedicated driver tree and soon through drm-rust. Hence, fix up the corresponding X: entries to list nova instead of nouveau. Reported-by: Maxime Ripard Closes: https://lore.kernel.org/dri-devel/enuksb2qk5wyrilz3l2vnog45lghgmplrav5to6pd5k5owi36h@pxdq6y5dpgpt/ Acked-by: Maxime Ripard Link: https://lore.kernel.org/r/20250902190247.435340-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f94e115a8d32..ed7e06ca1d05 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8091,7 +8091,7 @@ X: drivers/gpu/drm/i915/ X: drivers/gpu/drm/kmb/ X: drivers/gpu/drm/mediatek/ X: drivers/gpu/drm/msm/ -X: drivers/gpu/drm/nouveau/ +X: drivers/gpu/drm/nova/ X: drivers/gpu/drm/radeon/ X: drivers/gpu/drm/tegra/ X: drivers/gpu/drm/xe/ From a3967baad4d533dc254c31e0d221e51c8d223d58 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 9 Sep 2025 23:26:12 +0000 Subject: [PATCH 1278/1307] tcp_bpf: Call sk_msg_free() when tcp_bpf_send_verdict() fails to allocate psock->cork. syzbot reported the splat below. [0] The repro does the following: 1. Load a sk_msg prog that calls bpf_msg_cork_bytes(msg, cork_bytes) 2. Attach the prog to a SOCKMAP 3. Add a socket to the SOCKMAP 4. Activate fault injection 5. Send data less than cork_bytes At 5., the data is carried over to the next sendmsg() as it is smaller than the cork_bytes specified by bpf_msg_cork_bytes(). Then, tcp_bpf_send_verdict() tries to allocate psock->cork to hold the data, but this fails silently due to fault injection + __GFP_NOWARN. If the allocation fails, we need to revert the sk->sk_forward_alloc change done by sk_msg_alloc(). Let's call sk_msg_free() when tcp_bpf_send_verdict fails to allocate psock->cork. The "*copied" also needs to be updated such that a proper error can be returned to the caller, sendmsg. It fails to allocate psock->cork. Nothing has been corked so far, so this patch simply sets "*copied" to 0. [0]: WARNING: net/ipv4/af_inet.c:156 at inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156, CPU#1: syz-executor/5983 Modules linked in: CPU: 1 UID: 0 PID: 5983 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:inet_sock_destruct+0x623/0x730 net/ipv4/af_inet.c:156 Code: 0f 0b 90 e9 62 fe ff ff e8 7a db b5 f7 90 0f 0b 90 e9 95 fe ff ff e8 6c db b5 f7 90 0f 0b 90 e9 bb fe ff ff e8 5e db b5 f7 90 <0f> 0b 90 e9 e1 fe ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 9f fc RSP: 0018:ffffc90000a08b48 EFLAGS: 00010246 RAX: ffffffff8a09d0b2 RBX: dffffc0000000000 RCX: ffff888024a23c80 RDX: 0000000000000100 RSI: 0000000000000fff RDI: 0000000000000000 RBP: 0000000000000fff R08: ffff88807e07c627 R09: 1ffff1100fc0f8c4 R10: dffffc0000000000 R11: ffffed100fc0f8c5 R12: ffff88807e07c380 R13: dffffc0000000000 R14: ffff88807e07c60c R15: 1ffff1100fc0f872 FS: 00005555604c4500(0000) GS:ffff888125af1000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005555604df5c8 CR3: 0000000032b06000 CR4: 00000000003526f0 Call Trace: __sk_destruct+0x86/0x660 net/core/sock.c:2339 rcu_do_batch kernel/rcu/tree.c:2605 [inline] rcu_core+0xca8/0x1770 kernel/rcu/tree.c:2861 handle_softirqs+0x286/0x870 kernel/softirq.c:579 __do_softirq kernel/softirq.c:613 [inline] invoke_softirq kernel/softirq.c:453 [inline] __irq_exit_rcu+0xca/0x1f0 kernel/softirq.c:680 irq_exit_rcu+0x9/0x30 kernel/softirq.c:696 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1052 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1052 Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") Reported-by: syzbot+4cabd1d2fa917a456db8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68c0b6b5.050a0220.3c6139.0013.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250909232623.4151337-1-kuniyu@google.com --- net/ipv4/tcp_bpf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index ba581785adb4..a268e1595b22 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -408,8 +408,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, if (!psock->cork) { psock->cork = kzalloc(sizeof(*psock->cork), GFP_ATOMIC | __GFP_NOWARN); - if (!psock->cork) + if (!psock->cork) { + sk_msg_free(sk, msg); + *copied = 0; return -ENOMEM; + } } memcpy(psock->cork, msg, sizeof(*msg)); return 0; From d013ebc3499fd87cb9dee1dafd0c58aeb05c27c1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 10 Sep 2025 16:56:06 +0200 Subject: [PATCH 1279/1307] selftests: can: enable CONFIG_CAN_VCAN as a module A proper kernel configuration for running kselftest can be obtained with: $ yes | make kselftest-merge Build of 'vcan' driver is currently missing, while the other required knobs are already there because of net/link_netns.py [1]. Add a config file in selftests/net/can to store the minimum set of kconfig needed for CAN selftests. [1] https://patch.msgid.link/20250219125039.18024-14-shaw.leon@gmail.com Fixes: 77442ffa83e8 ("selftests: can: Import tst-filter from can-tests") Reviewed-by: Vincent Mailhol Signed-off-by: Davide Caratti Link: https://patch.msgid.link/fa4c0ea262ec529f25e5f5aa9269d84764c67321.1757516009.git.dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- tools/testing/selftests/net/can/config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/net/can/config diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config new file mode 100644 index 000000000000..188f79796670 --- /dev/null +++ b/tools/testing/selftests/net/can/config @@ -0,0 +1,3 @@ +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VCAN=m From 7fcbe5b2c6a4b5407bf2241fdb71e0a390f6ab9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 25 Aug 2025 23:07:24 +0900 Subject: [PATCH 1280/1307] can: j1939: implement NETDEV_UNREGISTER notification handler syzbot is reporting unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 problem, for j1939 protocol did not have NETDEV_UNREGISTER notification handler for undoing changes made by j1939_sk_bind(). Commit 25fe97cb7620 ("can: j1939: move j1939_priv_put() into sk_destruct callback") expects that a call to j1939_priv_put() can be unconditionally delayed until j1939_sk_sock_destruct() is called. But we need to call j1939_priv_put() against an extra ref held by j1939_sk_bind() call (as a part of undoing changes made by j1939_sk_bind()) as soon as NETDEV_UNREGISTER notification fires (i.e. before j1939_sk_sock_destruct() is called via j1939_sk_release()). Otherwise, the extra ref on "struct j1939_priv" held by j1939_sk_bind() call prevents "struct net_device" from dropping the usage count to 1; making it impossible for unregister_netdevice() to continue. Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Tested-by: syzbot Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Fixes: 25fe97cb7620 ("can: j1939: move j1939_priv_put() into sk_destruct callback") Signed-off-by: Tetsuo Handa Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://patch.msgid.link/ac9db9a4-6c30-416e-8b94-96e6559d55b2@I-love.SAKURA.ne.jp [mkl: remove space in front of label] Signed-off-by: Marc Kleine-Budde --- net/can/j1939/j1939-priv.h | 1 + net/can/j1939/main.c | 3 +++ net/can/j1939/socket.c | 49 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 31a93cae5111..81f58924b4ac 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -212,6 +212,7 @@ void j1939_priv_get(struct j1939_priv *priv); /* notify/alert all j1939 sockets bound to ifindex */ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv); +void j1939_sk_netdev_event_unregister(struct j1939_priv *priv); int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk); void j1939_tp_init(struct j1939_priv *priv); diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 7e8a20f2fc42..3706a872ecaf 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -377,6 +377,9 @@ static int j1939_netdev_notify(struct notifier_block *nb, j1939_sk_netdev_event_netdown(priv); j1939_ecu_unmap_all(priv); break; + case NETDEV_UNREGISTER: + j1939_sk_netdev_event_unregister(priv); + break; } j1939_priv_put(priv); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 3d8b588822f9..70ebc861ea2a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1300,6 +1300,55 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) read_unlock_bh(&priv->j1939_socks_lock); } +void j1939_sk_netdev_event_unregister(struct j1939_priv *priv) +{ + struct sock *sk; + struct j1939_sock *jsk; + bool wait_rcu = false; + +rescan: /* The caller is holding a ref on this "priv" via j1939_priv_get_by_ndev(). */ + read_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + /* Skip if j1939_jsk_add() is not called on this socket. */ + if (!(jsk->state & J1939_SOCK_BOUND)) + continue; + sk = &jsk->sk; + sock_hold(sk); + read_unlock_bh(&priv->j1939_socks_lock); + /* Check if j1939_jsk_del() is not yet called on this socket after holding + * socket's lock, for both j1939_sk_bind() and j1939_sk_release() call + * j1939_jsk_del() with socket's lock held. + */ + lock_sock(sk); + if (jsk->state & J1939_SOCK_BOUND) { + /* Neither j1939_sk_bind() nor j1939_sk_release() called j1939_jsk_del(). + * Make this socket no longer bound, by pretending as if j1939_sk_bind() + * dropped old references but did not get new references. + */ + j1939_jsk_del(priv, jsk); + j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa); + j1939_netdev_stop(priv); + /* Call j1939_priv_put() now and prevent j1939_sk_sock_destruct() from + * calling the corresponding j1939_priv_put(). + * + * j1939_sk_sock_destruct() is supposed to call j1939_priv_put() after + * an RCU grace period. But since the caller is holding a ref on this + * "priv", we can defer synchronize_rcu() until immediately before + * the caller calls j1939_priv_put(). + */ + j1939_priv_put(priv); + jsk->priv = NULL; + wait_rcu = true; + } + release_sock(sk); + sock_put(sk); + goto rescan; + } + read_unlock_bh(&priv->j1939_socks_lock); + if (wait_rcu) + synchronize_rcu(); +} + static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { From f214744c8a27c3c1da6b538c232da22cd027530e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 24 Aug 2025 19:30:09 +0900 Subject: [PATCH 1281/1307] can: j1939: j1939_sk_bind(): call j1939_priv_put() immediately when j1939_local_ecu_get() failed Commit 25fe97cb7620 ("can: j1939: move j1939_priv_put() into sk_destruct callback") expects that a call to j1939_priv_put() can be unconditionally delayed until j1939_sk_sock_destruct() is called. But a refcount leak will happen when j1939_sk_bind() is called again after j1939_local_ecu_get() from previous j1939_sk_bind() call returned an error. We need to call j1939_priv_put() before j1939_sk_bind() returns an error. Fixes: 25fe97cb7620 ("can: j1939: move j1939_priv_put() into sk_destruct callback") Signed-off-by: Tetsuo Handa Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://patch.msgid.link/4f49a1bc-a528-42ad-86c0-187268ab6535@I-love.SAKURA.ne.jp Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 70ebc861ea2a..88e7160d4248 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -521,6 +521,9 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa); if (ret) { j1939_netdev_stop(priv); + jsk->priv = NULL; + synchronize_rcu(); + j1939_priv_put(priv); goto out_release_sock; } From 06e02da29f6f1a45fc07bd60c7eaf172dc21e334 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 24 Aug 2025 19:27:40 +0900 Subject: [PATCH 1282/1307] can: j1939: j1939_local_ecu_get(): undo increment when j1939_local_ecu_get() fails Since j1939_sk_bind() and j1939_sk_release() call j1939_local_ecu_put() when J1939_SOCK_BOUND was already set, but the error handling path for j1939_sk_bind() will not set J1939_SOCK_BOUND when j1939_local_ecu_get() fails, j1939_local_ecu_get() needs to undo priv->ents[sa].nusers++ when j1939_local_ecu_get() returns an error. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Tetsuo Handa Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://patch.msgid.link/e7f80046-4ff7-4ce2-8ad8-7c3c678a42c9@I-love.SAKURA.ne.jp Signed-off-by: Marc Kleine-Budde --- net/can/j1939/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c index 39844f14eed8..797719cb227e 100644 --- a/net/can/j1939/bus.c +++ b/net/can/j1939/bus.c @@ -290,8 +290,11 @@ int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa) if (!ecu) ecu = j1939_ecu_create_locked(priv, name); err = PTR_ERR_OR_ZERO(ecu); - if (err) + if (err) { + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers--; goto done; + } ecu->nusers++; /* TODO: do we care if ecu->addr != sa? */ From ef79f00be72bd81d2e1e6f060d83cf7e425deee4 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Fri, 22 Aug 2025 12:50:02 +0300 Subject: [PATCH 1283/1307] can: xilinx_can: xcan_write_frame(): fix use-after-free of transmitted SKB can_put_echo_skb() takes ownership of the SKB and it may be freed during or after the call. However, xilinx_can xcan_write_frame() keeps using SKB after the call. Fix that by only calling can_put_echo_skb() after the code is done touching the SKB. The tx_lock is held for the entire xcan_write_frame() execution and also on the can_get_echo_skb() side so the order of operations does not matter. An earlier fix commit 3d3c817c3a40 ("can: xilinx_can: Fix usage of skb memory") did not move the can_put_echo_skb() call far enough. Signed-off-by: Anssi Hannula Fixes: 1598efe57b3e ("can: xilinx_can: refactor code in preparation for CAN FD support") Link: https://patch.msgid.link/20250822095002.168389-1-anssi.hannula@bitwise.fi [mkl: add "commit" in front of sha1 in patch description] [mkl: fix indention] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 81baec8eb1e5..a25a3ca62c12 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -690,14 +690,6 @@ static void xcan_write_frame(struct net_device *ndev, struct sk_buff *skb, dlc |= XCAN_DLCR_EDL_MASK; } - if (!(priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) && - (priv->devtype.flags & XCAN_FLAG_TXFEMP)) - can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max, 0); - else - can_put_echo_skb(skb, ndev, 0, 0); - - priv->tx_head++; - priv->write_reg(priv, XCAN_FRAME_ID_OFFSET(frame_offset), id); /* If the CAN frame is RTR frame this write triggers transmission * (not on CAN FD) @@ -730,6 +722,14 @@ static void xcan_write_frame(struct net_device *ndev, struct sk_buff *skb, data[1]); } } + + if (!(priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) && + (priv->devtype.flags & XCAN_FLAG_TXFEMP)) + can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max, 0); + else + can_put_echo_skb(skb, ndev, 0, 0); + + priv->tx_head++; } /** From 5c793afa07da6d2d4595f6c73a2a543a471bb055 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Aug 2025 13:26:37 +0200 Subject: [PATCH 1284/1307] can: rcar_can: rcar_can_resume(): fix s2ram with PSCI On R-Car Gen3 using PSCI, s2ram powers down the SoC. After resume, the CAN interface no longer works, until it is brought down and up again. Fix this by calling rcar_can_start() from the PM resume callback, to fully initialize the controller instead of just restarting it. Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/699b2f7fcb60b31b6f976a37f08ce99c5ffccb31.1755165227.git.geert+renesas@glider.be Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/rcar_can.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 64e664f5adcc..87c134bcd48d 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -861,7 +861,6 @@ static int rcar_can_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct rcar_can_priv *priv = netdev_priv(ndev); - u16 ctlr; int err; if (!netif_running(ndev)) @@ -873,12 +872,7 @@ static int rcar_can_resume(struct device *dev) return err; } - ctlr = readw(&priv->regs->ctlr); - ctlr &= ~RCAR_CAN_CTLR_SLPM; - writew(ctlr, &priv->regs->ctlr); - ctlr &= ~RCAR_CAN_CTLR_CANM; - writew(ctlr, &priv->regs->ctlr); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + rcar_can_start(ndev); netif_device_attach(ndev); netif_start_queue(ndev); From 5e13f2c491a4100d208e77e92fe577fe3dbad6c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Sep 2025 14:45:21 +0200 Subject: [PATCH 1285/1307] netfilter: nft_set_bitmap: fix lockdep splat due to missing annotation Running new 'set_flush_add_atomic_bitmap' test case for nftables.git with CONFIG_PROVE_RCU_LIST=y yields: net/netfilter/nft_set_bitmap.c:231 RCU-list traversed in non-reader section!! rcu_scheduler_active = 2, debug_locks = 1 1 lock held by nft/4008: #0: ffff888147f79cd8 (&nft_net->commit_mutex){+.+.}-{4:4}, at: nf_tables_valid_genid+0x2f/0xd0 lockdep_rcu_suspicious+0x116/0x160 nft_bitmap_walk+0x22d/0x240 nf_tables_delsetelem+0x1010/0x1a00 .. This is a false positive, the list cannot be altered while the transaction mutex is held, so pass the relevant argument to the iterator. Fixes tag intentionally wrong; no point in picking this up if earlier false-positive-fixups were not applied. Fixes: 28b7a6b84c0a ("netfilter: nf_tables: avoid false-positive lockdep splats in set walker") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_bitmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index c24c922f895d..8d3f040a904a 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -226,7 +226,8 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; - list_for_each_entry_rcu(be, &priv->list, head) { + list_for_each_entry_rcu(be, &priv->list, head, + lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) { if (iter->count < iter->skip) goto cont; From c4eaca2e1052adfd67bed0a36a9d4b8e515666e4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:18 +0200 Subject: [PATCH 1286/1307] netfilter: nft_set_pipapo: don't check genbit from packetpath lookups The pipapo set type is special in that it has two copies of its datastructure: one live copy containing only valid elements and one on-demand clone used during transaction where adds/deletes happen. This clone is not visible to the datapath. This is unlike all other set types in nftables, those all link new elements into their live hlist/tree. For those sets, the lookup functions must skip the new elements while the transaction is ongoing to ensure consistency. As the clone is shallow, removal does have an effect on the packet path: once the transaction enters the commit phase the 'gencursor' bit that determines which elements are active and which elements should be ignored (because they are no longer valid) is flipped. This causes the datapath lookup to ignore these elements if they are found during lookup. This opens up a small race window where pipapo has an inconsistent view of the dataset from when the transaction-cpu flipped the genbit until the transaction-cpu calls nft_pipapo_commit() to swap live/clone pointers: cpu0 cpu1 has added new elements to clone has marked elements as being inactive in new generation perform lookup in the set enters commit phase: I) increments the genbit A) observes new genbit removes elements from the clone so they won't be found anymore B) lookup in datastructure can't see new elements yet, but old elements are ignored -> Only matches elements that were not changed in the transaction II) calls nft_pipapo_commit(), clone and live pointers are swapped. C New nft_lookup happening now will find matching elements. Consider a packet matching range r1-r2: cpu0 processes following transaction: 1. remove r1-r2 2. add r1-r3 P is contained in both ranges. Therefore, cpu1 should always find a match for P. Due to above race, this is not the case: cpu1 does find r1-r2, but then ignores it due to the genbit indicating the range has been removed. At the same time, r1-r3 is not visible yet, because it can only be found in the clone. The situation persists for all lookups until after cpu0 hits II). The fix is easy: Don't check the genbit from pipapo lookup functions. This is possible because unlike the other set types, the new elements are not reachable from the live copy of the dataset. The clone/live pointer swap is enough to avoid matching on old elements while at the same time all new elements are exposed in one go. After this change, step B above returns a match in r1-r2. This is fine: r1-r2 only becomes truly invalid the moment they get freed. This happens after a synchronize_rcu() call and rcu read lock is held via netfilter hook traversal (nf_hook_slow()). Cc: Stefano Brivio Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 20 ++++++++++++++++++-- net/netfilter/nft_set_pipapo_avx2.c | 4 +--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 9a10251228fd..793790d79d13 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -510,6 +510,23 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, * * This function is called from the data path. It will search for * an element matching the given key in the current active copy. + * Unlike other set types, this uses NFT_GENMASK_ANY instead of + * nft_genmask_cur(). + * + * This is because new (future) elements are not reachable from + * priv->match, they get added to priv->clone instead. + * When the commit phase flips the generation bitmask, the + * 'now old' entries are skipped but without the 'now current' + * elements becoming visible. Using nft_genmask_cur() thus creates + * inconsistent state: matching old entries get skipped but thew + * newly matching entries are unreachable. + * + * GENMASK will still find the 'now old' entries which ensures consistent + * priv->match view. + * + * nft_pipapo_commit swaps ->clone and ->match shortly after the + * genbit flip. As ->clone doesn't contain the old entries in the first + * place, lookup will only find the now-current ones. * * Return: ntables API extension pointer or NULL if no match. */ @@ -518,12 +535,11 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); - u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_elem *e; m = rcu_dereference(priv->match); - e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64()); + e = pipapo_get(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64()); return e ? &e->ext : NULL; } diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 2f090e253caf..c0884fa68c79 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1152,7 +1152,6 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; - u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; @@ -1248,8 +1247,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, if (last) { const struct nft_set_ext *e = &f->mt[ret].e->ext; - if (unlikely(nft_set_elem_expired(e) || - !nft_set_elem_active(e, genmask))) + if (unlikely(nft_set_elem_expired(e))) goto next_match; ext = e; From a60f7bf4a1524d8896b76ba89623080aebf44272 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:19 +0200 Subject: [PATCH 1287/1307] netfilter: nft_set_rbtree: continue traversal if element is inactive When the rbtree lookup function finds a match in the rbtree, it sets the range start interval to a potentially inactive element. Then, after tree lookup, if the matching element is inactive, it returns NULL and suppresses a matching result. This is wrong and leads to false negative matches when a transaction has already entered the commit phase. cpu0 cpu1 has added new elements to clone has marked elements as being inactive in new generation perform lookup in the set enters commit phase: I) increments the genbit A) observes new genbit B) finds matching range C) returns no match: found range invalid in new generation II) removes old elements from the tree C New nft_lookup happening now will find matching element, because it is no longer obscured by old, inactive one. Consider a packet matching range r1-r2: cpu0 processes following transaction: 1. remove r1-r2 2. add r1-r3 P is contained in both ranges. Therefore, cpu1 should always find a match for P. Due to above race, this is not the case: cpu1 does find r1-r2, but then ignores it due to the genbit indicating the range has been removed. It does NOT test for further matches. The situation persists for all lookups until after cpu0 hits II) after which r1-r3 range start node is tested for the first time. Move the "interval start is valid" check ahead so that tree traversal continues if the starting interval is not valid in this generation. Thanks to Stefan Hanreich for providing an initial reproducer for this bug. Reported-by: Stefan Hanreich Fixes: c1eda3c6394f ("netfilter: nft_rbtree: ignore inactive matching element with no descendants") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_rbtree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 938a257c069e..b1f04168ec93 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -77,7 +77,9 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; - interval = rbe; + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_rbtree_elem_expired(rbe)) + interval = rbe; } else if (d > 0) parent = rcu_dereference_raw(parent->rb_right); else { @@ -102,8 +104,6 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } if (set->flags & NFT_SET_INTERVAL && interval != NULL && - nft_set_elem_active(&interval->ext, genmask) && - !nft_rbtree_elem_expired(interval) && nft_rbtree_interval_start(interval)) return &interval->ext; From 64102d9bbc3d41dac5188b8fba75b1344c438970 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:20 +0200 Subject: [PATCH 1288/1307] netfilter: nf_tables: place base_seq in struct net This will soon be read from packet path around same time as the gencursor. Both gencursor and base_seq get incremented almost at the same time, so it makes sense to place them in the same structure. This doesn't increase struct net size on 64bit due to padding. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 1 - include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 65 ++++++++++++++++--------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 891e43a01bdc..3faa80f5d801 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1912,7 +1912,6 @@ struct nftables_pernet { struct mutex commit_mutex; u64 table_handle; u64 tstamp; - unsigned int base_seq; unsigned int gc_seq; u8 validate_state; struct work_struct destroy_work; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index cc8060c017d5..99dd166c5d07 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -3,6 +3,7 @@ #define _NETNS_NFTABLES_H_ struct netns_nftables { + unsigned int base_seq; u8 gencursor; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1082de09656..9518b50695ba 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1131,11 +1131,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, return ERR_PTR(-ENOENT); } -static __be16 nft_base_seq(const struct net *net) +static unsigned int nft_base_seq(const struct net *net) { - struct nftables_pernet *nft_net = nft_pernet(net); + return READ_ONCE(net->nft.base_seq); +} - return htons(nft_net->base_seq & 0xffff); +static __be16 nft_base_seq_be16(const struct net *net) +{ + return htons(nft_base_seq(net) & 0xffff); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -1155,7 +1158,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -1248,7 +1251,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -2030,7 +2033,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -2133,7 +2136,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -3671,7 +3674,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, - nft_base_seq(net)); + nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -3839,7 +3842,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -4050,7 +4053,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb, buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_RULE_TABLE]), (char *)nla_data(nla[NFTA_RULE_TABLE]), - nft_net->base_seq); + nft_base_seq(net)); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); kfree(buf); @@ -4887,7 +4890,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), flags, ctx->family, NFNETLINK_V0, - nft_base_seq(ctx->net)); + nft_base_seq_be16(ctx->net)); if (!nlh) goto nla_put_failure; @@ -5032,7 +5035,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && @@ -6209,7 +6212,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && @@ -6238,7 +6241,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) seq = cb->nlh->nlmsg_seq; nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, - table->family, NFNETLINK_V0, nft_base_seq(net)); + table->family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -6331,7 +6334,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, - NFNETLINK_V0, nft_base_seq(ctx->net)); + NFNETLINK_V0, nft_base_seq_be16(ctx->net)); if (!nlh) goto nla_put_failure; @@ -6630,7 +6633,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb, } nelems++; } - audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems); + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems); out_unlock: rcu_read_unlock(); @@ -8381,7 +8384,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -8446,7 +8449,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -8480,7 +8483,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) idx++; } if (ctx->reset && entries) - audit_log_obj_reset(table, nft_net->base_seq, entries); + audit_log_obj_reset(table, nft_base_seq(net), entries); if (rc < 0) break; } @@ -8649,7 +8652,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb, buf = kasprintf(GFP_ATOMIC, "%.*s:%u", nla_len(nla[NFTA_OBJ_TABLE]), (char *)nla_data(nla[NFTA_OBJ_TABLE]), - nft_net->base_seq); + nft_base_seq(net)); audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); kfree(buf); @@ -8754,9 +8757,8 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, u16 flags, int family, int report, gfp_t gfp) { - struct nftables_pernet *nft_net = nft_pernet(net); char *buf = kasprintf(gfp, "%s:%u", - table->name, nft_net->base_seq); + table->name, nft_base_seq(net)); audit_log_nfcfg(buf, family, @@ -9442,7 +9444,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, nlh = nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), - flags, family, NFNETLINK_V0, nft_base_seq(net)); + flags, family, NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; @@ -9511,7 +9513,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); - cb->seq = READ_ONCE(nft_net->base_seq); + cb->seq = nft_base_seq(net); list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) @@ -9696,17 +9698,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { - struct nftables_pernet *nft_net = nft_pernet(net); struct nlmsghdr *nlh; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, - NFNETLINK_V0, nft_base_seq(net)); + NFNETLINK_V0, nft_base_seq_be16(net)); if (!nlh) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -10968,11 +10969,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - base_seq = READ_ONCE(nft_net->base_seq); + base_seq = nft_base_seq(net); while (++base_seq == 0) ; - WRITE_ONCE(nft_net->base_seq, base_seq); + WRITE_ONCE(net->nft.base_seq, base_seq); gc_seq = nft_gc_seq_begin(nft_net); @@ -11181,7 +11182,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); - nf_tables_commit_audit_log(&adl, nft_net->base_seq); + nf_tables_commit_audit_log(&adl, nft_base_seq(net)); nft_gc_seq_end(nft_net, gc_seq); nft_net->validate_state = NFT_VALIDATE_SKIP; @@ -11506,7 +11507,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid) mutex_lock(&nft_net->commit_mutex); nft_net->tstamp = get_jiffies_64(); - genid_ok = genid == 0 || nft_net->base_seq == genid; + genid_ok = genid == 0 || nft_base_seq(net) == genid; if (!genid_ok) mutex_unlock(&nft_net->commit_mutex); @@ -12143,7 +12144,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); - nft_net->base_seq = 1; + net->nft.base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); From 11fe5a82e53ac3581a80c88e0e35fb8a80e15f48 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:21 +0200 Subject: [PATCH 1289/1307] netfilter: nf_tables: make nft_set_do_lookup available unconditionally This function was added for retpoline mitigation and is replaced by a static inline helper if mitigations are not enabled. Enable this helper function unconditionally so next patch can add a lookup restart mechanism to fix possible false negatives while transactions are in progress. Adding lookup restarts in nft_lookup_eval doesn't work as nft_objref would then need the same copypaste loop. This patch is separate to ease review of the actual bug fix. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_core.h | 10 ++-------- net/netfilter/nft_lookup.c | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 6c2f483d9828..656e784714f3 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, const struct nft_set_ext * nft_hash_lookup(const struct net *net, const struct nft_set *set, const u32 *key); +#endif + const struct nft_set_ext * nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key); -#else -static inline const struct nft_set_ext * -nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) -{ - return set->ops->lookup(net, set, key); -} -#endif /* called from nft_pipapo_avx2.c */ const struct nft_set_ext * diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 40c602ffbcba..2c6909bf1b40 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -24,11 +24,11 @@ struct nft_lookup { struct nft_set_binding binding; }; -#ifdef CONFIG_MITIGATION_RETPOLINE -const struct nft_set_ext * -nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) +static const struct nft_set_ext * +__nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) { +#ifdef CONFIG_MITIGATION_RETPOLINE if (set->ops == &nft_set_hash_fast_type.ops) return nft_hash_lookup_fast(net, set, key); if (set->ops == &nft_set_hash_type.ops) @@ -51,10 +51,17 @@ nft_set_do_lookup(const struct net *net, const struct nft_set *set, return nft_rbtree_lookup(net, set, key); WARN_ON_ONCE(1); +#endif return set->ops->lookup(net, set, key); } + +const struct nft_set_ext * +nft_set_do_lookup(const struct net *net, const struct nft_set *set, + const u32 *key) +{ + return __nft_set_do_lookup(net, set, key); +} EXPORT_SYMBOL_GPL(nft_set_do_lookup); -#endif void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, From b2f742c846cab9afc5953a5d8f17b54922dcc723 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:22 +0200 Subject: [PATCH 1290/1307] netfilter: nf_tables: restart set lookup on base_seq change The hash, hash_fast, rhash and bitwise sets may indicate no result even though a matching element exists during a short time window while other cpu is finalizing the transaction. This happens when the hash lookup/bitwise lookup function has picked up the old genbit, right before it was toggled by nf_tables_commit(), but then the same cpu managed to unlink the matching old element from the hash table: cpu0 cpu1 has added new elements to clone has marked elements as being inactive in new generation perform lookup in the set enters commit phase: A) observes old genbit increments base_seq I) increments the genbit II) removes old element from the set B) finds matching element C) returns no match: found element is not valid in old generation Next lookup observes new genbit and finds matching e2. Consider a packet matching element e1, e2. cpu0 processes following transaction: 1. remove e1 2. adds e2, which has same key as e1. P matches both e1 and e2. Therefore, cpu1 should always find a match for P. Due to above race, this is not the case: cpu1 observed the old genbit. e2 will not be considered once it is found. The element e1 is not found anymore if cpu0 managed to unlink it from the hlist before cpu1 found it during list traversal. The situation only occurs for a brief time period, lookups happening after I) observe new genbit and return e2. This problem exists in all set types except nft_set_pipapo, so fix it once in nft_lookup rather than each set ops individually. Sample the base sequence counter, which gets incremented right before the genbit is changed. Then, if no match is found, retry the lookup if the base sequence was altered in between. If the base sequence hasn't changed: - No update took place: no-match result is expected. This is the common case. or: - nf_tables_commit() hasn't progressed to genbit update yet. Old elements were still visible and nomatch result is expected, or: - nf_tables_commit updated the genbit: We picked up the new base_seq, so the lookup function also picked up the new genbit, no-match result is expected. If the old genbit was observed, then nft_lookup also picked up the old base_seq: nft_lookup_should_retry() returns true and relookup is performed in the new generation. This problem was added when the unconditional synchronize_rcu() call that followed the current/next generation bit toggle was removed. Thanks to Pablo Neira Ayuso for reviewing an earlier version of this patchset, for suggesting re-use of existing base_seq and placement of the restart loop in nft_set_do_lookup(). Fixes: 0cbc06b3faba ("netfilter: nf_tables: remove synchronize_rcu in commit phase") Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 3 ++- net/netfilter/nft_lookup.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9518b50695ba..c3c73411c40c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10973,7 +10973,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) while (++base_seq == 0) ; - WRITE_ONCE(net->nft.base_seq, base_seq); + /* pairs with smp_load_acquire in nft_lookup_eval */ + smp_store_release(&net->nft.base_seq, base_seq); gc_seq = nft_gc_seq_begin(nft_net); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 2c6909bf1b40..58c5b14889c4 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -55,11 +55,40 @@ __nft_set_do_lookup(const struct net *net, const struct nft_set *set, return set->ops->lookup(net, set, key); } +static unsigned int nft_base_seq(const struct net *net) +{ + /* pairs with smp_store_release() in nf_tables_commit() */ + return smp_load_acquire(&net->nft.base_seq); +} + +static bool nft_lookup_should_retry(const struct net *net, unsigned int seq) +{ + return unlikely(seq != nft_base_seq(net)); +} + const struct nft_set_ext * nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { - return __nft_set_do_lookup(net, set, key); + const struct nft_set_ext *ext; + unsigned int base_seq; + + do { + base_seq = nft_base_seq(net); + + ext = __nft_set_do_lookup(net, set, key); + if (ext) + break; + /* No match? There is a small chance that lookup was + * performed in the old generation, but nf_tables_commit() + * already unlinked a (matching) element. + * + * We need to repeat the lookup to make sure that we didn't + * miss a matching element in the new generation. + */ + } while (nft_lookup_should_retry(net, base_seq)); + + return ext; } EXPORT_SYMBOL_GPL(nft_set_do_lookup); From 37a9675e61a2a2a721a28043ffdf2c8ec81eba37 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Sep 2025 23:52:31 +0200 Subject: [PATCH 1291/1307] MAINTAINERS: add Phil as netfilter reviewer Phil has contributed to netfilter with features, fixes and patch reviews for a long time. Make this more formal and add Reviewer tag. Acked-by: Jozsef Kadlecsik Acked-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2df02e4374ed..ba11421c33e5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17480,6 +17480,7 @@ NETFILTER M: Pablo Neira Ayuso M: Jozsef Kadlecsik M: Florian Westphal +R: Phil Sutter L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org S: Maintained From 449c9c02537a146ac97ef962327a221e21c9cab3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Sep 2025 11:41:59 +0200 Subject: [PATCH 1292/1307] PM: hibernate: Restrict GFP mask in hibernation_snapshot() Commit 12ffc3b1513e ("PM: Restrict swap use to later in the suspend sequence") incorrectly removed a pm_restrict_gfp_mask() call from hibernation_snapshot(), so memory allocations involving swap are not prevented from being carried out in this code path any more which may lead to serious breakage. The symptoms of such breakage have become visible after adding a shrink_shmem_memory() call to hibernation_snapshot() in commit 2640e819474f ("PM: hibernate: shrink shmem pages after dev_pm_ops.prepare()") which caused this problem to be much more likely to manifest itself. However, since commit 2640e819474f was initially present in the DRM tree that did not include commit 12ffc3b1513e, the symptoms of this issue were not visible until merge commit 260f6f4fda93 ("Merge tag 'drm-next-2025-07-30' of https://gitlab.freedesktop.org/drm/kernel") that exposed it through an entirely reasonable merge conflict resolution. Fixes: 12ffc3b1513e ("PM: Restrict swap use to later in the suspend sequence") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220555 Reported-by: Todd Brandt Tested-by: Todd Brandt Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) --- kernel/power/hibernate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f1f30cca573..2f66ab453823 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -449,6 +449,7 @@ int hibernation_snapshot(int platform_mode) shrink_shmem_memory(); console_suspend_all(); + pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); From cdbc9836c7afadad68f374791738f118263c5371 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 3 Jul 2025 12:10:50 +0200 Subject: [PATCH 1293/1307] libceph: fix invalid accesses to ceph_connection_v1_info There is a place where generic code in messenger.c is reading and another place where it is writing to con->v1 union member without checking that the union member is active (i.e. msgr1 is in use). On 64-bit systems, con->v1.auth_retry overlaps with con->v2.out_iter, so such a read is almost guaranteed to return a bogus value instead of 0 when msgr2 is in use. This ends up being fairly benign because the side effect is just the invalidation of the authorizer and successive fetching of new tickets. con->v1.connect_seq overlaps with con->v2.conn_bufs and the fact that it's being written to can cause more serious consequences, but luckily it's not something that happens often. Cc: stable@vger.kernel.org Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Signed-off-by: Ilya Dryomov Reviewed-by: Viacheslav Dubeyko --- net/ceph/messenger.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d1b5705dc0c6..9f6d860411cb 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1524,7 +1524,7 @@ static void con_fault_finish(struct ceph_connection *con) * in case we faulted due to authentication, invalidate our * current tickets so that we can get new ones. */ - if (con->v1.auth_retry) { + if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) { dout("auth_retry %d, invalidating\n", con->v1.auth_retry); if (con->ops->invalidate_authorizer) con->ops->invalidate_authorizer(con); @@ -1714,9 +1714,10 @@ static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ if (con->state == CEPH_CON_S_STANDBY) { - dout("clear_standby %p and ++connect_seq\n", con); + dout("clear_standby %p\n", con); con->state = CEPH_CON_S_PREOPEN; - con->v1.connect_seq++; + if (!ceph_msgr2(from_msgr(con->msgr))) + con->v1.connect_seq++; WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING)); WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING)); } From e25ddfb388c8b7e5f20e3bf38d627fb485003781 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Wed, 10 Sep 2025 20:57:39 +0800 Subject: [PATCH 1294/1307] bpf: Reject bpf_timer for PREEMPT_RT When enable CONFIG_PREEMPT_RT, the kernel will warn when run timer selftests by './test_progs -t timer': BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 In order to avoid such warning, reject bpf_timer in verifier when PREEMPT_RT is enabled. Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20250910125740.52172-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c89e2b1bc644..9fb1f957a093 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8547,6 +8547,10 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, verifier_bug(env, "Two map pointers in a timer helper"); return -EFAULT; } + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); + return -EOPNOTSUPP; + } meta->map_uid = reg->map_uid; meta->map_ptr = map; return 0; From fbdd61c94bcb09b0c0eb0655917bf4193d07aac1 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Wed, 10 Sep 2025 20:57:40 +0800 Subject: [PATCH 1295/1307] selftests/bpf: Skip timer cases when bpf_timer is not supported When enable CONFIG_PREEMPT_RT, verifier will reject bpf_timer with returning -EOPNOTSUPP. Therefore, skip test cases when errno is EOPNOTSUPP. cd tools/testing/selftests/bpf ./test_progs -t timer 125 free_timer:SKIP 456 timer:SKIP 457/1 timer_crash/array:SKIP 457/2 timer_crash/hash:SKIP 457 timer_crash:SKIP 458 timer_lockup:SKIP 459 timer_mim:SKIP Summary: 5/0 PASSED, 6 SKIPPED, 0 FAILED Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20250910125740.52172-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/free_timer.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_crash.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_lockup.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/timer_mim.c | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c index b7b77a6b2979..0de8facca4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/free_timer.c +++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c @@ -124,6 +124,10 @@ void test_free_timer(void) int err; skel = free_timer__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "open_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index d66687f1ee6a..56f660ca567b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -86,6 +86,10 @@ void serial_test_timer(void) int err; timer_skel = timer__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c index f74b82305da8..b841597c8a3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode) struct timer_crash *skel; skel = timer_crash__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) return; skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 1a2f99596916..eb303fa1e09a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -59,6 +59,10 @@ void test_timer_lockup(void) } skel = timer_lockup__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 9ff7843909e7..c930c7d7105b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -65,6 +65,10 @@ void serial_test_timer_mim(void) goto cleanup; timer_skel = timer_mim__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) goto cleanup; From 6fef6ae764be8a77f61ad3b6937ba82fe8358045 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 7 Sep 2025 21:43:20 +0100 Subject: [PATCH 1296/1307] net: ethtool: fix wrong type used in struct kernel_ethtool_ts_info In C, enumerated types do not have a defined size, apart from being compatible with one of the standard types. This allows an ABI / compiler to choose the type of an enum depending on the values it needs to store, and storing larger values in it can lead to undefined behaviour. The tx_type and rx_filters members of struct kernel_ethtool_ts_info are defined as enumerated types, but are bit arrays, where each bit is defined by the enumerated type. This means they typically store values in excess of the maximum value of the enumerated type, in fact (1 << max_value) and thus must not be declared using the enumated type. Fix both of these to use u32, as per the corresponding __u32 UAPI type. Fixes: 2111375b85ad ("net: Add struct kernel_ethtool_ts_info") Signed-off-by: Russell King (Oracle) Reviewed-by: Kory Maincent Link: https://patch.msgid.link/E1uvMEK-00000003Amd-2pWR@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de5bd76a400c..d7d757e72554 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -856,8 +856,8 @@ struct kernel_ethtool_ts_info { enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_source phc_source; int phc_phyindex; - enum hwtstamp_tx_types tx_types; - enum hwtstamp_rx_filters rx_filters; + u32 tx_types; + u32 rx_filters; }; /** From 5537a4679403423e0b49c95b619983a4583d69c5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 8 Sep 2025 13:26:19 +0200 Subject: [PATCH 1297/1307] net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop phylink_{suspend,resume}() from ax88772 PM callbacks. MDIO bus accesses have their own runtime-PM handling and will try to wake the device if it is suspended. Such wake attempts must not happen from PM callbacks while the device PM lock is held. Since phylink {sus|re}sume may trigger MDIO, it must not be called in PM context. No extra phylink PM handling is required for this driver: - .ndo_open/.ndo_stop control the phylink start/stop lifecycle. - ethtool/phylib entry points run in process context, not PM. - phylink MAC ops program the MAC on link changes after resume. Fixes: e0bffe3e6894 ("net: asix: ax88772: migrate to phylink") Reported-by: Hubert Wiśniewski Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Tested-by: Hubert Wiśniewski Tested-by: Xu Yang Link: https://patch.msgid.link/20250908112619.2900723-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 792ddda1ad49..1e8f7089f5e8 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -607,15 +607,8 @@ static const struct net_device_ops ax88772_netdev_ops = { static void ax88772_suspend(struct usbnet *dev) { - struct asix_common_private *priv = dev->driver_priv; u16 medium; - if (netif_running(dev->net)) { - rtnl_lock(); - phylink_suspend(priv->phylink, false); - rtnl_unlock(); - } - /* Stop MAC operation */ medium = asix_read_medium_status(dev, 1); medium &= ~AX_MEDIUM_RE; @@ -644,12 +637,6 @@ static void ax88772_resume(struct usbnet *dev) for (i = 0; i < 3; i++) if (!priv->reset(dev, 1)) break; - - if (netif_running(dev->net)) { - rtnl_lock(); - phylink_resume(priv->phylink); - rtnl_unlock(); - } } static int asix_resume(struct usb_interface *intf) From c3f8d13357deab1e04f8a52b499d6b9b704e578e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Sep 2025 15:11:21 +0200 Subject: [PATCH 1298/1307] wifi: nl80211: completely disable per-link stats for now After commit 8cc71fc3b82b ("wifi: cfg80211: Fix "no buffer space available" error in nl80211_get_station() for MLO"), the per-link data is only included in station dumps, where the size limit is somewhat less of an issue. However, it's still an issue, depending on how many links a station has and how much per-link data there is. Thus, for now, disable per-link statistics entirely. A complete fix will need to take this into account, make it opt-in by userspace, and change the dump format to be able to split a single station's data across multiple netlink dump messages, which all together is too much development for a fix. Fixes: 82d7f841d9bd ("wifi: cfg80211: extend to embed link level statistics in NL message") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f2f7424e930c..852573423e52 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7575,7 +7575,7 @@ static int nl80211_dump_station(struct sk_buff *skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, - &sinfo, true) < 0) + &sinfo, false) < 0) goto out; sta_idx++; From 8884c693991333ae065830554b9b0c96590b1bb2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 5 Sep 2025 09:15:31 +0000 Subject: [PATCH 1299/1307] hsr: use rtnl lock when iterating over ports hsr_for_each_port is called in many places without holding the RCU read lock, this may trigger warnings on debug kernels. Most of the callers are actually hold rtnl lock. So add a new helper hsr_for_each_port_rtnl to allow callers in suitable contexts to iterate ports safely without explicit RCU locking. This patch only fixed the callers that is hold rtnl lock. Other caller issues will be fixed in later patches. Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250905091533.377443-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 18 +++++++++--------- net/hsr/hsr_main.c | 2 +- net/hsr/hsr_main.h | 3 +++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 88657255fec1..bce7b4061ce0 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -49,7 +49,7 @@ static bool hsr_check_carrier(struct hsr_port *master) ASSERT_RTNL(); - hsr_for_each_port(master->hsr, port) { + hsr_for_each_port_rtnl(master->hsr, port) { if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) { netif_carrier_on(master->dev); return true; @@ -105,7 +105,7 @@ int hsr_get_max_mtu(struct hsr_priv *hsr) struct hsr_port *port; mtu_max = ETH_DATA_LEN; - hsr_for_each_port(hsr, port) + hsr_for_each_port_rtnl(hsr, port) if (port->type != HSR_PT_MASTER) mtu_max = min(port->dev->mtu, mtu_max); @@ -139,7 +139,7 @@ static int hsr_dev_open(struct net_device *dev) hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { @@ -172,7 +172,7 @@ static int hsr_dev_close(struct net_device *dev) struct hsr_priv *hsr; hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { @@ -205,7 +205,7 @@ static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, * may become enabled. */ features &= ~NETIF_F_ONE_FOR_ALL; - hsr_for_each_port(hsr, port) + hsr_for_each_port_rtnl(hsr, port) features = netdev_increment_features(features, port->dev->features, mask); @@ -484,7 +484,7 @@ static void hsr_set_rx_mode(struct net_device *dev) hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { @@ -506,7 +506,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change) hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { if (port->type == HSR_PT_MASTER) continue; switch (port->type) { @@ -534,7 +534,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) continue; @@ -580,7 +580,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev, hsr = netdev_priv(dev); - hsr_for_each_port(hsr, port) { + hsr_for_each_port_rtnl(hsr, port) { switch (port->type) { case HSR_PT_SLAVE_A: case HSR_PT_SLAVE_B: diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 192893c3f2ec..ac1eb1db1a52 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -22,7 +22,7 @@ static bool hsr_slave_empty(struct hsr_priv *hsr) { struct hsr_port *port; - hsr_for_each_port(hsr, port) + hsr_for_each_port_rtnl(hsr, port) if (port->type != HSR_PT_MASTER) return false; return true; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 135ec5fce019..33b0d2460c9b 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -224,6 +224,9 @@ struct hsr_priv { #define hsr_for_each_port(hsr, port) \ list_for_each_entry_rcu((port), &(hsr)->ports, port_list) +#define hsr_for_each_port_rtnl(hsr, port) \ + list_for_each_entry_rcu((port), &(hsr)->ports, port_list, lockdep_rtnl_is_held()) + struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt); /* Caller must ensure skb is a valid HSR frame */ From 393c841fe4333cdd856d0ca37b066d72746cfaa6 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 5 Sep 2025 09:15:32 +0000 Subject: [PATCH 1300/1307] hsr: use hsr_for_each_port_rtnl in hsr_port_get_hsr hsr_port_get_hsr() iterates over ports using hsr_for_each_port(), but many of its callers do not hold the required RCU lock. Switch to hsr_for_each_port_rtnl(), since most callers already hold the rtnl lock. After review, all callers are covered by either the rtnl lock or the RCU lock, except hsr_dev_xmit(). Fix this by adding an RCU read lock there. Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250905091533.377443-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 3 +++ net/hsr/hsr_main.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index bce7b4061ce0..702da1f9aaa9 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -226,6 +226,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct hsr_priv *hsr = netdev_priv(dev); struct hsr_port *master; + rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (master) { skb->dev = master->dev; @@ -238,6 +239,8 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); } + rcu_read_unlock(); + return NETDEV_TX_OK; } diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index ac1eb1db1a52..bc94b07101d8 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -134,7 +134,7 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) { struct hsr_port *port; - hsr_for_each_port(hsr, port) + hsr_for_each_port_rtnl(hsr, port) if (port->type == pt) return port; return NULL; From 847748fc66d08a89135a74e29362a66ba4e3ab15 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 5 Sep 2025 09:15:33 +0000 Subject: [PATCH 1301/1307] hsr: hold rcu and dev lock for hsr_get_port_ndev hsr_get_port_ndev calls hsr_for_each_port, which need to hold rcu lock. On the other hand, before return the port device, we need to hold the device reference to avoid UaF in the caller function. Suggested-by: Paolo Abeni Fixes: 9c10dd8eed74 ("net: hsr: Create and export hsr_get_port_ndev()") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250905091533.377443-4-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 20 ++++++++++++++------ net/hsr/hsr_device.c | 7 ++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index dadce6009791..e42d0fdefee1 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -654,7 +654,7 @@ static void icssg_prueth_hsr_fdb_add_del(struct prueth_emac *emac, static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) { - struct net_device *real_dev; + struct net_device *real_dev, *port_dev; struct prueth_emac *emac; u8 vlan_id, i; @@ -663,11 +663,15 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { - emac = netdev_priv(hsr_get_port_ndev(real_dev, i)); - if (!emac) + port_dev = hsr_get_port_ndev(real_dev, i); + emac = netdev_priv(port_dev); + if (!emac) { + dev_put(port_dev); return -EINVAL; + } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, true); + dev_put(port_dev); } } else { emac = netdev_priv(real_dev); @@ -679,7 +683,7 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) { - struct net_device *real_dev; + struct net_device *real_dev, *port_dev; struct prueth_emac *emac; u8 vlan_id, i; @@ -688,11 +692,15 @@ static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { - emac = netdev_priv(hsr_get_port_ndev(real_dev, i)); - if (!emac) + port_dev = hsr_get_port_ndev(real_dev, i); + emac = netdev_priv(port_dev); + if (!emac) { + dev_put(port_dev); return -EINVAL; + } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, false); + dev_put(port_dev); } } else { emac = netdev_priv(real_dev); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 702da1f9aaa9..fbbc3ccf9df6 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -675,9 +675,14 @@ struct net_device *hsr_get_port_ndev(struct net_device *ndev, struct hsr_priv *hsr = netdev_priv(ndev); struct hsr_port *port; + rcu_read_lock(); hsr_for_each_port(hsr, port) - if (port->type == pt) + if (port->type == pt) { + dev_hold(port->dev); + rcu_read_unlock(); return port->dev; + } + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL(hsr_get_port_ndev); From 63a796558bc22ec699e4193d5c75534757ddf2e6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 11 Sep 2025 16:33:31 +0200 Subject: [PATCH 1302/1307] Revert "net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups" This reverts commit 5537a4679403 ("net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups"), it breaks operation of asix ethernet usb dongle after system suspend-resume cycle. Link: https://lore.kernel.org/all/b5ea8296-f981-445d-a09a-2f389d7f6fdd@samsung.com/ Fixes: 5537a4679403 ("net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups") Reported-by: Marek Szyprowski Acked-by: Jakub Kicinski Link: https://patch.msgid.link/2945b9dbadb8ee1fee058b19554a5cb14f1763c1.1757601118.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/usb/asix_devices.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 1e8f7089f5e8..792ddda1ad49 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -607,8 +607,15 @@ static const struct net_device_ops ax88772_netdev_ops = { static void ax88772_suspend(struct usbnet *dev) { + struct asix_common_private *priv = dev->driver_priv; u16 medium; + if (netif_running(dev->net)) { + rtnl_lock(); + phylink_suspend(priv->phylink, false); + rtnl_unlock(); + } + /* Stop MAC operation */ medium = asix_read_medium_status(dev, 1); medium &= ~AX_MEDIUM_RE; @@ -637,6 +644,12 @@ static void ax88772_resume(struct usbnet *dev) for (i = 0; i < 3; i++) if (!priv->reset(dev, 1)) break; + + if (netif_running(dev->net)) { + rtnl_lock(); + phylink_resume(priv->phylink); + rtnl_unlock(); + } } static int asix_resume(struct usb_interface *intf) From 1fcf686def19064a7b5cfaeb28c1f1a119900a2b Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 12 Sep 2025 03:27:11 +0800 Subject: [PATCH 1303/1307] erofs: fix long xattr name prefix placement Currently, xattr name prefixes are forcibly placed into the packed inode if the fragments feature is enabled, and users have no option to put them in plain form directly on disk. This is inflexible. First, as mentioned above, users should be able to store unwrapped long xattr name prefixes unconditionally (COMPAT_PLAIN_XATTR_PFX). Second, since we now have the new metabox inode to store metadata, it should be used when available instead of the packed inode. Fixes: 414091322c63 ("erofs: implement metadata compression") Signed-off-by: Gao Xiang --- fs/erofs/erofs_fs.h | 8 +++++--- fs/erofs/internal.h | 1 + fs/erofs/xattr.c | 13 ++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 377ee12b8b96..3d5738f80072 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -12,10 +12,12 @@ /* to allow for x86 boot sectors and other oddities. */ #define EROFS_SUPER_OFFSET 1024 -#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 -#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 -#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 +#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 +#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004 #define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008 +#define EROFS_FEATURE_COMPAT_PLAIN_XATTR_PFX 0x00000010 + /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 4ccc5f0ee8df..9319c66e86c3 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -234,6 +234,7 @@ EROFS_FEATURE_FUNCS(metabox, incompat, INCOMPAT_METABOX) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX) +EROFS_FEATURE_FUNCS(plain_xattr_pfx, compat, COMPAT_PLAIN_XATTR_PFX) static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid) { diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index eaa9efd766ee..396536d9a862 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -482,6 +482,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb) erofs_off_t pos = (erofs_off_t)sbi->xattr_prefix_start << 2; struct erofs_xattr_prefix_item *pfs; int ret = 0, i, len; + bool plain = erofs_sb_has_plain_xattr_pfx(sbi); if (!sbi->xattr_prefix_count) return 0; @@ -490,9 +491,15 @@ int erofs_xattr_prefixes_init(struct super_block *sb) if (!pfs) return -ENOMEM; - if (sbi->packed_inode) - buf.mapping = sbi->packed_inode->i_mapping; - else + if (!plain) { + if (erofs_sb_has_metabox(sbi)) + (void)erofs_init_metabuf(&buf, sb, true); + else if (sbi->packed_inode) + buf.mapping = sbi->packed_inode->i_mapping; + else + plain = true; + } + if (plain) (void)erofs_init_metabuf(&buf, sb, false); for (i = 0; i < sbi->xattr_prefix_count; i++) { From f6d2900f2806d584303db689d9e18f0443610514 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 10 Sep 2025 11:40:03 +0200 Subject: [PATCH 1304/1307] MAINTAINERS: Update the DMA Rust entry Update the DMA Rust maintainers entry in the following two aspects: (1) Change Abdiel's entry to 'Reviewer'. (2) Take patches through the driver-core tree. Abdiel won't do any more maintainer work on the DMA (or scatterlist) infrastructure, but he'd like to be kept in the loop, hence change is entry to 'R:'. Analogous to [1], the DMA (and scatterlist) helpers are closely coupled with the core device infrastructure and the device lifecycle, hence take patches through the driver-core tree by default. Cc: Abdiel Janulgue Link: https://lore.kernel.org/r/20250725202840.2251768-1-ojeda@kernel.org [1] Acked-by: Abdiel Janulgue Acked-by: Greg Kroah-Hartman Signed-off-by: Danilo Krummrich --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fed6cd812d79..281149d9b821 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7239,15 +7239,15 @@ F: include/linux/swiotlb.h F: kernel/dma/ DMA MAPPING HELPERS DEVICE DRIVER API [RUST] -M: Abdiel Janulgue M: Danilo Krummrich +R: Abdiel Janulgue R: Daniel Almeida R: Robin Murphy R: Andreas Hindborg L: rust-for-linux@vger.kernel.org S: Supported W: https://rust-for-linux.com -T: git https://github.com/Rust-for-Linux/linux.git alloc-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: rust/helpers/dma.c F: rust/kernel/dma.rs F: samples/rust/rust_dma.rs From 9dfec4a51df9cf0dcc23cb4ac6fc314bf9e999d0 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 10 Sep 2025 15:58:47 +0800 Subject: [PATCH 1305/1307] USB: core: remove the move buf action The buffer size of sysfs is fixed at PAGE_SIZE, and the page offset of the buf parameter of sysfs_emit_at() must be 0, there is no need to manually manage the buf pointer offset. Fixes: 711d41ab4a0e ("usb: core: Use sysfs_emit_at() when showing dynamic IDs") Reported-by: syzbot+b6445765657b5855e869@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b6445765657b5855e869 Tested-by: syzbot+b6445765657b5855e869@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Link: https://lore.kernel.org/r/tencent_B32D6D8C9450EBFEEE5ACC2C7B0E6C402D0A@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index c3177034b779..f441958b0ef4 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -119,11 +119,11 @@ ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf) guard(mutex)(&usb_dynids_lock); list_for_each_entry(dynid, &dynids->list, node) if (dynid->id.bInterfaceClass != 0) - count += sysfs_emit_at(&buf[count], count, "%04x %04x %02x\n", + count += sysfs_emit_at(buf, count, "%04x %04x %02x\n", dynid->id.idVendor, dynid->id.idProduct, dynid->id.bInterfaceClass); else - count += sysfs_emit_at(&buf[count], count, "%04x %04x\n", + count += sysfs_emit_at(buf, count, "%04x %04x\n", dynid->id.idVendor, dynid->id.idProduct); return count; } From 30989f67650cbf8dc763f7c22e3a210f70a8d7d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 10 Sep 2025 16:25:27 +0200 Subject: [PATCH 1306/1307] MAINTAINERS: Input: Drop melfas-mip4 section Emails to the sole melfas-mip4 driver maintainer bounce: 550 No such user here (connected from melfas.com) so clearly this is not a supported driver anymore. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250910142526.105286-2-krzysztof.kozlowski@linaro.org Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 7 ------- 1 file changed, 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ecca2a4f96fd..6d6dd9f1316c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15523,13 +15523,6 @@ S: Supported W: http://www.melexis.com F: drivers/iio/temperature/mlx90635.c -MELFAS MIP4 TOUCHSCREEN DRIVER -M: Sangwon Jee -S: Supported -W: http://www.melfas.com -F: Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt -F: drivers/input/touchscreen/melfas_mip4.c - MELLANOX BLUEFIELD I2C DRIVER M: Khalil Blaiech M: Asmaa Mnebhi From f83ec76bf285bea5727f478a68b894f5543ca76e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Sep 2025 14:21:14 -0700 Subject: [PATCH 1307/1307] Linux 6.17-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf37b9407821..9771619ac596 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION*