From 87b0031f7f73dac2ebb874fc8f331a66ee3b5cbd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:18 +0100 Subject: [PATCH 01/33] irqdomain: Add firmware info reporting interface Add an irqdomain callback to report firmware-provided information that is otherwise not available in a generic way. This is reported using a new data structure (struct irq_fwspec_info). This callback is optional and the only information that can be reported currently is the affinity of an interrupt. However, the containing structure is designed to be extensible, allowing other potentially relevant information to be reported in the future. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20251020122944.3074811-2-maz@kernel.org --- include/linux/irqdomain.h | 27 +++++++++++++++++++++++++++ kernel/irq/irqdomain.c | 32 +++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 4a86e6b915dd..9d6a5e99394f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -44,6 +44,23 @@ struct irq_fwspec { u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; +/** + * struct irq_fwspec_info - firmware provided IRQ information structure + * + * @flags: Information validity flags + * @cpumask: Affinity mask for this interrupt + * + * This structure reports firmware-specific information about an + * interrupt. The only significant information is the affinity of a + * per-CPU interrupt, but this is designed to be extended as required. + */ +struct irq_fwspec_info { + unsigned long flags; + const struct cpumask *affinity; +}; + +#define IRQ_FWSPEC_INFO_AFFINITY_VALID BIT(0) + /* Conversion function from of_phandle_args fields to fwspec */ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); @@ -69,6 +86,9 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and * linux irq type value (@out_type). This is a generalised @xlate * (over struct irq_fwspec) and is preferred if provided. + * @get_fwspec_info: + * Given @fwspec, report additional firmware-provided information in + * @info. Optional. * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping @@ -96,6 +116,7 @@ struct irq_domain_ops { void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); + int (*get_fwspec_info)(struct irq_fwspec *fwspec, struct irq_fwspec_info *info); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS void (*debug_show)(struct seq_file *m, struct irq_domain *d, @@ -602,6 +623,8 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_bas int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); +int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info); + static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; @@ -685,6 +708,10 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) return false; } +static inline int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + return -EINVAL; +} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index dc473faadcc8..2652c4cfd877 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -867,13 +867,9 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, } EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); -unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) +static struct irq_domain *fwspec_to_domain(struct irq_fwspec *fwspec) { struct irq_domain *domain; - struct irq_data *irq_data; - irq_hw_number_t hwirq; - unsigned int type = IRQ_TYPE_NONE; - int virq; if (fwspec->fwnode) { domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); @@ -883,6 +879,32 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) domain = irq_default_domain; } + return domain; +} + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + struct irq_domain *domain = fwspec_to_domain(fwspec); + + memset(info, 0, sizeof(*info)); + + if (!domain || !domain->ops->get_fwspec_info) + return 0; + + return domain->ops->get_fwspec_info(fwspec, info); +} +#endif + +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) +{ + unsigned int type = IRQ_TYPE_NONE; + struct irq_domain *domain; + struct irq_data *irq_data; + irq_hw_number_t hwirq; + int virq; + + domain = fwspec_to_domain(fwspec); if (!domain) { pr_warn("no irq domain found for %s !\n", of_node_full_name(to_of_node(fwspec->fwnode))); From 5324fe21ba9b77b299c02191645a97777cdd73ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:19 +0100 Subject: [PATCH 02/33] ACPI: irq: Add interrupt affinity reporting interface Plug the irq_populate_fwspec_info() helper into the ACPI layer to offer an interrupt affinity reporting function. This is currently only supported for the CONFIG_ACPI_GENERIC_GSI configurations, but could later be extended to legacy architectures if necessary. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jonathan Cameron Acked-by: Rafael J. Wysocki (Intel) Link: https://patch.msgid.link/20251020122944.3074811-3-maz@kernel.org --- drivers/acpi/irq.c | 19 +++++++++++++++++++ include/linux/acpi.h | 7 +++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index 76a856c32c4d..d1595156c86a 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -300,6 +300,25 @@ int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) } EXPORT_SYMBOL_GPL(acpi_irq_get); +const struct cpumask *acpi_irq_get_affinity(acpi_handle handle, + unsigned int index) +{ + struct irq_fwspec_info info; + struct irq_fwspec fwspec; + unsigned long flags; + + if (acpi_irq_parse_one(handle, index, &fwspec, &flags)) + return NULL; + + if (irq_populate_fwspec_info(&fwspec, &info)) + return NULL; + + if (!(info.flags & IRQ_FWSPEC_INFO_AFFINITY_VALID)) + return NULL; + + return info.affinity; +} + /** * acpi_set_irq_model - Setup the GSI irqdomain information * @model: the value assigned to acpi_irq_model diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5ff5d99f6ead..607db773b672 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1509,12 +1509,19 @@ static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) #if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); +const struct cpumask *acpi_irq_get_affinity(acpi_handle handle, + unsigned int index); #else static inline int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) { return -EINVAL; } +static inline const struct cpumask *acpi_irq_get_affinity(acpi_handle handle, + unsigned int index) +{ + return NULL; +} #endif #ifdef CONFIG_ACPI_LPIT From 5404f5c06dd41fd4445a01dec77a629e254a62e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:20 +0100 Subject: [PATCH 03/33] of/irq: Add interrupt affinity reporting interface Plug the irq_populate_fwspec_info() helper into the OF layer to offer an interrupt affinity reporting function. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20251020122944.3074811-4-maz@kernel.org --- drivers/of/irq.c | 20 ++++++++++++++++++++ include/linux/of_irq.h | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 65c3c23255b7..168fde921bd2 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -479,6 +479,26 @@ int of_irq_get(struct device_node *dev, int index) } EXPORT_SYMBOL_GPL(of_irq_get); +const struct cpumask *of_irq_get_affinity(struct device_node *dev, int index) +{ + struct of_phandle_args oirq; + struct irq_fwspec_info info; + struct irq_fwspec fwspec; + int rc; + + rc = of_irq_parse_one(dev, index, &oirq); + if (rc) + return NULL; + + of_phandle_args_to_fwspec(oirq.np, oirq.args, oirq.args_count, + &fwspec); + + if (irq_populate_fwspec_info(&fwspec, &info)) + return NULL; + + return info.affinity; +} + /** * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number * @dev: pointer to device tree node diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 1db8543dfc8a..1c2bc0281807 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -43,6 +43,8 @@ extern int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq); extern int of_irq_count(struct device_node *dev); extern int of_irq_get(struct device_node *dev, int index); +extern const struct cpumask *of_irq_get_affinity(struct device_node *dev, + int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); @@ -76,6 +78,11 @@ static inline int of_irq_get_byname(struct device_node *dev, const char *name) { return 0; } +static inline const struct cpumask *of_irq_get_affinity(struct device_node *dev, + int index) +{ + return NULL; +} static inline int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs) { From 0d5daa938c94b8b9183e9b257a88dc0929d59409 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:21 +0100 Subject: [PATCH 04/33] platform: Add firmware-agnostic irq and affinity retrieval interface Expand platform_get_irq_optional() to also return an affinity if available, renaming it to platform_get_irq_affinity() in the process. platform_get_irq_optional() is preserved with its current semantics by calling into the new helper with a NULL affinity pointer. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20251020122944.3074811-5-maz@kernel.org --- drivers/base/platform.c | 71 ++++++++++++++++++++++++++------- include/linux/platform_device.h | 2 + 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 09450349cf32..b45d41b018ca 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -150,25 +150,37 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); #endif /* CONFIG_HAS_IOMEM */ +static const struct cpumask *get_irq_affinity(struct platform_device *dev, + unsigned int num) +{ + const struct cpumask *mask = NULL; +#ifndef CONFIG_SPARC + struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); + + if (is_of_node(fwnode)) + mask = of_irq_get_affinity(to_of_node(fwnode), num); + else if (is_acpi_device_node(fwnode)) + mask = acpi_irq_get_affinity(ACPI_HANDLE_FWNODE(fwnode), num); +#endif + + return mask ?: cpu_possible_mask; +} + /** - * platform_get_irq_optional - get an optional IRQ for a device - * @dev: platform device - * @num: IRQ number index + * platform_get_irq_affinity - get an optional IRQ and its affinity for a device + * @dev: platform device + * @num: interrupt number index + * @affinity: optional cpumask pointer to get the affinity of a per-cpu interrupt * - * Gets an IRQ for a platform device. Device drivers should check the return - * value for errors so as to not pass a negative integer value to the - * request_irq() APIs. This is the same as platform_get_irq(), except that it - * does not print an error message if an IRQ can not be obtained. + * Gets an interupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. Optional affinity information is provided in the + * affinity pointer if available, and NULL otherwise. * - * For example:: - * - * int irq = platform_get_irq_optional(pdev, 0); - * if (irq < 0) - * return irq; - * - * Return: non-zero IRQ number on success, negative error number on failure. + * Return: non-zero interrupt number on success, negative error number on failure. */ -int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +int platform_get_irq_affinity(struct platform_device *dev, unsigned int num, + const struct cpumask **affinity) { int ret; #ifdef CONFIG_SPARC @@ -236,8 +248,37 @@ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) out: if (WARN(!ret, "0 is an invalid IRQ number\n")) return -EINVAL; + + if (ret > 0 && affinity) + *affinity = get_irq_affinity(dev, num); + return ret; } +EXPORT_SYMBOL_GPL(platform_get_irq_affinity); + +/** + * platform_get_irq_optional - get an optional interrupt for a device + * @dev: platform device + * @num: interrupt number index + * + * Gets an interrupt for a platform device. Device drivers should check the + * return value for errors so as to not pass a negative integer value to + * the request_irq() APIs. This is the same as platform_get_irq(), except + * that it does not print an error message if an interrupt can not be + * obtained. + * + * For example:: + * + * int irq = platform_get_irq_optional(pdev, 0); + * if (irq < 0) + * return irq; + * + * Return: non-zero interrupt number on success, negative error number on failure. + */ +int platform_get_irq_optional(struct platform_device *dev, unsigned int num) +{ + return platform_get_irq_affinity(dev, num, NULL); +} EXPORT_SYMBOL_GPL(platform_get_irq_optional); /** diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 074754c23d33..ad66333ce85c 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -102,6 +102,8 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); +extern int platform_get_irq_affinity(struct platform_device *, unsigned int, + const struct cpumask **); extern int platform_irq_count(struct platform_device *); extern int devm_platform_get_irqs_affinity(struct platform_device *dev, struct irq_affinity *affd, From 68905ea65ceff4f42ab14dbc6882de313127646f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:22 +0100 Subject: [PATCH 05/33] irqchip/gic-v3: Add FW info retrieval support Plug the new .get_fwspec_info() callback into the GICv3 core driver, using some of the existing PPI affinity handling infrastructure. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-6-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 3de351e66ee8..cf0ba83c6eda 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -69,6 +69,8 @@ struct gic_chip_data { bool has_rss; unsigned int ppi_nr; struct partition_desc **ppi_descs; + struct partition_affinity *parts; + unsigned int nr_parts; }; #define T241_CHIPS_MAX 4 @@ -1797,11 +1799,58 @@ static int gic_irq_domain_select(struct irq_domain *d, return d == partition_get_domain(gic_data.ppi_descs[ppi_idx]); } +static int gic_irq_get_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + const struct cpumask *mask = NULL; + + info->flags = 0; + info->affinity = NULL; + + /* ACPI is not capable of describing PPI affinity -- yet */ + if (!is_of_node(fwspec->fwnode)) + return 0; + + /* If the specifier provides an affinity, use it */ + if (fwspec->param_count == 4 && fwspec->param[3]) { + struct fwnode_handle *fw; + + switch (fwspec->param[0]) { + case 1: /* PPI */ + case 3: /* EPPI */ + break; + default: + return 0; + } + + fw = of_fwnode_handle(of_find_node_by_phandle(fwspec->param[3])); + if (!fw) + return -ENOENT; + + for (int i = 0; i < gic_data.nr_parts; i++) { + if (gic_data.parts[i].partition_id == fw) { + mask = &gic_data.parts[i].mask; + break; + } + } + + if (!mask) + return -ENOENT; + } else { + mask = cpu_possible_mask; + } + + info->affinity = mask; + info->flags = IRQ_FWSPEC_INFO_AFFINITY_VALID; + + return 0; +} + static const struct irq_domain_ops gic_irq_domain_ops = { .translate = gic_irq_domain_translate, .alloc = gic_irq_domain_alloc, .free = gic_irq_domain_free, .select = gic_irq_domain_select, + .get_fwspec_info = gic_irq_get_fwspec_info, }; static int partition_domain_translate(struct irq_domain *d, @@ -1840,6 +1889,7 @@ static int partition_domain_translate(struct irq_domain *d, static const struct irq_domain_ops partition_domain_ops = { .translate = partition_domain_translate, .select = gic_irq_domain_select, + .get_fwspec_info = gic_irq_get_fwspec_info, }; static bool gic_enable_quirk_msm8996(void *data) @@ -2232,6 +2282,9 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) part_idx++; } + gic_data.parts = parts; + gic_data.nr_parts = nr_parts; + for (i = 0; i < gic_data.ppi_nr; i++) { unsigned int irq; struct partition_desc *desc; From de575de83c77b693667256774f4d19f33e73f6db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:23 +0100 Subject: [PATCH 06/33] irqchip/apple-aic: Add FW info retrieval support Plug the new .get_fwspec_info() callback into the Apple AIC driver, using some of the existing FIQ affinity handling infrastructure. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Acked-by: Sven Peter Link: https://patch.msgid.link/20251020122944.3074811-7-maz@kernel.org --- drivers/irqchip/irq-apple-aic.c | 34 ++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 032d66dceb8e..0b72451725eb 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -651,6 +651,33 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq, return 0; } +static int aic_irq_get_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + const struct cpumask *mask; + u32 intid; + + info->flags = 0; + info->affinity = NULL; + + if (fwspec->param[0] != AIC_FIQ) + return 0; + + if (fwspec->param_count == 3) + intid = fwspec->param[1]; + else + intid = fwspec->param[2]; + + if (aic_irqc->fiq_aff[intid]) + mask = &aic_irqc->fiq_aff[intid]->aff; + else + mask = cpu_possible_mask; + + info->affinity = mask; + info->flags = IRQ_FWSPEC_INFO_AFFINITY_VALID; + + return 0; +} + static int aic_irq_domain_translate(struct irq_domain *id, struct irq_fwspec *fwspec, unsigned long *hwirq, @@ -750,9 +777,10 @@ static void aic_irq_domain_free(struct irq_domain *domain, unsigned int virq, } static const struct irq_domain_ops aic_irq_domain_ops = { - .translate = aic_irq_domain_translate, - .alloc = aic_irq_domain_alloc, - .free = aic_irq_domain_free, + .translate = aic_irq_domain_translate, + .alloc = aic_irq_domain_alloc, + .free = aic_irq_domain_free, + .get_fwspec_info = aic_irq_get_fwspec_info, }; /* From 541454dd204b66c9f03761e00a28ad9702b24829 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:24 +0100 Subject: [PATCH 07/33] coresight: trbe: Convert to the new interrupt affinity retrieval API Now that the relevant interrupt controllers are equipped with a callback returning the affinity of per-CPU interrupts, switch the TRBE driver over to this new method. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Acked-by: Suzuki K Poulose Link: https://patch.msgid.link/20251020122944.3074811-8-maz@kernel.org --- drivers/hwtracing/coresight/coresight-trbe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 43643d2c5bdd..8f17160fec44 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -1474,9 +1474,10 @@ static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata) static int arm_trbe_probe_irq(struct platform_device *pdev, struct trbe_drvdata *drvdata) { + const struct cpumask *affinity; int ret; - drvdata->irq = platform_get_irq(pdev, 0); + drvdata->irq = platform_get_irq_affinity(pdev, 0, &affinity); if (drvdata->irq < 0) { pr_err("IRQ not found for the platform device\n"); return drvdata->irq; @@ -1487,8 +1488,7 @@ static int arm_trbe_probe_irq(struct platform_device *pdev, return -EINVAL; } - if (irq_get_percpu_devid_partition(drvdata->irq, &drvdata->supported_cpus)) - return -EINVAL; + cpumask_copy(&drvdata->supported_cpus, affinity); drvdata->handle = alloc_percpu(struct perf_output_handle *); if (!drvdata->handle) From 663783e0013e97e18cc167139ab4319bbeaea399 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:25 +0100 Subject: [PATCH 08/33] perf: arm_pmu: Convert to the new interrupt affinity retrieval API Now that the relevant interrupt controllers are equipped with a callback returning the affinity of per-CPU interrupts, switch the OF side of the ARM PMU driver over to this new method. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jinjie Ruan Link: https://patch.msgid.link/20251020122944.3074811-9-maz@kernel.org --- drivers/perf/arm_pmu_platform.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 118170a5cede..9c0494d8a867 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -42,14 +42,13 @@ static int probe_current_pmu(struct arm_pmu *pmu, return ret; } -static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq) +static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq, + const struct cpumask *affinity) { - int cpu, ret; struct pmu_hw_events __percpu *hw_events = pmu->hw_events; + int cpu; - ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); - if (ret) - return ret; + cpumask_copy(&pmu->supported_cpus, affinity); for_each_cpu(cpu, &pmu->supported_cpus) per_cpu(hw_events->irq, cpu) = irq; @@ -115,9 +114,12 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) } if (num_irqs == 1) { - int irq = platform_get_irq(pdev, 0); + const struct cpumask *affinity; + int irq; + + irq = platform_get_irq_affinity(pdev, 0, &affinity); if ((irq > 0) && irq_is_percpu_devid(irq)) - return pmu_parse_percpu_irq(pmu, irq); + return pmu_parse_percpu_irq(pmu, irq, affinity); } if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node)) From f6c8aced7c24e0c765d4283144d280c3f36e3906 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:26 +0100 Subject: [PATCH 09/33] perf: arm_spe_pmu: Convert to new interrupt affinity retrieval API Now that the relevant interrupt controllers are equipped with a callback returning the affinity of per-CPU interrupts, switch the ARM SPE driver over to this new method. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jinjie Ruan Link: https://patch.msgid.link/20251020122944.3074811-10-maz@kernel.org --- drivers/perf/arm_spe_pmu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index fa50645fedda..1460b02d20e8 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1287,8 +1287,10 @@ static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu) static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) { struct platform_device *pdev = spe_pmu->pdev; - int irq = platform_get_irq(pdev, 0); + const struct cpumask *affinity; + int irq; + irq = platform_get_irq_affinity(pdev, 0, &affinity); if (irq < 0) return -ENXIO; @@ -1297,10 +1299,7 @@ static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu) return -EINVAL; } - if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) { - dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq); - return -EINVAL; - } + cpumask_copy(&spe_pmu->supported_cpus, affinity); spe_pmu->irq = irq; return 0; From 21bbbc50f398f5d58a14669ee18b10a2bd30e916 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:27 +0100 Subject: [PATCH 10/33] irqchip/gic-v3: Switch high priority PPIs over to handle_percpu_devid_irq() It so appears that handle_percpu_devid_irq() is extremely similar to handle_percpu_devid_fasteoi_nmi(), and that the differences do no justify the horrid machinery in the GICv3 driver to handle the flow handler switch. Stick with the standard flow handler, even for NMIs. Suggested-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-11-maz@kernel.org --- drivers/irqchip/irq-gic-v3.c | 54 ++---------------------------------- 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index cf0ba83c6eda..dd2d6d722fc5 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -230,9 +230,6 @@ static void __init gic_prio_init(void) !cpus_have_group0); } -/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */ -static refcount_t *rdist_nmi_refs; - static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); @@ -608,24 +605,6 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq) } } -static u32 __gic_get_rdist_index(irq_hw_number_t hwirq) -{ - switch (__get_intid_range(hwirq)) { - case SGI_RANGE: - case PPI_RANGE: - return hwirq; - case EPPI_RANGE: - return hwirq - EPPI_BASE_INTID + 32; - default: - unreachable(); - } -} - -static u32 gic_get_rdist_index(struct irq_data *d) -{ - return __gic_get_rdist_index(d->hwirq); -} - static int gic_irq_nmi_setup(struct irq_data *d) { struct irq_desc *desc = irq_to_desc(d->irq); @@ -646,20 +625,8 @@ static int gic_irq_nmi_setup(struct irq_data *d) return -EINVAL; /* desc lock should already be held */ - if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_rdist_index(d); - - /* - * Setting up a percpu interrupt as NMI, only switch handler - * for first NMI - */ - if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) { - refcount_set(&rdist_nmi_refs[idx], 1); - desc->handle_irq = handle_percpu_devid_fasteoi_nmi; - } - } else { + if (!gic_irq_in_rdist(d)) desc->handle_irq = handle_fasteoi_nmi; - } gic_irq_set_prio(d, dist_prio_nmi); @@ -686,15 +653,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d) return; /* desc lock should already be held */ - if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_rdist_index(d); - - /* Tearing down NMI, only switch handler for last NMI */ - if (refcount_dec_and_test(&rdist_nmi_refs[idx])) - desc->handle_irq = handle_percpu_devid_irq; - } else { + if (!gic_irq_in_rdist(d)) desc->handle_irq = handle_fasteoi_irq; - } gic_irq_set_prio(d, dist_prio_irq); } @@ -2080,19 +2040,9 @@ static const struct gic_quirk gic_quirks[] = { static void gic_enable_nmi_support(void) { - int i; - if (!gic_prio_masking_enabled() || nmi_support_forbidden) return; - rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, - sizeof(*rdist_nmi_refs), GFP_KERNEL); - if (!rdist_nmi_refs) - return; - - for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++) - refcount_set(&rdist_nmi_refs[i], 0); - pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", gic_has_relaxed_pmr_sync() ? "relaxed" : "forced"); From 5ff78c8de9d83ad6fc0553bf8f2edc816385837d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:28 +0100 Subject: [PATCH 11/33] genirq: Kill handle_percpu_devid_fasteoi_nmi() There is no in-tree user of this flow handler anymore, so simply remove it. Suggested-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-12-maz@kernel.org --- include/linux/irq.h | 1 - kernel/irq/chip.c | 25 ------------------------- 2 files changed, 26 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index c67e76fbcc07..b728c18f6ded 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -655,7 +655,6 @@ extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); extern void handle_fasteoi_nmi(struct irq_desc *desc); -extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3ffa0d80ddd1..633e1f67bb6f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -929,31 +929,6 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -/** - * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu - * dev ids - * @desc: the interrupt description structure for this irq - * - * Similar to handle_fasteoi_nmi, but handling the dev_id cookie - * as a percpu pointer. - */ -void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irqaction *action = desc->action; - unsigned int irq = irq_desc_get_irq(desc); - irqreturn_t res; - - __kstat_incr_irqs_this_cpu(desc); - - trace_irq_handler_entry(irq, action); - res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); - trace_irq_handler_exit(irq, action, res); - - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); -} - static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) From 5c2b2cc472e015e79c4f0170893a1e0883bd3bb4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:29 +0100 Subject: [PATCH 12/33] genirq: Merge irqaction::{dev_id,percpu_dev_id} When irqaction::percpu_dev_id was introduced, it was hoped that it could be part of an anonymous union with dev_id, as the two fields are mutually exclusive. However, toolchains used at the time were often showing terrible support for anonymous unions, breaking the build on a number of architectures. It was therefore decided to keep the two fields separate and address this down the line. 14 years later, the compiler dark age is over, and there is universal support for anonymous unions. Get a whole pointer back that can immediately be spent on something else. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-13-maz@kernel.org --- include/linux/interrupt.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 51b6484c0493..0ec1a71ab4e8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -121,8 +121,10 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); */ struct irqaction { irq_handler_t handler; - void *dev_id; - void __percpu *percpu_dev_id; + union { + void *dev_id; + void __percpu *percpu_dev_id; + }; struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; From 9047a39daa7832e40a9ae2d190ae5e7b351a9121 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:30 +0100 Subject: [PATCH 13/33] genirq: Factor-in percpu irqaction creation Move the code creating a per-cpu irqaction into its own helper, so that future changes to this code can be kept localised. At the same time, fix the documentation which appears to say the wrong thing when it comes to interrupts being automatically enabled (percpu_devid interrupts never are). Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20251020122944.3074811-14-maz@kernel.org --- kernel/irq/manage.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c94837382037..d9ddc30678b5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2442,6 +2442,24 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) return retval; } +static +struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, + const char *devname, void __percpu *dev_id) +{ + struct irqaction *action; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return NULL; + + action->handler = handler; + action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; + action->name = devname; + action->percpu_dev_id = dev_id; + + return action; +} + /** * __request_percpu_irq - allocate a percpu interrupt line * @irq: Interrupt line to allocate @@ -2450,9 +2468,9 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) * @devname: An ascii name for the claiming device * @dev_id: A percpu cookie passed back to the handler function * - * This call allocates interrupt resources and enables the interrupt on the - * local CPU. If the interrupt is supposed to be enabled on other CPUs, it - * has to be done on each CPU using enable_percpu_irq(). + * This call allocates interrupt resources, but doesn't enable the interrupt + * on any CPU, as all percpu-devid interrupts are flagged with IRQ_NOAUTOEN. + * It has to be done on each CPU using enable_percpu_irq(). * * @dev_id must be globally unique. It is a per-cpu variable, and * the handler gets called with the interrupted CPU's instance of @@ -2477,15 +2495,10 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, if (flags && flags != IRQF_TIMER) return -EINVAL; - action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + action = create_percpu_irqaction(handler, flags, devname, dev_id); if (!action) return -ENOMEM; - action->handler = handler; - action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; - action->name = devname; - action->percpu_dev_id = dev_id; - retval = irq_chip_pm_get(&desc->irq_data); if (retval < 0) { kfree(action); @@ -2546,16 +2559,11 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, if (irq_is_nmi(desc)) return -EINVAL; - action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING, + name, dev_id); if (!action) return -ENOMEM; - action->handler = handler; - action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD - | IRQF_NOBALANCING; - action->name = name; - action->percpu_dev_id = dev_id; - retval = irq_chip_pm_get(&desc->irq_data); if (retval < 0) goto err_out; From 258e7d28a3dcd389239f9688058140c1a418b549 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:31 +0100 Subject: [PATCH 14/33] genirq: Add affinity to percpu_devid interrupt requests Add an affinity field to both the irqaction structure and the interrupt request primitives. Nothing is making use of it yet, and the only value used it NULL, which is used as a shorthand for cpu_possible_mask. This will shortly get used with actual affinities. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-15-maz@kernel.org --- include/linux/interrupt.h | 5 +++-- kernel/irq/manage.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0ec1a71ab4e8..52147d5f432b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -125,6 +125,7 @@ struct irqaction { void *dev_id; void __percpu *percpu_dev_id; }; + const struct cpumask *affinity; struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; @@ -181,7 +182,7 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, extern int __must_check __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, - void __percpu *percpu_dev_id); + const cpumask_t *affinity, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, @@ -192,7 +193,7 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { return __request_percpu_irq(irq, handler, 0, - devname, percpu_dev_id); + devname, NULL, percpu_dev_id); } extern int __must_check diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d9ddc30678b5..5f4c65167743 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2444,10 +2444,14 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) static struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, - const char *devname, void __percpu *dev_id) + const char *devname, const cpumask_t *affinity, + void __percpu *dev_id) { struct irqaction *action; + if (!affinity) + affinity = cpu_possible_mask; + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!action) return NULL; @@ -2456,6 +2460,7 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long f action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; action->name = devname; action->percpu_dev_id = dev_id; + action->affinity = affinity; return action; } @@ -2466,6 +2471,7 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long f * @handler: Function to be called when the IRQ occurs. * @flags: Interrupt type flags (IRQF_TIMER only) * @devname: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt * @dev_id: A percpu cookie passed back to the handler function * * This call allocates interrupt resources, but doesn't enable the interrupt @@ -2478,7 +2484,7 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long f */ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, - void __percpu *dev_id) + const cpumask_t *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -2495,7 +2501,7 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, if (flags && flags != IRQF_TIMER) return -EINVAL; - action = create_percpu_irqaction(handler, flags, devname, dev_id); + action = create_percpu_irqaction(handler, flags, devname, affinity, dev_id); if (!action) return -ENOMEM; @@ -2560,7 +2566,7 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, return -EINVAL; action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING, - name, dev_id); + name, NULL, dev_id); if (!action) return -ENOMEM; From b9c6aa9efc71dae656f9f913d1250ea08cd6e10f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:32 +0100 Subject: [PATCH 15/33] genirq: Update request_percpu_nmi() to take an affinity Continue spreading the notion of affinity to the per CPU interrupt request code by updating the call sites that use request_percpu_nmi() (all two of them) to take an affinity pointer. This pointer is firmly NULL for now. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-16-maz@kernel.org --- arch/arm64/kernel/smp.c | 2 +- drivers/perf/arm_pmu.c | 2 +- include/linux/interrupt.h | 4 ++-- kernel/irq/manage.c | 12 +++++++----- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 68cea3a4a35c..6fb838eee2e7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1094,7 +1094,7 @@ static void ipi_setup_sgi(int ipi) irq = ipi_irq_base + ipi; if (ipi_should_be_nmi(ipi)) { - err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat); + err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat); WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err); } else { err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 5c310e803dd7..22c601b46c85 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -659,7 +659,7 @@ int armpmu_request_irq(int irq, int cpu) irq_ops = &pmunmi_ops; } } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); + err = request_percpu_nmi(irq, handler, "arm-pmu", NULL, &cpu_armpmu); /* If cannot get an NMI, get a normal interrupt */ if (err) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 52147d5f432b..81506ab759b8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -197,8 +197,8 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, } extern int __must_check -request_percpu_nmi(unsigned int irq, irq_handler_t handler, - const char *devname, void __percpu *dev); +request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name, + const struct cpumask *affinity, void __percpu *dev_id); extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 5f4c65167743..b1a3140e5f3c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2527,6 +2527,7 @@ EXPORT_SYMBOL_GPL(__request_percpu_irq); * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * @name: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt * @dev_id: A percpu cookie passed back to the handler function * * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs @@ -2543,8 +2544,8 @@ EXPORT_SYMBOL_GPL(__request_percpu_irq); * If the interrupt line cannot be used to deliver NMIs, function * will fail returning a negative value. */ -int request_percpu_nmi(unsigned int irq, irq_handler_t handler, - const char *name, void __percpu *dev_id) +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name, + const struct cpumask *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -2561,12 +2562,13 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, !irq_supports_nmi(desc)) return -EINVAL; - /* The line cannot already be NMI */ - if (irq_is_nmi(desc)) + /* The line cannot be NMI already if the new request covers all CPUs */ + if (irq_is_nmi(desc) && + (!affinity || cpumask_equal(affinity, cpu_possible_mask))) return -EINVAL; action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING, - name, NULL, dev_id); + name, affinity, dev_id); if (!action) return -ENOMEM; From bdf4e2ac295fe77c94b570a1ad12c0882bc89b53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:33 +0100 Subject: [PATCH 16/33] genirq: Allow per-cpu interrupt sharing for non-overlapping affinities Interrupt sharing for percpu-devid interrupts is forbidden, and for good reasons. These are interrupts generated *from* a CPU and handled by itself (timer, for example). Nobody in their right mind would put two devices on the same pin (and if they have, they get to keep the pieces...). But this also prevents more benign cases, where devices are connected to groups of CPUs, and for which the affinities are not overlapping. Effectively, the only thing they share is the interrupt number, and nothing else. Tweak the definition of IRQF_SHARED applied to percpu_devid interrupts to allow this particular use case. This results in extra validation at the point of the interrupt being setup and freed, as well as a tiny bit of extra complexity for interrupts at handling time (to pick the correct irqaction). Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-17-maz@kernel.org --- kernel/irq/chip.c | 8 ++++-- kernel/irq/manage.c | 67 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 633e1f67bb6f..19e0a87a2663 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -897,8 +897,9 @@ void handle_percpu_irq(struct irq_desc *desc) void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); + unsigned int cpu = smp_processor_id(); + struct irqaction *action; irqreturn_t res; /* @@ -910,12 +911,15 @@ void handle_percpu_devid_irq(struct irq_desc *desc) if (chip->irq_ack) chip->irq_ack(&desc->irq_data); + for (action = desc->action; action; action = action->next) + if (cpumask_test_cpu(cpu, action->affinity)) + break; + if (likely(action)) { trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); } else { - unsigned int cpu = smp_processor_id(); bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); if (enabled) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b1a3140e5f3c..7a09d96f4d2d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1418,6 +1418,19 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) return 0; } +static bool valid_percpu_irqaction(struct irqaction *old, struct irqaction *new) +{ + do { + if (cpumask_intersects(old->affinity, new->affinity) || + old->percpu_dev_id == new->percpu_dev_id) + return false; + + old = old->next; + } while (old); + + return true; +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1438,6 +1451,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; + bool per_cpu_devid; if (!desc) return -EINVAL; @@ -1447,6 +1461,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!try_module_get(desc->owner)) return -ENODEV; + per_cpu_devid = irq_settings_is_per_cpu_devid(desc); + new->irq = irq; /* @@ -1554,13 +1570,20 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ unsigned int oldtype; - if (irq_is_nmi(desc)) { + if (irq_is_nmi(desc) && !per_cpu_devid) { pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", new->name, irq, desc->irq_data.chip->name); ret = -EINVAL; goto out_unlock; } + if (per_cpu_devid && !valid_percpu_irqaction(old, new)) { + pr_err("Overlapping affinities for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. @@ -1711,7 +1734,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!(new->flags & IRQF_NO_AUTOEN) && irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - } else { + } else if (!per_cpu_devid) { /* * Shared interrupts do not go well with disabling * auto enable. The sharing interrupt might request @@ -2346,7 +2369,7 @@ void disable_percpu_nmi(unsigned int irq) static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; + struct irqaction *action, **action_ptr; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); @@ -2354,21 +2377,33 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_ return NULL; scoped_guard(raw_spinlock_irqsave, &desc->lock) { - action = desc->action; - if (!action || action->percpu_dev_id != dev_id) { - WARN(1, "Trying to free already-free IRQ %d\n", irq); - return NULL; + action_ptr = &desc->action; + for (;;) { + action = *action_ptr; + + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + return NULL; + } + + if (action->percpu_dev_id == dev_id) + break; + + action_ptr = &action->next; } - if (!cpumask_empty(desc->percpu_enabled)) { - WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", - irq, cpumask_first(desc->percpu_enabled)); + if (cpumask_intersects(desc->percpu_enabled, action->affinity)) { + WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", irq, + cpumask_first_and(desc->percpu_enabled, action->affinity)); return NULL; } /* Found it - now remove it from the list of entries: */ - desc->action = NULL; - desc->istate &= ~IRQS_NMI; + *action_ptr = action->next; + + /* Demote from NMI if we killed the last action */ + if (!desc->action) + desc->istate &= ~IRQS_NMI; } unregister_handler_proc(irq, action); @@ -2462,6 +2497,14 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long f action->percpu_dev_id = dev_id; action->affinity = affinity; + /* + * We allow some form of sharing for non-overlapping affinity + * masks. Obviously, covering all CPUs prevents any sharing in + * the first place. + */ + if (!cpumask_equal(affinity, cpu_possible_mask)) + action->flags |= IRQF_SHARED; + return action; } From c734af3b2b95f0ac6ed87c50e7602a6beeaf534f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:34 +0100 Subject: [PATCH 17/33] genirq: Add request_percpu_irq_affinity() helper While it would be nice to simply make request_percpu_irq() take an affinity mask, the churn is likely to be on the irritating side given that most drivers do not give a damn about affinities. So take the more innocuous path to provide a helper that parallels request_percpu_irq(), with an affinity as a bonus argument. Yes, request_percpu_irq_affinity() is a bit of a mouthful. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-18-maz@kernel.org --- include/linux/interrupt.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 81506ab759b8..fa62ab556ee3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -196,6 +196,15 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler, devname, NULL, percpu_dev_id); } +static inline int __must_check +request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, + const char *devname, const cpumask_t *affinity, + void __percpu *percpu_dev_id) +{ + return __request_percpu_irq(irq, handler, 0, + devname, affinity, percpu_dev_id); +} + extern int __must_check request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name, const struct cpumask *affinity, void __percpu *dev_id); From 54b350fa8e965dc59622698e2a18d6bf73944bf4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 20 Oct 2025 13:29:35 +0100 Subject: [PATCH 18/33] perf: arm_pmu: Request specific affinities for per CPU NMIs/interrupts Let the PMU driver request both NMIs and normal interrupts with an affinity mask matching the PMU affinity. Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-19-maz@kernel.org --- drivers/perf/arm_pmu.c | 44 +++++++++++++++++++-------------- drivers/perf/arm_pmu_acpi.c | 2 +- drivers/perf/arm_pmu_platform.c | 4 +-- include/linux/perf/arm_pmu.h | 4 +-- 4 files changed, 31 insertions(+), 23 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 22c601b46c85..959ceb3d1f55 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,7 +26,8 @@ #include -static int armpmu_count_irq_users(const int irq); +static int armpmu_count_irq_users(const struct cpumask *affinity, + const int irq); struct pmu_irq_ops { void (*enable_pmuirq)(unsigned int irq); @@ -64,7 +65,9 @@ static void armpmu_enable_percpu_pmuirq(unsigned int irq) static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, void __percpu *devid) { - if (armpmu_count_irq_users(irq) == 1) + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) free_percpu_irq(irq, devid); } @@ -89,7 +92,9 @@ static void armpmu_disable_percpu_pmunmi(unsigned int irq) static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, void __percpu *devid) { - if (armpmu_count_irq_users(irq) == 1) + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) free_percpu_nmi(irq, devid); } @@ -580,11 +585,11 @@ static const struct attribute_group armpmu_common_attr_group = { .attrs = armpmu_common_attrs, }; -static int armpmu_count_irq_users(const int irq) +static int armpmu_count_irq_users(const struct cpumask *affinity, const int irq) { int cpu, count = 0; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, affinity) { if (per_cpu(cpu_irq, cpu) == irq) count++; } @@ -592,12 +597,13 @@ static int armpmu_count_irq_users(const int irq) return count; } -static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +static const struct pmu_irq_ops * +armpmu_find_irq_ops(const struct cpumask *affinity, int irq) { const struct pmu_irq_ops *ops = NULL; int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu(cpu, affinity) { if (per_cpu(cpu_irq, cpu) != irq) continue; @@ -609,22 +615,25 @@ static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) return ops; } -void armpmu_free_irq(int irq, int cpu) +void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) return; if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, armpmu); per_cpu(cpu_irq, cpu) = 0; per_cpu(cpu_irq_ops, cpu) = NULL; } -int armpmu_request_irq(int irq, int cpu) +int armpmu_request_irq(struct arm_pmu * __percpu *pcpu_armpmu, int irq, int cpu) { int err = 0; + struct arm_pmu **armpmu = per_cpu_ptr(pcpu_armpmu, cpu); + const struct cpumask *affinity = *armpmu ? &(*armpmu)->supported_cpus : + cpu_possible_mask; /* ACPI */ const irq_handler_t handler = armpmu_dispatch_irq; const struct pmu_irq_ops *irq_ops; @@ -646,25 +655,24 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NOBALANCING | IRQF_NO_AUTOEN | IRQF_NO_THREAD; - err = request_nmi(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&cpu_armpmu, cpu)); + err = request_nmi(irq, handler, irq_flags, "arm-pmu", armpmu); /* If cannot get an NMI, get a normal interrupt */ if (err) { err = request_irq(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&cpu_armpmu, cpu)); + armpmu); irq_ops = &pmuirq_ops; } else { has_nmi = true; irq_ops = &pmunmi_ops; } - } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_nmi(irq, handler, "arm-pmu", NULL, &cpu_armpmu); + } else if (armpmu_count_irq_users(affinity, irq) == 0) { + err = request_percpu_nmi(irq, handler, "arm-pmu", affinity, pcpu_armpmu); /* If cannot get an NMI, get a normal interrupt */ if (err) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_percpu_irq_affinity(irq, handler, "arm-pmu", + affinity, pcpu_armpmu); irq_ops = &percpu_pmuirq_ops; } else { has_nmi = true; @@ -672,7 +680,7 @@ int armpmu_request_irq(int irq, int cpu) } } else { /* Per cpudevid irq was already requested by another CPU */ - irq_ops = armpmu_find_irq_ops(irq); + irq_ops = armpmu_find_irq_ops(affinity, irq); if (WARN_ON(!irq_ops)) err = -EINVAL; diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 05dda19c5359..e80f76d95e68 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -218,7 +218,7 @@ static int arm_pmu_acpi_parse_irqs(void) * them with their PMUs. */ per_cpu(pmu_irqs, cpu) = irq; - err = armpmu_request_irq(irq, cpu); + err = armpmu_request_irq(&probed_pmus, irq, cpu); if (err) goto out_err; } diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 9c0494d8a867..1c9e50a13201 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -165,7 +165,7 @@ static int armpmu_request_irqs(struct arm_pmu *armpmu) if (!irq) continue; - err = armpmu_request_irq(irq, cpu); + err = armpmu_request_irq(&hw_events->percpu_pmu, irq, cpu); if (err) break; } @@ -181,7 +181,7 @@ static void armpmu_free_irqs(struct arm_pmu *armpmu) for_each_cpu(cpu, &armpmu->supported_cpus) { int irq = per_cpu(hw_events->irq, cpu); - armpmu_free_irq(irq, cpu); + armpmu_free_irq(&hw_events->percpu_pmu, irq, cpu); } } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 93c9a26492fc..6690bd77aa4e 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -190,8 +190,8 @@ bool arm_pmu_irq_is_nmi(void); struct arm_pmu *armpmu_alloc(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); -int armpmu_request_irq(int irq, int cpu); -void armpmu_free_irq(int irq, int cpu); +int armpmu_request_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu); +void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu); #define ARMV8_PMU_PDEV_NAME "armv8-pmu" From f8112d29ba992e51e4789844340f7da4bdca5fcb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:36 +0100 Subject: [PATCH 19/33] perf: arm_spe_pmu: Request specific affinities for per CPU interrupts Let the SPE driver request interrupts with an affinity mask matching the SPE implementation affinity. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-20-maz@kernel.org --- drivers/perf/arm_spe_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 1460b02d20e8..87908f0712c0 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1259,8 +1259,8 @@ static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu) return -ENXIO; /* Request our PPIs (note that the IRQ is still disabled) */ - ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME, - spe_pmu->handle); + ret = request_percpu_irq_affinity(spe_pmu->irq, arm_spe_pmu_irq_handler, + DRVNAME, mask, spe_pmu->handle); if (ret) return ret; From 4cdf4813f528cdadfc3778fed6b7783cfe1eb75a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:37 +0100 Subject: [PATCH 20/33] coresight: trbe: Request specific affinities for per CPU interrupts Let the TRBE driver request interrupts with an affinity mask matching the TRBE implementation affinity. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Acked-by: Suzuki K Poulose Link: https://patch.msgid.link/20251020122944.3074811-21-maz@kernel.org --- drivers/hwtracing/coresight/coresight-trbe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 8f17160fec44..9f64f463339d 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -1494,7 +1494,8 @@ static int arm_trbe_probe_irq(struct platform_device *pdev, if (!drvdata->handle) return -ENOMEM; - ret = request_percpu_irq(drvdata->irq, arm_trbe_irq_handler, DRVNAME, drvdata->handle); + ret = request_percpu_irq_affinity(drvdata->irq, arm_trbe_irq_handler, DRVNAME, + affinity, drvdata->handle); if (ret) { free_percpu(drvdata->handle); return ret; From 64b9738eaa937232f2567fd55bbb4fc1a00242ea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:38 +0100 Subject: [PATCH 21/33] irqchip/gic-v3: Drop support for custom PPI partitions The only thing getting in the way of correctly handling PPIs the way they were intended is the GICv3 hack that deals with PPI partitions. Remove that code, allowing the common code to kick in. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-22-maz@kernel.org --- drivers/irqchip/Kconfig | 1 - drivers/irqchip/irq-gic-v3.c | 133 +++-------------------------------- 2 files changed, 8 insertions(+), 126 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index a61c6dc63c29..648b3618fc78 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -36,7 +36,6 @@ config GIC_NON_BANKED config ARM_GIC_V3 bool select IRQ_DOMAIN_HIERARCHY - select PARTITION_PERCPU select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select HAVE_ARM_SMCCC_DISCOVERY select IRQ_MSI_IOMMU diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index dd2d6d722fc5..6607ab58f72e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -46,8 +45,6 @@ static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; #define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) #define FLAGS_WORKAROUND_INSECURE (1ULL << 3) -#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) - static struct cpumask broken_rdists __read_mostly __maybe_unused; struct redist_region { @@ -68,11 +65,15 @@ struct gic_chip_data { u64 flags; bool has_rss; unsigned int ppi_nr; - struct partition_desc **ppi_descs; struct partition_affinity *parts; unsigned int nr_parts; }; +struct partition_affinity { + cpumask_t mask; + struct fwnode_handle *partition_id; +}; + #define T241_CHIPS_MAX 4 static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly; static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum); @@ -593,18 +594,6 @@ static void gic_irq_set_prio(struct irq_data *d, u8 prio) writeb_relaxed(prio, base + offset + index); } -static u32 __gic_get_ppi_index(irq_hw_number_t hwirq) -{ - switch (__get_intid_range(hwirq)) { - case PPI_RANGE: - return hwirq - 16; - case EPPI_RANGE: - return hwirq - EPPI_BASE_INTID + 16; - default: - unreachable(); - } -} - static int gic_irq_nmi_setup(struct irq_data *d) { struct irq_desc *desc = irq_to_desc(d->irq); @@ -1628,13 +1617,6 @@ static int gic_irq_domain_translate(struct irq_domain *d, case GIC_IRQ_TYPE_LPI: /* LPI */ *hwirq = fwspec->param[1]; break; - case GIC_IRQ_TYPE_PARTITION: - *hwirq = fwspec->param[1]; - if (fwspec->param[1] >= 16) - *hwirq += EPPI_BASE_INTID - 16; - else - *hwirq += 16; - break; default: return -EINVAL; } @@ -1643,10 +1625,8 @@ static int gic_irq_domain_translate(struct irq_domain *d, /* * Make it clear that broken DTs are... broken. - * Partitioned PPIs are an unfortunate exception. */ - WARN_ON(*type == IRQ_TYPE_NONE && - fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); + WARN_ON(*type == IRQ_TYPE_NONE); return 0; } @@ -1703,33 +1683,12 @@ static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq, } } -static bool fwspec_is_partitioned_ppi(struct irq_fwspec *fwspec, - irq_hw_number_t hwirq) -{ - enum gic_intid_range range; - - if (!gic_data.ppi_descs) - return false; - - if (!is_of_node(fwspec->fwnode)) - return false; - - if (fwspec->param_count < 4 || !fwspec->param[3]) - return false; - - range = __get_intid_range(hwirq); - if (range != PPI_RANGE && range != EPPI_RANGE) - return false; - - return true; -} - static int gic_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token) { - unsigned int type, ppi_idx; irq_hw_number_t hwirq; + unsigned int type; int ret; /* Not for us */ @@ -1748,15 +1707,7 @@ static int gic_irq_domain_select(struct irq_domain *d, if (WARN_ON_ONCE(ret)) return 0; - if (!fwspec_is_partitioned_ppi(fwspec, hwirq)) - return d == gic_data.domain; - - /* - * If this is a PPI and we have a 4th (non-null) parameter, - * then we need to match the partition domain. - */ - ppi_idx = __gic_get_ppi_index(hwirq); - return d == partition_get_domain(gic_data.ppi_descs[ppi_idx]); + return d == gic_data.domain; } static int gic_irq_get_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) @@ -1813,45 +1764,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .get_fwspec_info = gic_irq_get_fwspec_info, }; -static int partition_domain_translate(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *hwirq, - unsigned int *type) -{ - unsigned long ppi_intid; - struct device_node *np; - unsigned int ppi_idx; - int ret; - - if (!gic_data.ppi_descs) - return -ENOMEM; - - np = of_find_node_by_phandle(fwspec->param[3]); - if (WARN_ON(!np)) - return -EINVAL; - - ret = gic_irq_domain_translate(d, fwspec, &ppi_intid, type); - if (WARN_ON_ONCE(ret)) - return 0; - - ppi_idx = __gic_get_ppi_index(ppi_intid); - ret = partition_translate_id(gic_data.ppi_descs[ppi_idx], - of_fwnode_handle(np)); - if (ret < 0) - return ret; - - *hwirq = ret; - *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; - - return 0; -} - -static const struct irq_domain_ops partition_domain_ops = { - .translate = partition_domain_translate, - .select = gic_irq_domain_select, - .get_fwspec_info = gic_irq_get_fwspec_info, -}; - static bool gic_enable_quirk_msm8996(void *data) { struct gic_chip_data *d = data; @@ -2174,12 +2086,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) if (!parts_node) return; - gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL); - if (!gic_data.ppi_descs) - goto out_put_node; - nr_parts = of_get_child_count(parts_node); - if (!nr_parts) goto out_put_node; @@ -2235,30 +2142,6 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) gic_data.parts = parts; gic_data.nr_parts = nr_parts; - for (i = 0; i < gic_data.ppi_nr; i++) { - unsigned int irq; - struct partition_desc *desc; - struct irq_fwspec ppi_fwspec = { - .fwnode = gic_data.fwnode, - .param_count = 3, - .param = { - [0] = GIC_IRQ_TYPE_PARTITION, - [1] = i, - [2] = IRQ_TYPE_NONE, - }, - }; - - irq = irq_create_fwspec_mapping(&ppi_fwspec); - if (WARN_ON(!irq)) - continue; - desc = partition_create_desc(gic_data.fwnode, parts, nr_parts, - irq, &partition_domain_ops); - if (WARN_ON(!desc)) - continue; - - gic_data.ppi_descs[i] = desc; - } - out_put_node: of_node_put(parts_node); } From 7443813f107a344139b3c7d04161bd7bddea7eda Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:39 +0100 Subject: [PATCH 22/33] irqchip/apple-aic: Drop support for custom PMU irq partitions Similarly to what has been done for GICv3, drop the irq partitioning support from the AIC driver, effectively merging the two per-cpu interrupts for the PMU. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Reviewed-by: Sven Peter Link: https://patch.msgid.link/20251020122944.3074811-23-maz@kernel.org --- drivers/irqchip/irq-apple-aic.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 0b72451725eb..795b3db4554a 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -578,16 +578,9 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) } if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == - (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { - int irq; - if (cpumask_test_cpu(smp_processor_id(), - &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) - irq = AIC_CPU_PMU_P; - else - irq = AIC_CPU_PMU_E; + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) generic_handle_domain_irq(aic_irqc->hw_domain, - AIC_FIQ_HWIRQ(irq)); - } + AIC_FIQ_HWIRQ(AIC_CPU_PMU_P)); if (static_branch_likely(&use_fast_ipi) && (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ) && @@ -632,18 +625,7 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq, handle_fasteoi_irq, NULL, NULL); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); } else { - int fiq = FIELD_GET(AIC_EVENT_NUM, hw); - - switch (fiq) { - case AIC_CPU_PMU_P: - case AIC_CPU_PMU_E: - irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff); - break; - default: - irq_set_percpu_devid(irq); - break; - } - + irq_set_percpu_devid(irq); irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data, handle_percpu_devid_irq, NULL, NULL); } @@ -732,6 +714,10 @@ static int aic_irq_domain_translate(struct irq_domain *id, break; } } + + /* Merge the two PMUs on a single interrupt */ + if (*hwirq == AIC_CPU_PMU_E) + *hwirq = AIC_CPU_PMU_P; break; default: return -EINVAL; From c620438ef2ac80b09269a9ae3c0b4fe5add19bfe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:40 +0100 Subject: [PATCH 23/33] irqchip: Kill irq-partition-percpu This code is now completely unused, and nobody will ever miss it. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-24-maz@kernel.org --- drivers/irqchip/Kconfig | 3 - drivers/irqchip/Makefile | 1 - drivers/irqchip/irq-partition-percpu.c | 241 ------------------- include/linux/irqchip/irq-partition-percpu.h | 53 ---- 4 files changed, 298 deletions(-) delete mode 100644 drivers/irqchip/irq-partition-percpu.c delete mode 100644 include/linux/irqchip/irq-partition-percpu.h diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 648b3618fc78..5dddb4c9442a 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -450,9 +450,6 @@ config LS_SCFG_MSI depends on PCI_MSI select IRQ_MSI_LIB -config PARTITION_PERCPU - bool - config STM32MP_EXTI tristate "STM32MP extended interrupts and event controller" depends on (ARCH_STM32 && !ARM_SINGLE_ARMV7M) || COMPILE_TEST diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 3de083f5484c..6a229443efe0 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -36,7 +36,6 @@ obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o obj-$(CONFIG_ARM_GIC_ITS_PARENT) += irq-gic-its-msi-parent.o obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v4.o obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC) += irq-gic-v3-its-fsl-mc-msi.o -obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o obj-$(CONFIG_ARM_GIC_V5) += irq-gic-v5.o irq-gic-v5-irs.o irq-gic-v5-its.o \ irq-gic-v5-iwb.o obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c deleted file mode 100644 index 4441ffe149ea..000000000000 --- a/drivers/irqchip/irq-partition-percpu.c +++ /dev/null @@ -1,241 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2016 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct partition_desc { - int nr_parts; - struct partition_affinity *parts; - struct irq_domain *domain; - struct irq_desc *chained_desc; - unsigned long *bitmap; - struct irq_domain_ops ops; -}; - -static bool partition_check_cpu(struct partition_desc *part, - unsigned int cpu, unsigned int hwirq) -{ - return cpumask_test_cpu(cpu, &part->parts[hwirq].mask); -} - -static void partition_irq_mask(struct irq_data *d) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - if (partition_check_cpu(part, smp_processor_id(), d->hwirq) && - chip->irq_mask) - chip->irq_mask(data); -} - -static void partition_irq_unmask(struct irq_data *d) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - if (partition_check_cpu(part, smp_processor_id(), d->hwirq) && - chip->irq_unmask) - chip->irq_unmask(data); -} - -static int partition_irq_set_irqchip_state(struct irq_data *d, - enum irqchip_irq_state which, - bool val) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - if (partition_check_cpu(part, smp_processor_id(), d->hwirq) && - chip->irq_set_irqchip_state) - return chip->irq_set_irqchip_state(data, which, val); - - return -EINVAL; -} - -static int partition_irq_get_irqchip_state(struct irq_data *d, - enum irqchip_irq_state which, - bool *val) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - if (partition_check_cpu(part, smp_processor_id(), d->hwirq) && - chip->irq_get_irqchip_state) - return chip->irq_get_irqchip_state(data, which, val); - - return -EINVAL; -} - -static int partition_irq_set_type(struct irq_data *d, unsigned int type) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - if (chip->irq_set_type) - return chip->irq_set_type(data, type); - - return -EINVAL; -} - -static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p) -{ - struct partition_desc *part = irq_data_get_irq_chip_data(d); - struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); - struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - - seq_printf(p, "%5s-%lu", chip->name, data->hwirq); -} - -static struct irq_chip partition_irq_chip = { - .irq_mask = partition_irq_mask, - .irq_unmask = partition_irq_unmask, - .irq_set_type = partition_irq_set_type, - .irq_get_irqchip_state = partition_irq_get_irqchip_state, - .irq_set_irqchip_state = partition_irq_set_irqchip_state, - .irq_print_chip = partition_irq_print_chip, -}; - -static void partition_handle_irq(struct irq_desc *desc) -{ - struct partition_desc *part = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - int cpu = smp_processor_id(); - int hwirq; - - chained_irq_enter(chip, desc); - - for_each_set_bit(hwirq, part->bitmap, part->nr_parts) { - if (partition_check_cpu(part, cpu, hwirq)) - break; - } - - if (unlikely(hwirq == part->nr_parts)) - handle_bad_irq(desc); - else - generic_handle_domain_irq(part->domain, hwirq); - - chained_irq_exit(chip, desc); -} - -static int partition_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *arg) -{ - int ret; - irq_hw_number_t hwirq; - unsigned int type; - struct irq_fwspec *fwspec = arg; - struct partition_desc *part; - - BUG_ON(nr_irqs != 1); - ret = domain->ops->translate(domain, fwspec, &hwirq, &type); - if (ret) - return ret; - - part = domain->host_data; - - set_bit(hwirq, part->bitmap); - irq_set_chained_handler_and_data(irq_desc_get_irq(part->chained_desc), - partition_handle_irq, part); - irq_set_percpu_devid_partition(virq, &part->parts[hwirq].mask); - irq_domain_set_info(domain, virq, hwirq, &partition_irq_chip, part, - handle_percpu_devid_irq, NULL, NULL); - irq_set_status_flags(virq, IRQ_NOAUTOEN); - - return 0; -} - -static void partition_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) -{ - struct irq_data *d; - - BUG_ON(nr_irqs != 1); - - d = irq_domain_get_irq_data(domain, virq); - irq_set_handler(virq, NULL); - irq_domain_reset_irq_data(d); -} - -int partition_translate_id(struct partition_desc *desc, void *partition_id) -{ - struct partition_affinity *part = NULL; - int i; - - for (i = 0; i < desc->nr_parts; i++) { - if (desc->parts[i].partition_id == partition_id) { - part = &desc->parts[i]; - break; - } - } - - if (WARN_ON(!part)) { - pr_err("Failed to find partition\n"); - return -EINVAL; - } - - return i; -} - -struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode, - struct partition_affinity *parts, - int nr_parts, - int chained_irq, - const struct irq_domain_ops *ops) -{ - struct partition_desc *desc; - struct irq_domain *d; - - BUG_ON(!ops->select || !ops->translate); - - desc = kzalloc(sizeof(*desc), GFP_KERNEL); - if (!desc) - return NULL; - - desc->ops = *ops; - desc->ops.free = partition_domain_free; - desc->ops.alloc = partition_domain_alloc; - - d = irq_domain_create_linear(fwnode, nr_parts, &desc->ops, desc); - if (!d) - goto out; - desc->domain = d; - - desc->bitmap = bitmap_zalloc(nr_parts, GFP_KERNEL); - if (WARN_ON(!desc->bitmap)) - goto out; - - desc->chained_desc = irq_to_desc(chained_irq); - desc->nr_parts = nr_parts; - desc->parts = parts; - - return desc; -out: - if (d) - irq_domain_remove(d); - kfree(desc); - - return NULL; -} - -struct irq_domain *partition_get_domain(struct partition_desc *dsc) -{ - if (dsc) - return dsc->domain; - - return NULL; -} diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h deleted file mode 100644 index b35ee22c278f..000000000000 --- a/include/linux/irqchip/irq-partition-percpu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2016 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier - */ - -#ifndef __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H -#define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H - -#include -#include -#include - -struct partition_affinity { - cpumask_t mask; - void *partition_id; -}; - -struct partition_desc; - -#ifdef CONFIG_PARTITION_PERCPU -int partition_translate_id(struct partition_desc *desc, void *partition_id); -struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode, - struct partition_affinity *parts, - int nr_parts, - int chained_irq, - const struct irq_domain_ops *ops); -struct irq_domain *partition_get_domain(struct partition_desc *dsc); -#else -static inline int partition_translate_id(struct partition_desc *desc, - void *partition_id) -{ - return -EINVAL; -} - -static inline -struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode, - struct partition_affinity *parts, - int nr_parts, - int chained_irq, - const struct irq_domain_ops *ops) -{ - return NULL; -} - -static inline -struct irq_domain *partition_get_domain(struct partition_desc *dsc) -{ - return NULL; -} -#endif - -#endif /* __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H */ From ee2d50a9f524ae829d1a8ec296d7a0170e7b8ade Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:41 +0100 Subject: [PATCH 24/33] genirq: Kill irq_{g,s}et_percpu_devid_partition() These two helpers do not have any user anymore, and can be removed, together with the affinity field kept in the irqdesc structure. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-25-maz@kernel.org --- include/linux/irq.h | 4 ---- include/linux/irqdesc.h | 1 - kernel/irq/irqdesc.c | 24 +----------------------- 3 files changed, 1 insertion(+), 28 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index b728c18f6ded..4a9f1d7b08c3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -718,10 +718,6 @@ static inline void irq_set_chip_and_handler(unsigned int irq, } extern int irq_set_percpu_devid(unsigned int irq); -extern int irq_set_percpu_devid_partition(unsigned int irq, - const struct cpumask *affinity); -extern int irq_get_percpu_devid_partition(unsigned int irq, - struct cpumask *affinity); extern void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index fd091c35d572..37e0b5b5600a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -82,7 +82,6 @@ struct irq_desc { int threads_handled_last; raw_spinlock_t lock; struct cpumask *percpu_enabled; - const struct cpumask *percpu_affinity; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index db714d3014b5..6acf268f005b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -879,8 +879,7 @@ void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus) chip_bus_sync_unlock(desc); } -int irq_set_percpu_devid_partition(unsigned int irq, - const struct cpumask *affinity) +int irq_set_percpu_devid(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -892,31 +891,10 @@ int irq_set_percpu_devid_partition(unsigned int irq, if (!desc->percpu_enabled) return -ENOMEM; - desc->percpu_affinity = affinity ? : cpu_possible_mask; - irq_set_percpu_devid_flags(irq); return 0; } -int irq_set_percpu_devid(unsigned int irq) -{ - return irq_set_percpu_devid_partition(irq, NULL); -} - -int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (!desc || !desc->percpu_enabled) - return -EINVAL; - - if (affinity) - cpumask_copy(affinity, desc->percpu_affinity); - - return 0; -} -EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition); - void kstat_incr_irq_this_cpu(unsigned int irq) { kstat_incr_irqs_this_cpu(irq_to_desc(irq)); From ebac4649fcadc6047030810326875c6e612c7b2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:42 +0100 Subject: [PATCH 25/33] irqdomain: Kill of_node_to_fwnode() helper There is no in-tree users of this helper since b13b41cc3dc18 ("misc: ti_fpc202: Switch to of_fwnode_handle()"), and is replaced with of_fwnode_handle(). Get rid of it. Suggested-by: Jonathan Cameron Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-26-maz@kernel.org --- include/linux/irqdomain.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9d6a5e99394f..5907baf6099d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -730,12 +730,6 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig } #endif -/* Deprecated functions. Will be removed in the merge window */ -static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) -{ - return node ? &node->fwnode : NULL; -} - static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, const struct irq_domain_ops *ops, void *host_data) From fa9d2777387346645a40ab37cfb0c37b3ef40cc9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 20 Oct 2025 13:29:43 +0100 Subject: [PATCH 26/33] perf: arm_pmu: Kill last use of per-CPU cpu_armpmu pointer Having removed the use of the cpu_armpmu per-CPU variable from the interrupt handling, the only user left is the BRBE scheduler hook. It is easy to drop the use of this variable by following the pointer to the generic PMU structure, and get the arm_pmu structure from there. Perform the conversion and kill cpu_armpmu altogether. Suggested-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Will Deacon Link: https://patch.msgid.link/20251020122944.3074811-27-maz@kernel.org --- drivers/perf/arm_pmu.c | 5 ----- drivers/perf/arm_pmuv3.c | 2 +- include/linux/perf/arm_pmu.h | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 959ceb3d1f55..f7abd1333963 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -104,7 +104,6 @@ static const struct pmu_irq_ops percpu_pmunmi_ops = { .free_pmuirq = armpmu_free_percpu_pmunmi }; -DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); @@ -725,8 +724,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) if (pmu->reset) pmu->reset(pmu); - per_cpu(cpu_armpmu, cpu) = pmu; - irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); @@ -746,8 +743,6 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) if (irq) per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); - per_cpu(cpu_armpmu, cpu) = NULL; - return 0; } diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 69c5cc8f5606..ca8d706d4022 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1064,7 +1064,7 @@ static int armv8pmu_user_event_idx(struct perf_event *event) static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, struct task_struct *task, bool sched_in) { - struct arm_pmu *armpmu = *this_cpu_ptr(&cpu_armpmu); + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); if (!hw_events->branch_users) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 6690bd77aa4e..bab26a7d79f4 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -132,8 +132,6 @@ struct arm_pmu { #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -DECLARE_PER_CPU(struct arm_pmu *, cpu_armpmu); - u64 armpmu_event_update(struct perf_event *event); int armpmu_event_set_period(struct perf_event *event); From 68c4c159a0db4409a5d6b5f4703d71b89a96f06a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Oct 2025 14:30:32 +0000 Subject: [PATCH 27/33] genirq: Fix percpu_devid irq affinity documentation Stephen points out that some of the percpu_devid irq affinity documentation is either missing or not matching the data structures. Address all the issues in one go. Fixes: 87b0031f7f73 ("irqdomain: Add firmware info reporting interface") Fixes: 258e7d28a3dc ("genirq: Add affinity to percpu_devid interrupt requests") Reported-by: Stephen Rothwell Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251030143032.2035987-1-maz@kernel.org --- include/linux/interrupt.h | 1 + include/linux/irqdomain.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index fa62ab556ee3..266f2b39213a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -109,6 +109,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * @name: name of the device * @dev_id: cookie to identify the device * @percpu_dev_id: cookie to identify the device + * @affinity: CPUs this irqaction is allowed to run on * @next: pointer to the next irqaction for shared interrupts * @irq: interrupt number * @flags: flags (see IRQF_* above) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 5907baf6099d..952d3c8dd6b7 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -48,7 +48,7 @@ struct irq_fwspec { * struct irq_fwspec_info - firmware provided IRQ information structure * * @flags: Information validity flags - * @cpumask: Affinity mask for this interrupt + * @affinity: Affinity mask for this interrupt * * This structure reports firmware-specific information about an * interrupt. The only significant information is the affinity of a From 9ea2b810d51ae662cc5b5578f9395cb620a34a26 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Oct 2025 17:04:08 +0800 Subject: [PATCH 28/33] genirq/proc: Fix race in show_irq_affinity() Reading /proc/irq/N/smp_affinity* races with irq_set_affinity() and irq_move_masked_irq(), leading to old or torn output for users. After a user writes a new CPU mask to /proc/irq/N/affinity*, the syscall returns success, yet a subsequent read of the same file immediately returns a value different from what was just written. That's due to a race between show_irq_affinity() and irq_move_masked_irq() which lets the read observe a transient, inconsistent affinity mask. Cure it by guarding the read with irq_desc::lock. [ tglx: Massaged change log ] Signed-off-by: Muchun Song Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251028090408.76331-1-songmuchun@bytedance.com --- kernel/irq/proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 29c2404e743b..77258eafbf63 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -48,6 +48,8 @@ static int show_irq_affinity(int type, struct seq_file *m) struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask; + guard(raw_spinlock_irq)(&desc->lock); + switch (type) { case AFFINITY: case AFFINITY_LIST: From 51d0656959bcdb743232f9b530b4cca569e74e7f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 27 Oct 2025 13:59:31 +0100 Subject: [PATCH 29/33] genirq/manage: Reduce priority of forced secondary interrupt handler Crystal reports that the PCIe Advanced Error Reporting driver gets stuck in an infinite loop on PREEMPT_RT: Both the primary interrupt handler aer_irq() as well as the secondary handler aer_isr() are forced into threads with identical priority. Crystal writes that on the ARM system in question, the primary handler has to clear an error in the Root Error Status register... "before the next error happens, or else the hardware will set the Multiple ERR_COR Received bit. If that bit is set, then aer_isr() can't rely on the Error Source Identification register, so it scans through all devices looking for errors -- and for some reason, on this system, accessing the AER registers (or any Config Space above 0x400, even though there are capabilities located there) generates an Unsupported Request Error (but returns valid data). Since this happens more than once, without aer_irq() preempting, it causes another multi error and we get stuck in a loop." The issue does not show on non-PREEMPT_RT because the primary handler runs in hardirq context and thus can preempt the threaded secondary handler, clear the Root Error Status register and prevent the secondary handler from getting stuck. Emulate the same behavior on PREEMPT_RT by assigning a lower default priority to the secondary handler if the primary handler is forced into a thread. Reported-by: Crystal Wood Signed-off-by: Lukas Wunner Signed-off-by: Thomas Gleixner Tested-by: Crystal Wood Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/f6dcdb41be2694886b8dbf4fe7b3ab89e9d5114c.1761569303.git.lukas@wunner.de Closes: https://lore.kernel.org/r/20250902224441.368483-1-crwood@redhat.com/ --- include/linux/sched.h | 1 + kernel/irq/manage.c | 5 ++++- kernel/sched/syscalls.c | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..cd6be74d87b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1901,6 +1901,7 @@ extern int sched_setscheduler(struct task_struct *, int, const struct sched_para extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); +extern void sched_set_fifo_secondary(struct task_struct *p); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7a09d96f4d2d..c812b6f48f2b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1239,7 +1239,10 @@ static int irq_thread(void *data) irq_thread_set_ready(desc, action); - sched_set_fifo(current); + if (action->handler == irq_forced_secondary_handler) + sched_set_fifo_secondary(current); + else + sched_set_fifo(current); if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 77ae87f36e84..48347950ac48 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -856,6 +856,19 @@ void sched_set_fifo_low(struct task_struct *p) } EXPORT_SYMBOL_GPL(sched_set_fifo_low); +/* + * Used when the primary interrupt handler is forced into a thread, in addition + * to the (always threaded) secondary handler. The secondary handler gets a + * slightly lower priority so that the primary handler can preempt it, thereby + * emulating the behavior of a non-PREEMPT_RT system where the primary handler + * runs in hard interrupt context. + */ +void sched_set_fifo_secondary(struct task_struct *p) +{ + struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 - 1 }; + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); +} + void sched_set_normal(struct task_struct *p, int nice) { struct sched_attr attr = { From 9d3faec60b1303fbec53d7a9b48a8c0fc5ae029b Mon Sep 17 00:00:00 2001 From: Chengkaitao Date: Tue, 18 Nov 2025 09:27:54 +0800 Subject: [PATCH 30/33] genirq: Use raw_spinlock_irq() in irq_set_affinity_notifier() Since irq_set_affinity_notifier() may sleep, interrupts are enabled. So raw_spinlock_irqsave() can be replaced with raw_spinlock_irq(). Signed-off-by: Chengkaitao Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251118012754.61805-1-pilgrimtao@gmail.com --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c812b6f48f2b..c1ce30c9c3ab 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -547,7 +547,7 @@ int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *noti INIT_WORK(¬ify->work, irq_affinity_notify); } - scoped_guard(raw_spinlock_irqsave, &desc->lock) { + scoped_guard(raw_spinlock_irq, &desc->lock) { old_notify = desc->affinity_notify; desc->affinity_notify = notify; } From 68775ca79af3b8d4c147598983ece012d7007bac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Nov 2025 15:34:58 +0100 Subject: [PATCH 31/33] genirq: Prevent early spurious wake-ups of interrupt threads During initialization, the interrupt thread is created before the interrupt is enabled. The interrupt enablement happens before the actual kthread wake up point. Once the interrupt is enabled the hardware can raise an interrupt and once setup_irq() drops the descriptor lock a interrupt wake-up can happen. Even when such an interrupt can be considered premature, this is not a problem in general because at the point where the descriptor lock is dropped and the wakeup can happen, the data which is used by the thread is fully initialized. Though from the perspective of least surprise, the initial wakeup really should be performed by the setup code and not randomly by a premature interrupt. Prevent this by performing a wake-up only if the target is in state TASK_INTERRUPTIBLE, which the thread uses in wait_for_interrupt(). If the thread is still in state TASK_UNINTERRUPTIBLE, the wake-up is not lost because after the setup code completed the initial wake-up the thread will observe the IRQTF_RUNTHREAD and proceed with the handling. [ tglx: Simplified the changes and extended the changelog. ] Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://patch.msgid.link/20251121143500.42111-2-frederic@kernel.org --- kernel/irq/handle.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e103451243a0..786f5570a640 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,7 +133,15 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) */ atomic_inc(&desc->threads_active); - wake_up_process(action->thread); + /* + * This might be a premature wakeup before the thread reached the + * thread function and set the IRQTF_READY bit. It's waiting in + * kthread code with state UNINTERRUPTIBLE. Once it reaches the + * thread function it waits with INTERRUPTIBLE. The wakeup is not + * lost in that case because the thread is guaranteed to observe + * the RUN flag before it goes to sleep in wait_for_interrupt(). + */ + wake_up_state(action->thread, TASK_INTERRUPTIBLE); } static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); From 801afdfbfcd90ff62a4b2469bbda1d958f7a5353 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Nov 2025 15:34:59 +0100 Subject: [PATCH 32/33] genirq: Fix interrupt threads affinity vs. cpuset isolated partitions When a cpuset isolated partition is created / updated or destroyed, the interrupt threads are affined blindly to all the non-isolated CPUs. This happens without taking into account the interrupt threads initial affinity that becomes ignored. For example in a system with 8 CPUs, if an interrupt and its kthread are initially affine to CPU 5, creating an isolated partition with only CPU 2 inside will eventually end up affining the interrupt kthread to all CPUs but CPU 2 (that is CPUs 0,1,3-7), losing the kthread preference for CPU 5. Besides the blind re-affining, this doesn't take care of the actual low level interrupt which isn't migrated. As of today the only way to isolate non managed interrupts, along with their kthreads, is to overwrite their affinity separately, for example through /proc/irq/ To avoid doing that manually, future development should focus on updating the interrupt's affinity whenever cpuset isolated partitions are updated. In the meantime, cpuset shouldn't fiddle with interrupt threads directly. To prevent from that, set the PF_NO_SETAFFINITY flag to them. This is done through kthread_bind_mask() by affining them initially to all possible CPUs as at that point the interrupt is not started up which means the affinity of the hard interrupt is not known. The thread will adjust that once it reaches the handler, which is guaranteed to happen after the initial affinity of the hard interrupt is established. Suggested-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://patch.msgid.link/20251121143500.42111-3-frederic@kernel.org --- kernel/irq/manage.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c1ce30c9c3ab..61da1c68ff82 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1408,16 +1408,23 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) * references an already freed task_struct. */ new->thread = get_task_struct(t); + /* - * Tell the thread to set its affinity. This is - * important for shared interrupt handlers as we do - * not invoke setup_affinity() for the secondary - * handlers as everything is already set up. Even for - * interrupts marked with IRQF_NO_BALANCE this is - * correct as we want the thread to move to the cpu(s) - * on which the requesting code placed the interrupt. + * The affinity can not be established yet, but it will be once the + * interrupt is enabled. Delay and defer the actual setting to the + * thread itself once it is ready to run. In the meantime, prevent + * it from ever being re-affined directly by cpuset or + * housekeeping. The proper way to do it is to re-affine the whole + * vector. */ - set_bit(IRQTF_AFFINITY, &new->thread_flags); + kthread_bind_mask(t, cpu_possible_mask); + + /* + * Ensure the thread adjusts the affinity once it reaches the + * thread function. + */ + new->thread_flags = BIT(IRQTF_AFFINITY); + return 0; } From 3de5e46e50abc01a1cee7e12b657e083fc5ed638 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Nov 2025 15:35:00 +0100 Subject: [PATCH 33/33] genirq: Remove cpumask availability check on kthread affinity setting Failing to allocate the affinity mask of an interrupt descriptor fails the whole descriptor initialization. It is then guaranteed that the cpumask is always available whenever the related interrupt objects are alive, such as the kthread handler. Therefore remove the superfluous check since it is merely a historical leftover. Get rid also of the comments above it that are obsolete and useless. Suggested-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://patch.msgid.link/20251121143500.42111-4-frederic@kernel.org --- kernel/irq/manage.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 61da1c68ff82..1615b6476210 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1001,7 +1001,6 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { cpumask_var_t mask; - bool valid = false; if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) return; @@ -1018,21 +1017,13 @@ static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *a } scoped_guard(raw_spinlock_irq, &desc->lock) { - /* - * This code is triggered unconditionally. Check the affinity - * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. - */ - if (cpumask_available(desc->irq_common_data.affinity)) { - const struct cpumask *m; + const struct cpumask *m; - m = irq_data_get_effective_affinity_mask(&desc->irq_data); - cpumask_copy(mask, m); - valid = true; - } + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); } - if (valid) - set_cpus_allowed_ptr(current, mask); + set_cpus_allowed_ptr(current, mask); free_cpumask_var(mask); } #else