From c3d78c34ad009a7cce57ae5b5c93e1bd03bb31a3 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 22 Sep 2025 11:30:10 +0800
Subject: [PATCH 01/94] perf: arm_pmuv3: Don't use PMCCNTR_EL0 on SMT cores

CPU_CYCLES is expected to count the logical CPU (PE) clock. Currently it's
preferred to use PMCCNTR_EL0 for counting CPU_CYCLES, but it'll count the
processor clock rather than the PE clock (ARM DDI0487 L.b D13.1.3) if one
of the SMT siblings is not idle on a multi-threaded implementation. So
don't use it on SMT cores.

Introduce topology_core_has_smt() to detect an SMT implementation and
cache the result in arm_pmu::has_smt during allocation.

When counting cycles on SMT CPUs 2-3 while CPU 3 is idle, without this
patch we'll get:

[root@client1 tmp]# perf stat -e cycles -A -C 2-3 -- stress-ng -c 1 --taskset 2 --timeout 1
[...]
Performance counter stats for 'CPU(s) 2-3':

CPU2        2880457316      cycles
CPU3        2880459810      cycles

       1.254688470 seconds time elapsed

With this patch the idle state of CPU3 is observed as expected:

[root@client1 ~]# perf stat -e cycles -A -C 2-3 -- stress-ng -c 1 --taskset 2 --timeout 1
[...]
Performance counter stats for 'CPU(s) 2-3':

CPU2        2558580492      cycles
CPU3            305749      cycles

       1.113626410 seconds time elapsed

Signed-off-by: Yicong Yang
Signed-off-by: Will Deacon
---
 drivers/perf/arm_pmu.c        |  6 ++++++
 drivers/perf/arm_pmuv3.c      | 10 ++++++++++
 include/linux/arch_topology.h | 11 +++++++++++
 include/linux/perf/arm_pmu.h  |  1 +
 4 files changed, 28 insertions(+)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 5c310e803dd7..ae437791b5f8 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -925,6 +925,12 @@ int armpmu_register(struct arm_pmu *pmu)
 	if (ret)
 		return ret;
 
+	/*
+	 * By this stage we know the supported CPUs on both DT and ACPI
+	 * platforms, so detect the SMT implementation.
+	 */
+	pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));
+
 	if (!pmu->set_event_filter)
 		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
 
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 69c5cc8f5606..d1d6000517b2 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -981,6 +981,7 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
 static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
 				     struct perf_event *event)
 {
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
@@ -1001,6 +1002,15 @@ static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
 	if (has_branch_stack(event))
 		return false;
 
+	/*
+	 * PMCCNTR_EL0 counts the processor clock rather than the PE clock
+	 * (ARM DDI0487 L.b D13.1.3), which means it'll continue counting
+	 * on a WFI PE if one of its SMT siblings is not idle on a
+	 * multi-threaded implementation. So don't use it on SMT cores.
+	 */
+	if (cpu_pmu->has_smt)
+		return false;
+
 	return true;
 }
 
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index d72d6e5aa200..daa1af2e8204 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -89,6 +89,17 @@ void remove_cpu_topology(unsigned int cpuid);
 void reset_cpu_topology(void);
 int parse_acpi_topology(void);
 void freq_inv_set_max_ratio(int cpu, u64 max_rate);
+
+/*
+ * Architectures like ARM64 don't have a reliable architectural way to get
+ * SMT information and depend on the firmware (ACPI/OF) to report it.
+ * Non-SMT cores don't initialize thread_id, so we can use this to detect
+ * an SMT implementation.
+ */
+static inline bool topology_core_has_smt(int cpu)
+{
+	return cpu_topology[cpu].thread_id != -1;
+}
+
 #endif
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 93c9a26492fc..2d39322c40c4 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -119,6 +119,7 @@ struct arm_pmu {
 	/* PMUv3 only */
 	int		pmuver;
+	bool		has_smt;
 	u64		reg_pmmir;
 	u64		reg_brbidr;
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS	0x40

From a2573bc7908da8e6eb63dc4e449b7c1724e3849b Mon Sep 17 00:00:00 2001
From: Besar Wicaksono
Date: Tue, 30 Sep 2025 00:26:01 +0000
Subject: [PATCH 02/94] perf/arm_cspmu: Add callback to reset filter config

An implementer may need to reset a filter config when stopping a counter,
so add a callback for this.

Reviewed-by: Ilkka Koskinen
Reviewed-by: Suzuki K Poulose
Signed-off-by: Besar Wicaksono
Signed-off-by: Will Deacon
---
 drivers/perf/arm_cspmu/arm_cspmu.c | 4 ++++
 drivers/perf/arm_cspmu/arm_cspmu.h | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index efa9b229e701..82d7ed6202f1 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -815,6 +815,10 @@ static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
 		return;
 
 	arm_cspmu_disable_counter(cspmu, hwc->idx);
+
+	if (cspmu->impl.ops.reset_ev_filter)
+		cspmu->impl.ops.reset_ev_filter(cspmu, event);
+
 	arm_cspmu_event_update(event);
 
 	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 19684b76bd96..23bfc4a58064 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -152,11 +152,13 @@ struct arm_cspmu_impl_ops {
 	bool (*is_cycle_counter_event)(const struct perf_event *event);
 	/* Decode event type/id from configs */
 	u32 (*event_type)(const struct perf_event *event);
-	/* Set event filters */
+	/* Set/reset event filters */
 	void (*set_cc_filter)(struct arm_cspmu *cspmu,
 			      const struct perf_event *event);
 	void (*set_ev_filter)(struct arm_cspmu *cspmu,
 			      const struct perf_event *event);
+	void (*reset_ev_filter)(struct arm_cspmu *cspmu,
+				const struct perf_event *event);
 	/* Implementation specific event validation */
 	int (*validate_event)(struct arm_cspmu *cspmu,
 			      struct perf_event *event);

From 04330be8dc7fddf36f4adb1271932788ad47e7ad Mon Sep 17 00:00:00 2001
From: Besar Wicaksono
Date: Tue, 30 Sep 2025 00:26:02 +0000
Subject: [PATCH 03/94] perf/arm_cspmu: Add pmpidr support

The PMIIDR value is composed of the values in the PMPIDR registers, so
the PMPIDR registers can be used as an alternative means of device
identification on systems that do not implement PMIIDR.
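With the synthesised value in hand, implementer matching works exactly as
it does for a native PMIIDR. A minimal sketch, using the JEP106 code this
driver already defines for NVIDIA:

	u32 impl = FIELD_GET(PMIIDR_IMPLEMENTER, cspmu->impl.pmiidr);

	if (impl == ARM_CSPMU_IMPL_ID_NVIDIA)	/* JEP106 code 0x36B */
		/* bind the nvidia_cspmu backend */;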
Reviewed-by: Ilkka Koskinen Signed-off-by: Besar Wicaksono Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 44 +++++++++++++++++++++++++-- drivers/perf/arm_cspmu/arm_cspmu.h | 35 +++++++++++++++++++-- drivers/perf/arm_cspmu/nvidia_cspmu.c | 2 +- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 82d7ed6202f1..33ad2cab5c16 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -322,14 +322,14 @@ static struct arm_cspmu_impl_match impl_match[] = { { .module_name = "nvidia_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, { .module_name = "ampere_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, @@ -351,6 +351,44 @@ static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) return NULL; } +static u32 arm_cspmu_get_pmiidr(struct arm_cspmu *cspmu) +{ + u32 pmiidr, pmpidr; + + pmiidr = readl(cspmu->base0 + PMIIDR); + + if (pmiidr != 0) + return pmiidr; + + /* Construct PMIIDR value from PMPIDRs. */ + + pmpidr = readl(cspmu->base0 + PMPIDR0); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_0, + FIELD_GET(PMPIDR0_PART_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR1); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_1, + FIELD_GET(PMPIDR1_PART_1, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_0, + FIELD_GET(PMPIDR1_DES_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR2); + pmiidr |= FIELD_PREP(PMIIDR_VARIANT, + FIELD_GET(PMPIDR2_REVISION, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_1, + FIELD_GET(PMPIDR2_DES_1, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR3); + pmiidr |= FIELD_PREP(PMIIDR_REVISION, + FIELD_GET(PMPIDR3_REVAND, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR4); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_2, + FIELD_GET(PMPIDR4_DES_2, pmpidr)); + + return pmiidr; +} + #define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) @@ -361,7 +399,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) /* Start with a default PMU implementation */ cspmu->impl.module = THIS_MODULE; - cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR); + cspmu->impl.pmiidr = arm_cspmu_get_pmiidr(cspmu); cspmu->impl.ops = (struct arm_cspmu_impl_ops) { DEFAULT_IMPL_OP(get_event_attrs), DEFAULT_IMPL_OP(get_format_attrs), diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 23bfc4a58064..cd65a58dbd88 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -86,6 +86,11 @@ #define PMCFGR 0xE00 #define PMCR 0xE04 #define PMIIDR 0xE08 +#define PMPIDR0 0xFE0 +#define PMPIDR1 0xFE4 +#define PMPIDR2 0xFE8 +#define PMPIDR3 0xFEC +#define PMPIDR4 0xFD0 /* PMCFGR register field */ #define PMCFGR_NCG GENMASK(31, 28) @@ -115,8 +120,34 @@ #define PMCR_E BIT(0) /* PMIIDR register field */ -#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0) -#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20) +#define PMIIDR_IMPLEMENTER GENMASK(11, 0) +#define PMIIDR_IMPLEMENTER_DES_0 GENMASK(3, 0) +#define PMIIDR_IMPLEMENTER_DES_1 GENMASK(6, 4) +#define PMIIDR_IMPLEMENTER_DES_2 GENMASK(11, 8) +#define PMIIDR_REVISION GENMASK(15, 12) +#define PMIIDR_VARIANT GENMASK(19, 16) 
+#define PMIIDR_PRODUCTID GENMASK(31, 20) +#define PMIIDR_PRODUCTID_PART_0 GENMASK(27, 20) +#define PMIIDR_PRODUCTID_PART_1 GENMASK(31, 28) + +/* PMPIDR0 register field */ +#define PMPIDR0_PART_0 GENMASK(7, 0) + +/* PMPIDR1 register field */ +#define PMPIDR1_DES_0 GENMASK(7, 4) +#define PMPIDR1_PART_1 GENMASK(3, 0) + +/* PMPIDR2 register field */ +#define PMPIDR2_REVISION GENMASK(7, 4) +#define PMPIDR2_DES_1 GENMASK(2, 0) + +/* PMPIDR3 register field */ +#define PMPIDR3_REVAND GENMASK(7, 4) +#define PMPIDR3_CMOD GENMASK(3, 0) + +/* PMPIDR4 register field */ +#define PMPIDR4_SIZE GENMASK(7, 4) +#define PMPIDR4_DES_2 GENMASK(3, 0) /* JEDEC-assigned JEP106 identification code */ #define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index dc6d4e3e2a1b..b6cec351a142 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -322,7 +322,7 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) if (!ctx) return -ENOMEM; - prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr); + prodid = FIELD_GET(PMIIDR_PRODUCTID, cspmu->impl.pmiidr); /* Find matching PMU. */ for (; match->prodid; match++) { From 82dfd72bfb0362a3900179595032b65be11582da Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Tue, 30 Sep 2025 00:26:03 +0000 Subject: [PATCH 04/94] perf/arm_cspmu: nvidia: Add revision id matching Distinguish NVIDIA devices by revision and variant bits in PMIIDR register in addition to product id. Reviewed-by: Ilkka Koskinen Signed-off-by: Besar Wicaksono Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/nvidia_cspmu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index b6cec351a142..ac91dc46501d 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -23,7 +23,7 @@ #define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0) -#define NV_PRODID_MASK GENMASK(31, 0) +#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION) #define NV_FORMAT_NAME_GENERIC 0 @@ -220,7 +220,7 @@ struct nv_cspmu_match { static const struct nv_cspmu_match nv_cspmu_match[] = { { - .prodid = 0x103, + .prodid = 0x10300000, .prodid_mask = NV_PRODID_MASK, .filter_mask = NV_PCIE_FILTER_ID_MASK, .filter_default_val = NV_PCIE_FILTER_ID_MASK, @@ -230,7 +230,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { .format_attr = pcie_pmu_format_attrs }, { - .prodid = 0x104, + .prodid = 0x10400000, .prodid_mask = NV_PRODID_MASK, .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, @@ -240,7 +240,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { .format_attr = nvlink_c2c_pmu_format_attrs }, { - .prodid = 0x105, + .prodid = 0x10500000, .prodid_mask = NV_PRODID_MASK, .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, @@ -250,7 +250,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { .format_attr = nvlink_c2c_pmu_format_attrs }, { - .prodid = 0x106, + .prodid = 0x10600000, .prodid_mask = NV_PRODID_MASK, .filter_mask = NV_CNVL_FILTER_ID_MASK, .filter_default_val = NV_CNVL_FILTER_ID_MASK, @@ -260,7 +260,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { .format_attr = cnvlink_pmu_format_attrs }, { - .prodid = 0x2CF, + .prodid = 0x2CF00000, .prodid_mask = NV_PRODID_MASK, .filter_mask = 0x0, .filter_default_val = 0x0, @@ -312,7 +312,6 @@ 
static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { - u32 prodid; struct nv_cspmu_ctx *ctx; struct device *dev = cspmu->dev; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; @@ -322,13 +321,12 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) if (!ctx) return -ENOMEM; - prodid = FIELD_GET(PMIIDR_PRODUCTID, cspmu->impl.pmiidr); - /* Find matching PMU. */ for (; match->prodid; match++) { const u32 prodid_mask = match->prodid_mask; - if ((match->prodid & prodid_mask) == (prodid & prodid_mask)) + if ((match->prodid & prodid_mask) == + (cspmu->impl.pmiidr & prodid_mask)) break; } From decc3684c24112286c527188bb09dd6eaf720cc0 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Tue, 30 Sep 2025 00:26:04 +0000 Subject: [PATCH 05/94] perf/arm_cspmu: nvidia: Add pmevfiltr2 support Support NVIDIA PMU that utilizes the optional event filter2 register. Reviewed-by: Ilkka Koskinen Signed-off-by: Besar Wicaksono Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/nvidia_cspmu.c | 176 +++++++++++++++++++------- 1 file changed, 133 insertions(+), 43 deletions(-) diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index ac91dc46501d..e06a06d3407b 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -40,10 +40,21 @@ struct nv_cspmu_ctx { const char *name; - u32 filter_mask; - u32 filter_default_val; + struct attribute **event_attr; struct attribute **format_attr; + + u32 filter_mask; + u32 filter_default_val; + u32 filter2_mask; + u32 filter2_default_val; + + u32 (*get_filter)(const struct perf_event *event); + u32 (*get_filter2)(const struct perf_event *event); + + void *data; + + int (*init_data)(struct arm_cspmu *cspmu); }; static struct attribute *scf_pmu_event_attrs[] = { @@ -144,6 +155,7 @@ static struct attribute *cnvlink_pmu_format_attrs[] = { static struct attribute *generic_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, + ARM_CSPMU_FORMAT_FILTER2_ATTR, NULL, }; @@ -184,13 +196,36 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event) return filter_val; } +static u32 nv_cspmu_event_filter2(const struct perf_event *event) +{ + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + + const u32 filter_val = event->attr.config2 & ctx->filter2_mask; + + if (filter_val == 0) + return ctx->filter2_default_val; + + return filter_val; +} + static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu, const struct perf_event *event) { - u32 filter = nv_cspmu_event_filter(event); - u32 offset = PMEVFILTR + (4 * event->hw.idx); + u32 filter, offset; + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + offset = 4 * event->hw.idx; - writel(filter, cspmu->base0 + offset); + if (ctx->get_filter) { + filter = ctx->get_filter(event); + writel(filter, cspmu->base0 + PMEVFILTR + offset); + } + + if (ctx->get_filter2) { + filter = ctx->get_filter2(event); + writel(filter, cspmu->base0 + PMEVFILT2R + offset); + } } static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu, @@ -210,74 +245,120 @@ enum nv_cspmu_name_fmt { struct nv_cspmu_match { u32 prodid; u32 prodid_mask; - u64 filter_mask; - u32 filter_default_val; const char *name_pattern; enum nv_cspmu_name_fmt name_fmt; - struct attribute **event_attr; - struct attribute **format_attr; + struct nv_cspmu_ctx template_ctx; + struct arm_cspmu_impl_ops ops; }; static const struct nv_cspmu_match 
nv_cspmu_match[] = { { .prodid = 0x10300000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_PCIE_FILTER_ID_MASK, - .filter_default_val = NV_PCIE_FILTER_ID_MASK, .name_pattern = "nvidia_pcie_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = pcie_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = pcie_pmu_format_attrs, + .filter_mask = NV_PCIE_FILTER_ID_MASK, + .filter_default_val = NV_PCIE_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0x10400000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0x10500000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0x10600000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_CNVL_FILTER_ID_MASK, - .filter_default_val = NV_CNVL_FILTER_ID_MASK, .name_pattern = "nvidia_cnvlink_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = cnvlink_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = cnvlink_pmu_format_attrs, + .filter_mask = NV_CNVL_FILTER_ID_MASK, + .filter_default_val = NV_CNVL_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0x2CF00000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, - .filter_default_val = 0x0, .name_pattern = "nvidia_scf_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = scf_pmu_event_attrs, - .format_attr = scf_pmu_format_attrs + .template_ctx = { + .event_attr = scf_pmu_event_attrs, + .format_attr = scf_pmu_format_attrs, + .filter_mask = 0x0, + .filter_default_val = 0x0, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0, .prodid_mask = 0, - .filter_mask = NV_GENERIC_FILTER_ID_MASK, - .filter_default_val = NV_GENERIC_FILTER_ID_MASK, .name_pattern = "nvidia_uncore_pmu_%u", .name_fmt = NAME_FMT_GENERIC, - .event_attr = generic_pmu_event_attrs, - .format_attr = generic_pmu_format_attrs + .template_ctx = { + .event_attr = generic_pmu_event_attrs, + .format_attr = 
generic_pmu_format_attrs, + .filter_mask = NV_GENERIC_FILTER_ID_MASK, + .filter_default_val = NV_GENERIC_FILTER_ID_MASK, + .filter2_mask = NV_GENERIC_FILTER_ID_MASK, + .filter2_default_val = NV_GENERIC_FILTER_ID_MASK, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = nv_cspmu_event_filter2, + .data = NULL, + .init_data = NULL + }, }, }; @@ -310,6 +391,14 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, return name; } +#define SET_OP(name, impl, match, default_op) \ + do { \ + if (match->ops.name) \ + impl->name = match->ops.name; \ + else if (default_op != NULL) \ + impl->name = default_op; \ + } while (false) + static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { struct nv_cspmu_ctx *ctx; @@ -330,20 +419,21 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) break; } - ctx->name = nv_cspmu_format_name(cspmu, match); - ctx->filter_mask = match->filter_mask; - ctx->filter_default_val = match->filter_default_val; - ctx->event_attr = match->event_attr; - ctx->format_attr = match->format_attr; + /* Initialize the context with the matched template. */ + memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx)); + ctx->name = nv_cspmu_format_name(cspmu, match); cspmu->impl.ctx = ctx; /* NVIDIA specific callbacks. */ - impl_ops->set_cc_filter = nv_cspmu_set_cc_filter; - impl_ops->set_ev_filter = nv_cspmu_set_ev_filter; - impl_ops->get_event_attrs = nv_cspmu_get_event_attrs; - impl_ops->get_format_attrs = nv_cspmu_get_format_attrs; - impl_ops->get_name = nv_cspmu_get_name; + SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter); + SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter); + SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs); + SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs); + SET_OP(get_name, impl_ops, match, nv_cspmu_get_name); + + if (ctx->init_data) + return ctx->init_data(cspmu); return 0; } From 8fa08f8835e57e1fd5e2994c9cf76c84dadc1235 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 20 Oct 2025 12:44:19 +0100 Subject: [PATCH 06/94] perf/arm-ni: Add NoC S3 support NoC S3 and its SI L1 sibling look largely similar to their predecessors, but add the notion of subfeatures to the discovery process, which we now use to find the event muxes for each device node. Plus, as ever, more mildly annoying shuffling around of some of the PMU registers (this time it's the counters...) 
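As a concrete illustration of the last point, the per-node PMU counters
moved as follows (offsets from the PMU node base, per the register
definitions in this patch):

	/* NI-700/NI-710AE */			/* NoC S3/SI L1 */
	NI700_PMEVCNTR(n): 0x008 + (n) * 8	NI_PMEVCNTR(n): 0x200 + (n) * 8
	NI700_PMCCNTR_L:   0x0f8		NI_PMCCNTR_L:   0x2f8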
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-ni.c | 92 +++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 21 deletions(-) diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index 1615a0564031..aa824abc629e 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -21,6 +21,11 @@ #define NI_CHILD_NODE_INFO 0x004 #define NI_CHILD_PTR(n) (0x008 + (n) * 4) +#define NI_NUM_SUB_FEATURES 0x100 +#define NI_SUB_FEATURE_TYPE(n) (0x108 + (n) * 8) +#define NI_SUB_FEATURE_PTR(n) (0x10c + (n) * 8) + +#define NI_SUB_FEATURE_TYPE_FCU 0x2 #define NI700_PMUSELA 0x00c @@ -33,9 +38,10 @@ #define NI_PIDR2_VERSION GENMASK(7, 4) /* PMU node */ -#define NI_PMEVCNTR(n) (0x008 + (n) * 8) -#define NI_PMCCNTR_L 0x0f8 -#define NI_PMCCNTR_U 0x0fc +#define NI700_PMEVCNTR(n) (0x008 + (n) * 8) +#define NI700_PMCCNTR_L 0x0f8 +#define NI_PMEVCNTR(n) (0x200 + (n) * 8) +#define NI_PMCCNTR_L 0x2f8 #define NI_PMEVTYPER(n) (0x400 + (n) * 4) #define NI_PMEVTYPER_NODE_TYPE GENMASK(12, 9) #define NI_PMEVTYPER_NODE_ID GENMASK(8, 0) @@ -66,6 +72,8 @@ enum ni_part { PART_NI_700 = 0x43b, PART_NI_710AE = 0x43d, + PART_NOC_S3 = 0x43f, + PART_SI_L1 = 0x455, }; enum ni_node_type { @@ -79,6 +87,10 @@ enum ni_node_type { NI_HSNI, NI_HMNI, NI_PMNI, + NI_TSNI, + NI_TMNI, + NI_CMNI = 0x0e, + NI_MCN = 0x63, }; struct arm_ni_node { @@ -179,6 +191,9 @@ static struct attribute *arm_ni_event_attrs[] = { NI_EVENT_ATTR(hsni, NI_HSNI), NI_EVENT_ATTR(hmni, NI_HMNI), NI_EVENT_ATTR(pmni, NI_PMNI), + NI_EVENT_ATTR(tsni, NI_TSNI), + NI_EVENT_ATTR(tmni, NI_TMNI), + NI_EVENT_ATTR(cmni, NI_CMNI), NULL }; @@ -332,16 +347,16 @@ static int arm_ni_event_init(struct perf_event *event) return -EINVAL; } -static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd) +static u64 arm_ni_read_ccnt(void __iomem *pmccntr) { u64 l, u_old, u_new; int retries = 3; /* 1st time unlucky, 2nd improbable, 3rd just broken */ - u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + u_new = readl_relaxed(pmccntr + 4); do { u_old = u_new; - l = readl_relaxed(cd->pmu_base + NI_PMCCNTR_L); - u_new = readl_relaxed(cd->pmu_base + NI_PMCCNTR_U); + l = readl_relaxed(pmccntr); + u_new = readl_relaxed(pmccntr + 4); } while (u_new != u_old && --retries); WARN_ON(!retries); @@ -350,7 +365,6 @@ static u64 arm_ni_read_ccnt(struct arm_ni_cd *cd) static void arm_ni_event_read(struct perf_event *event) { - struct arm_ni_cd *cd = pmu_to_cd(event->pmu); struct hw_perf_event *hw = &event->hw; u64 count, prev; bool ccnt = hw->idx == NI_CCNT_IDX; @@ -358,9 +372,9 @@ static void arm_ni_event_read(struct perf_event *event) do { prev = local64_read(&hw->prev_count); if (ccnt) - count = arm_ni_read_ccnt(cd); + count = arm_ni_read_ccnt((void __iomem *)event->hw.event_base); else - count = readl_relaxed(cd->pmu_base + NI_PMEVCNTR(hw->idx)); + count = readl_relaxed((void __iomem *)event->hw.event_base); } while (local64_cmpxchg(&hw->prev_count, prev, count) != prev); count -= prev; @@ -385,16 +399,21 @@ static void arm_ni_event_stop(struct perf_event *event, int flags) arm_ni_event_read(event); } -static void arm_ni_init_ccnt(struct arm_ni_cd *cd) +static void arm_ni_init_ccnt(struct hw_perf_event *hw) { - local64_set(&cd->ccnt->hw.prev_count, S64_MIN); - lo_hi_writeq_relaxed(S64_MIN, cd->pmu_base + NI_PMCCNTR_L); + local64_set(&hw->prev_count, S64_MIN); + lo_hi_writeq_relaxed(S64_MIN, (void __iomem *)hw->event_base); } -static void arm_ni_init_evcnt(struct arm_ni_cd *cd, int idx) +static void arm_ni_init_evcnt(struct hw_perf_event *hw) { - 
local64_set(&cd->evcnt[idx]->hw.prev_count, S32_MIN); - writel_relaxed(S32_MIN, cd->pmu_base + NI_PMEVCNTR(idx)); + local64_set(&hw->prev_count, S32_MIN); + writel_relaxed(S32_MIN, (void __iomem *)hw->event_base); +} + +static bool arm_ni_is_7xx(const struct arm_ni *ni) +{ + return ni->part == PART_NI_700 || ni->part == PART_NI_710AE; } static int arm_ni_event_add(struct perf_event *event, int flags) @@ -403,14 +422,17 @@ static int arm_ni_event_add(struct perf_event *event, int flags) struct hw_perf_event *hw = &event->hw; struct arm_ni_unit *unit; enum ni_node_type type = NI_EVENT_TYPE(event); + bool is_7xx = arm_ni_is_7xx(cd_to_ni(cd)); u32 reg; if (type == NI_PMU) { if (cd->ccnt) return -ENOSPC; hw->idx = NI_CCNT_IDX; + hw->event_base = (unsigned long)cd->pmu_base + + is_7xx ? NI700_PMCCNTR_L : NI_PMCCNTR_L; cd->ccnt = event; - arm_ni_init_ccnt(cd); + arm_ni_init_ccnt(hw); } else { hw->idx = 0; while (cd->evcnt[hw->idx]) { @@ -420,7 +442,9 @@ static int arm_ni_event_add(struct perf_event *event, int flags) cd->evcnt[hw->idx] = event; unit = (void *)hw->config_base; unit->event[hw->idx] = NI_EVENT_EVENTID(event); - arm_ni_init_evcnt(cd, hw->idx); + hw->event_base = (unsigned long)cd->pmu_base + + is_7xx ? NI700_PMEVCNTR(hw->idx) : NI_PMEVCNTR(hw->idx); + arm_ni_init_evcnt(hw); lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela); reg = FIELD_PREP(NI_PMEVTYPER_NODE_TYPE, type) | @@ -457,7 +481,7 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id) ret = IRQ_HANDLED; if (!(WARN_ON(!cd->ccnt))) { arm_ni_event_read(cd->ccnt); - arm_ni_init_ccnt(cd); + arm_ni_init_ccnt(&cd->ccnt->hw); } } for (int i = 0; i < NI_NUM_COUNTERS; i++) { @@ -466,7 +490,7 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id) ret = IRQ_HANDLED; if (!(WARN_ON(!cd->evcnt[i]))) { arm_ni_event_read(cd->evcnt[i]); - arm_ni_init_evcnt(cd, i); + arm_ni_init_evcnt(&cd->evcnt[i]->hw); } } writel_relaxed(reg, cd->pmu_base + NI_PMOVSCLR); @@ -476,6 +500,25 @@ static irqreturn_t arm_ni_handle_irq(int irq, void *dev_id) } } +static void __iomem *arm_ni_get_pmusel(struct arm_ni *ni, void __iomem *unit_base) +{ + u32 type, ptr, num; + + if (arm_ni_is_7xx(ni)) + return unit_base + NI700_PMUSELA; + + num = readl_relaxed(unit_base + NI_NUM_SUB_FEATURES); + for (int i = 0; i < num; i++) { + type = readl_relaxed(unit_base + NI_SUB_FEATURE_TYPE(i)); + if (type != NI_SUB_FEATURE_TYPE_FCU) + continue; + ptr = readl_relaxed(unit_base + NI_SUB_FEATURE_PTR(i)); + return ni->base + ptr; + } + /* Should be impossible */ + return NULL; +} + static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_start) { struct arm_ni_cd *cd = ni->cds + node->id; @@ -512,13 +555,18 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s case NI_HSNI: case NI_HMNI: case NI_PMNI: - unit->pmusela = unit_base + NI700_PMUSELA; + case NI_TSNI: + case NI_TMNI: + case NI_CMNI: + unit->pmusela = arm_ni_get_pmusel(ni, unit_base); writel_relaxed(1, unit->pmusela); if (readl_relaxed(unit->pmusela) != 1) dev_info(ni->dev, "No access to node 0x%04x%04x\n", unit->id, unit->type); else unit->ns = true; break; + case NI_MCN: + break; default: /* * e.g. 
FMU - thankfully bits 3:2 of FMU_ERR_FR0 are RES0 so @@ -649,6 +697,8 @@ static int arm_ni_probe(struct platform_device *pdev) switch (part) { case PART_NI_700: case PART_NI_710AE: + case PART_NOC_S3: + case PART_SI_L1: break; default: dev_WARN(&pdev->dev, "Unknown part number: 0x%03x, this may go badly\n", part); From 970e1e41805f0bd49dc234330a9390f4708d097d Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 22 Oct 2025 19:53:25 +0800 Subject: [PATCH 07/94] perf: arm_cspmu: fix error handling in arm_cspmu_impl_unregister() driver_find_device() calls get_device() to increment the reference count once a matching device is found. device_release_driver() releases the driver, but it does not decrease the reference count that was incremented by driver_find_device(). At the end of the loop, there is no put_device() to balance the reference count. To avoid reference count leakage, add put_device() to decrease the reference count. Found by code review. Cc: stable@vger.kernel.org Fixes: bfc653aa89cb ("perf: arm_cspmu: Separate Arm and vendor module") Signed-off-by: Ma Ke Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 33ad2cab5c16..34430b68f602 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1407,8 +1407,10 @@ void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match) /* Unbind the driver from all matching backend devices. */ while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL, - match, arm_cspmu_match_device))) + match, arm_cspmu_match_device))) { device_release_driver(dev); + put_device(dev); + } mutex_lock(&arm_cspmu_lock); From 989b40b7578a2be7b0388522d33d751b257d59d9 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 29 Oct 2025 16:34:24 -0500 Subject: [PATCH 08/94] perf: arm_pmuv3: Add new Cortex and C1 CPU PMUs Add CPU PMU compatible strings for Cortex-A320, Cortex-A520AE, Cortex-A720AE, and C1 Nano/Premium/Pro/Ultra. 
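Each new CPU only needs the boilerplate generated by PMUV3_INIT_SIMPLE();
expanded, an entry amounts to roughly the following (a sketch of the
existing macro's output, not new code):

	static int armv8_c1_pro_pmu_init(struct arm_pmu *cpu_pmu)
	{
		return armv8_pmu_init(cpu_pmu, "armv8_c1_pro_pmu",
				      armv8_pmuv3_map_event);
	}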
Signed-off-by: Rob Herring (Arm) Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index d1d6000517b2..dc5d8626dc98 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1475,6 +1475,10 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ PMUV3_INIT_SIMPLE(armv8_pmuv3) +PMUV3_INIT_SIMPLE(armv8_c1_nano) +PMUV3_INIT_SIMPLE(armv8_c1_premium) +PMUV3_INIT_SIMPLE(armv8_c1_pro) +PMUV3_INIT_SIMPLE(armv8_c1_ultra) PMUV3_INIT_SIMPLE(armv8_cortex_a34) PMUV3_INIT_SIMPLE(armv8_cortex_a55) PMUV3_INIT_SIMPLE(armv8_cortex_a65) @@ -1482,11 +1486,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a75) PMUV3_INIT_SIMPLE(armv8_cortex_a76) PMUV3_INIT_SIMPLE(armv8_cortex_a77) PMUV3_INIT_SIMPLE(armv8_cortex_a78) +PMUV3_INIT_SIMPLE(armv9_cortex_a320) PMUV3_INIT_SIMPLE(armv9_cortex_a510) PMUV3_INIT_SIMPLE(armv9_cortex_a520) +PMUV3_INIT_SIMPLE(armv9_cortex_a520ae) PMUV3_INIT_SIMPLE(armv9_cortex_a710) PMUV3_INIT_SIMPLE(armv9_cortex_a715) PMUV3_INIT_SIMPLE(armv9_cortex_a720) +PMUV3_INIT_SIMPLE(armv9_cortex_a720ae) PMUV3_INIT_SIMPLE(armv9_cortex_a725) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) @@ -1518,6 +1525,10 @@ PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event) static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init}, + {.compatible = "arm,c1-nano-pmu", .data = armv8_c1_nano_pmu_init}, + {.compatible = "arm,c1-premium-pmu", .data = armv8_c1_premium_pmu_init}, + {.compatible = "arm,c1-pro-pmu", .data = armv8_c1_pro_pmu_init}, + {.compatible = "arm,c1-ultra-pmu", .data = armv8_c1_ultra_pmu_init}, {.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init}, {.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init}, @@ -1530,11 +1541,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init}, {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, + {.compatible = "arm,cortex-a320-pmu", .data = armv9_cortex_a320_pmu_init}, {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init}, + {.compatible = "arm,cortex-a520ae-pmu", .data = armv9_cortex_a520ae_pmu_init}, {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init}, {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, + {.compatible = "arm,cortex-a720ae-pmu", .data = armv9_cortex_a720ae_pmu_init}, {.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, From 2d7a824807015209de4dd58c7ac240c5a276753e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 4 Nov 2025 18:29:40 +0000 Subject: [PATCH 09/94] perf/arm-ni: Fix and optimise register offset calculation LKP points out an operator precedence oversight in the new NoC S3 support that, annoyingly, my local W=1 build didn't flag. In fixing that, we can also take the similarly-missed opportunity to cache the version check itself at event_init time. 
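For the record, the trap here is that '+' binds tighter than '?:', so the
unparenthesised expression introduced by the previous patch:

	hw->event_base = (unsigned long)cd->pmu_base +
			 is_7xx ? NI700_PMCCNTR_L : NI_PMCCNTR_L;

parses as ((base + is_7xx) ? NI700_PMCCNTR_L : NI_PMCCNTR_L), silently
yielding a bare register offset rather than an absolute address. The fix
below caches the check in hw->flags and parenthesises the conditional.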
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202511041749.ok8zDP6u-lkp@intel.com/ Fixes: 8fa08f8835e5 ("perf/arm-ni: Add NoC S3 support") Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-ni.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index aa824abc629e..66858c65215d 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -323,9 +323,15 @@ static int arm_ni_validate_group(struct perf_event *event) return 0; } +static bool arm_ni_is_7xx(const struct arm_ni *ni) +{ + return ni->part == PART_NI_700 || ni->part == PART_NI_710AE; +} + static int arm_ni_event_init(struct perf_event *event) { struct arm_ni_cd *cd = pmu_to_cd(event->pmu); + struct arm_ni *ni; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -333,7 +339,10 @@ static int arm_ni_event_init(struct perf_event *event) if (is_sampling_event(event)) return -EINVAL; - event->cpu = cd_to_ni(cd)->cpu; + ni = cd_to_ni(cd); + event->cpu = ni->cpu; + event->hw.flags = arm_ni_is_7xx(ni); + if (NI_EVENT_TYPE(event) == NI_PMU) return arm_ni_validate_group(event); @@ -411,18 +420,12 @@ static void arm_ni_init_evcnt(struct hw_perf_event *hw) writel_relaxed(S32_MIN, (void __iomem *)hw->event_base); } -static bool arm_ni_is_7xx(const struct arm_ni *ni) -{ - return ni->part == PART_NI_700 || ni->part == PART_NI_710AE; -} - static int arm_ni_event_add(struct perf_event *event, int flags) { struct arm_ni_cd *cd = pmu_to_cd(event->pmu); struct hw_perf_event *hw = &event->hw; struct arm_ni_unit *unit; enum ni_node_type type = NI_EVENT_TYPE(event); - bool is_7xx = arm_ni_is_7xx(cd_to_ni(cd)); u32 reg; if (type == NI_PMU) { @@ -430,7 +433,7 @@ static int arm_ni_event_add(struct perf_event *event, int flags) return -ENOSPC; hw->idx = NI_CCNT_IDX; hw->event_base = (unsigned long)cd->pmu_base + - is_7xx ? NI700_PMCCNTR_L : NI_PMCCNTR_L; + (hw->flags ? NI700_PMCCNTR_L : NI_PMCCNTR_L); cd->ccnt = event; arm_ni_init_ccnt(hw); } else { @@ -443,7 +446,7 @@ static int arm_ni_event_add(struct perf_event *event, int flags) unit = (void *)hw->config_base; unit->event[hw->idx] = NI_EVENT_EVENTID(event); hw->event_base = (unsigned long)cd->pmu_base + - is_7xx ? NI700_PMEVCNTR(hw->idx) : NI_PMEVCNTR(hw->idx); + (hw->flags ? NI700_PMEVCNTR(hw->idx) : NI_PMEVCNTR(hw->idx)); arm_ni_init_evcnt(hw); lo_hi_writeq_relaxed(le64_to_cpu(unit->pmusel), unit->pmusela); From 7ab06ea41af53aa1713186ceaa154179e4b0d4c9 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 5 Nov 2025 18:38:49 +0800 Subject: [PATCH 10/94] arch_topology: Provide a stub topology_core_has_smt() for !CONFIG_GENERIC_ARCH_TOPOLOGY The arm_pmu driver is using topology_core_has_smt() for retrieving the SMT implementation which depends on CONFIG_GENERIC_ARCH_TOPOLOGY. The config is optional on arm platforms so provide a !CONFIG_GENERIC_ARCH_TOPOLOGY stub for topology_core_has_smt(). 
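Returning false from the stub is the conservative default: the only caller
so far is armpmu_register(), and on !CONFIG_GENERIC_ARCH_TOPOLOGY platforms
the line below then simply preserves the pre-existing PMCCNTR_EL0
behaviour:

	/* drivers/perf/arm_pmu.c: now builds regardless of the config */
	pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));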
Fixes: c3d78c34ad00 ("perf: arm_pmuv3: Don't use PMCCNTR_EL0 on SMT cores")
Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-kbuild-all/202511041757.vuCGOmFc-lkp@intel.com/
Suggested-by: Will Deacon
Signed-off-by: Yicong Yang
Reviewed-by: Mark Brown
Signed-off-by: Will Deacon
---
 include/linux/arch_topology.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index daa1af2e8204..0c2a8b846c20 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -100,6 +100,10 @@ static inline bool topology_core_has_smt(int cpu)
 	return cpu_topology[cpu].thread_id != -1;
 }
 
-#endif
+#else
+
+static inline bool topology_core_has_smt(int cpu) { return false; }
+
+#endif /* CONFIG_GENERIC_ARCH_TOPOLOGY */
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */

From 4002068508caceb2a6f006fdf265a8de702768d0 Mon Sep 17 00:00:00 2001
From: Dawei Li
Date: Mon, 3 Nov 2025 23:23:45 +0800
Subject: [PATCH 11/94] arm64: Remove assertion on CONFIG_VMAP_STACK

CONFIG_VMAP_STACK has been selected unconditionally by the arm64 arch
since commit ef6861b8e6dd ("arm64: Mandate VMAP_STACK"). Remove the
redundant assertions and headers.

Signed-off-by: Dawei Li
Acked-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/vmap_stack.h | 4 ----
 arch/arm64/kernel/sdei.c            | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 20873099c035..75daee1a07e9 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -3,9 +3,7 @@
 #ifndef __ASM_VMAP_STACK_H
 #define __ASM_VMAP_STACK_H
 
-#include
 #include
-#include
 #include
 #include
 #include
@@ -19,8 +17,6 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
 {
 	void *p;
 
-	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
 	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
 			__builtin_return_address(0));
 	return kasan_reset_tag(p);
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 95169f7b6531..213ac72ce4fd 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -63,8 +63,6 @@ static void free_sdei_stacks(void)
 {
 	int cpu;
 
-	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
 	for_each_possible_cpu(cpu) {
 		_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
 		_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -88,8 +86,6 @@ static int init_sdei_stacks(void)
 	int cpu;
 	int err = 0;
 
-	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
-
 	for_each_possible_cpu(cpu) {
 		err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
 		if (err)

From d3b570eba7bf6f712e85e59dd37e8a9c6360b7b0 Mon Sep 17 00:00:00 2001
From: Ryo Takakura
Date: Wed, 5 Nov 2025 16:42:59 +0900
Subject: [PATCH 12/94] arm64: use SOFTIRQ_ON_OWN_STACK for enabling softirq stack

Architectures with HAVE_SOFTIRQ_ON_OWN_STACK use their dedicated softirq
stack when !PREEMPT_RT; this condition is ensured by SOFTIRQ_ON_OWN_STACK.
Let arm64 use SOFTIRQ_ON_OWN_STACK as well to select when the dedicated
stack is used.
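For reference, the substitution is safe because the new symbol folds in
the old condition; assuming arm64 selects HAVE_SOFTIRQ_ON_OWN_STACK (it
provides a dedicated softirq stack, as described above), the two guards
are equivalent:

	/* Before: */
	#ifndef CONFIG_PREEMPT_RT
	/* After: SOFTIRQ_ON_OWN_STACK means HAVE_SOFTIRQ_ON_OWN_STACK && !PREEMPT_RT */
	#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK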
Signed-off-by: Ryo Takakura Reviewed-by: Sebastian Andrzej Siewior Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index c0065a1d77cf..15dedb385b9e 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -62,7 +62,7 @@ static void __init init_irq_stacks(void) } } -#ifndef CONFIG_PREEMPT_RT +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK static void ____do_softirq(struct pt_regs *regs) { __do_softirq(); From bf6b3fed186665cecbf0cf8be72da15d6585bfb0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 8 Oct 2025 16:01:30 -0700 Subject: [PATCH 13/94] arm64: remove unused ARCH_PFN_OFFSET This is only relevant to the FLATMEM memory model, which isn't an option since commit 782276b4d0ad ("arm64: Force SPARSEMEM_VMEMMAP as the only memory management model"). Signed-off-by: Omar Sandoval Acked-by: Will Deacon Reviewed-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1505c4acb38..585a46a122fe 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -392,7 +392,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) * virt_to_page(x) convert a _valid_ virtual address to struct page * * virt_addr_valid(x) indicates whether a virtual address is valid */ -#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if defined(CONFIG_DEBUG_VIRTUAL) #define page_to_virt(x) ({ \ From a7717cad615f2b169ccca93418a07eaa526b4a1a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 3 Nov 2025 16:04:17 +0000 Subject: [PATCH 14/94] kselftest/arm64: Align zt-test register dumps The zt-test output is awkward to read, as the 'Expected' value isn't dumped on its own line and isn't aligned with the 'Got' value beneath. For example: Mismatch: PID=5281, iteration=3270249 Expected [00a1146901a1146902a1146903a1146904a1146905a1146906a1146907a1146908a1146909a114690aa114690ba114690ca114690da114690ea114690fa11469] Got [00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] SVCR: 2 Add a newline, matching the other FPSIMD/SVE/SME tests, so that we get output that can be read more easily: Mismatch: PID=5281, iteration=3270249 Expected [00a1146901a1146902a1146903a1146904a1146905a1146906a1146907a1146908a1146909a114690aa114690ba114690ca114690da114690ea114690fa11469] Got [00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] SVCR: 2 Admittedly this isn't all that important when the 'Got' value is all zeroes, but otherwise this would be a major help for identifying which portion of the 'Got' value is not as expected. 
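For context, the two helpers differ only in the trailing newline; a sketch
of the convention in the shared selftest asm helpers, which the one-line
fix below relies on:

	bl	putdec		// print x0 in decimal
	bl	putdecn		// print x0 in decimal, then a newline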
Signed-off-by: Mark Rutland
Cc: Mark Brown
Cc: Shuah Khan
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kselftest@vger.kernel.org
Reviewed-by: Mark Brown
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/arm64/fp/zt-test.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index 38080f3c3280..a8df05771670 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -276,7 +276,7 @@ function barf
 	bl	putdec
 	puts	", iteration="
 	mov	x0, x22
-	bl	putdec
+	bl	putdecn
 	puts	"\tExpected ["
 	mov	x0, x10
 	mov	x1, x12

From fc1abd409318b81274566e1808c598b0b0462dc6 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Wed, 15 Oct 2025 03:10:24 +0100
Subject: [PATCH 15/94] arm64/mm: Drop cpu_set_[default|idmap]_tcr_t0sz()

These TCR_EL1 helpers don't have any other callers. Drop these redundant
indirections completely, thus making the code more compact and readable.
No functional change.

Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual
Acked-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/mmu_context.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0dbe3b29049b..1263ab98f836 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -61,11 +61,6 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
 	cpu_do_switch_mm(virt_to_phys(pgd),mm);
 }
 
-/*
- * TCR.T0SZ value to use when the ID map is active.
- */
-#define idmap_t0sz	TCR_T0SZ(IDMAP_VA_BITS)
-
 /*
  * Ensure TCR.T0SZ is set to the provided value.
  */
@@ -82,9 +77,6 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 	isb();
 }
 
-#define cpu_set_default_tcr_t0sz()	__cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual))
-#define cpu_set_idmap_tcr_t0sz()	__cpu_set_tcr_t0sz(idmap_t0sz)
-
 /*
  * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
  *
@@ -103,7 +95,7 @@ static inline void cpu_uninstall_idmap(void)
 
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	__cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual));
 
 	if (mm != &init_mm && !system_uses_ttbr0_pan())
 		cpu_switch_mm(mm->pgd, mm);
@@ -113,7 +105,7 @@ static inline void cpu_install_idmap(void)
 {
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	cpu_set_idmap_tcr_t0sz();
+	__cpu_set_tcr_t0sz(TCR_T0SZ(IDMAP_VA_BITS));
 
 	cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
 }

From 40374d308e4e456048d83991e937f13fc8bda8bf Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 15 Oct 2025 22:56:36 +0200
Subject: [PATCH 16/94] efi: Add missing static initializer for efi_mm::cpus_allowed_lock

Initialize the cpus_allowed_lock struct member of efi_mm.
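For a dynamically allocated mm this lock is initialised when the mm is set
up; efi_mm is defined statically, so the equivalent initialisation has to
be spelled out in its initialiser. A sketch of the two forms, the second
being what this patch adds:

	raw_spin_lock_init(&mm->cpus_allowed_lock);	/* dynamically created mm */

	.cpus_allowed_lock =
		__RAW_SPIN_LOCK_UNLOCKED(efi_mm.cpus_allowed_lock),	/* static efi_mm */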
Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas --- drivers/firmware/efi/efi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1ce428e2ac8a..fc407d891348 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -74,6 +74,9 @@ struct mm_struct efi_mm = { .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0}, +#ifdef CONFIG_SCHED_MM_CID + .cpus_allowed_lock = __RAW_SPIN_LOCK_UNLOCKED(efi_mm.cpus_allowed_lock), +#endif }; struct workqueue_struct *efi_rts_wq; From a2860501203cf7a2116adf3bb4e4c456c5750872 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Oct 2025 22:56:37 +0200 Subject: [PATCH 17/94] efi/runtime-wrappers: Keep track of the efi_runtime_lock owner The EFI runtime wrappers use a file local semaphore to serialize access to the EFI runtime services. This means that any calls to the arch wrappers around the runtime services will also be serialized, removing the need for redundant locking. For robustness, add a facility that allows those arch wrappers to assert that the semaphore was taken by the current task. Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas --- drivers/firmware/efi/runtime-wrappers.c | 17 ++++++++++++++++- include/linux/efi.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 708b777857d3..da8d29621644 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -202,6 +202,8 @@ void efi_call_virt_check_flags(unsigned long flags, const void *caller) */ static DEFINE_SEMAPHORE(efi_runtime_lock, 1); +static struct task_struct *efi_runtime_lock_owner; + /* * Expose the EFI runtime lock to the UV platform */ @@ -219,6 +221,8 @@ static void __nocfi efi_call_rts(struct work_struct *work) efi_status_t status = EFI_NOT_FOUND; unsigned long flags; + efi_runtime_lock_owner = current; + arch_efi_call_virt_setup(); flags = efi_call_virt_save_flags(); @@ -310,6 +314,7 @@ static void __nocfi efi_call_rts(struct work_struct *work) efi_rts_work.status = status; complete(&efi_rts_work.efi_rts_comp); + efi_runtime_lock_owner = NULL; } static efi_status_t __efi_queue_work(enum efi_rts_ids id, @@ -444,8 +449,10 @@ virt_efi_set_variable_nb(efi_char16_t *name, efi_guid_t *vendor, u32 attr, if (down_trylock(&efi_runtime_lock)) return EFI_NOT_READY; + efi_runtime_lock_owner = current; status = efi_call_virt_pointer(efi.runtime, set_variable, name, vendor, attr, data_size, data); + efi_runtime_lock_owner = NULL; up(&efi_runtime_lock); return status; } @@ -481,9 +488,11 @@ virt_efi_query_variable_info_nb(u32 attr, u64 *storage_space, if (down_trylock(&efi_runtime_lock)) return EFI_NOT_READY; + efi_runtime_lock_owner = current; status = efi_call_virt_pointer(efi.runtime, query_variable_info, attr, storage_space, remaining_space, max_variable_size); + efi_runtime_lock_owner = NULL; up(&efi_runtime_lock); return status; } @@ -509,12 +518,13 @@ virt_efi_reset_system(int reset_type, efi_status_t status, return; } + efi_runtime_lock_owner = current; arch_efi_call_virt_setup(); efi_rts_work.efi_rts_id = EFI_RESET_SYSTEM; arch_efi_call_virt(efi.runtime, reset_system, reset_type, status, data_size, data); arch_efi_call_virt_teardown(); - + efi_runtime_lock_owner = 
NULL;
 	up(&efi_runtime_lock);
 }
 
@@ -587,3 +597,8 @@ efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
 }
 
 #endif
+
+void efi_runtime_assert_lock_held(void)
+{
+	WARN_ON(efi_runtime_lock_owner != current);
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a98cc39e7aaa..b23ff8b83219 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1126,6 +1126,8 @@ static inline bool efi_runtime_disabled(void) { return true; }
 extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
 extern unsigned long efi_call_virt_save_flags(void);
 
+void efi_runtime_assert_lock_held(void);
+
 enum efi_secureboot_mode {
 	efi_secureboot_mode_unset,
 	efi_secureboot_mode_unknown,

From 1d038e801833f6dcfd4b18d59f96eca5a963a816 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 15 Oct 2025 22:56:38 +0200
Subject: [PATCH 18/94] arm64/fpsimd: Don't warn when EFI execution context is preemptible

Kernel mode FP/SIMD no longer requires preemption to be disabled, so only
warn on uses of FP/SIMD from preemptible context if the fallback path is
taken for cases where kernel mode NEON would not be allowed otherwise.

Signed-off-by: Ard Biesheuvel
Acked-by: Catalin Marinas
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/fpsimd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e3f8f51748bc..3d848c89604e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1934,11 +1934,11 @@ void __efi_fpsimd_begin(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	WARN_ON(preemptible());
-
 	if (may_use_simd()) {
 		kernel_neon_begin();
 	} else {
+		WARN_ON(preemptible());
+
 		/*
 		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
 		 * preserving:

From 7137a203b2515bdbeae1cf13446bdce17db2c2f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 15 Oct 2025 22:56:39 +0200
Subject: [PATCH 19/94] arm64/fpsimd: Permit kernel mode NEON with IRQs off

Currently, may_use_simd() will return false when called from a context
where IRQs are disabled. One notable case where this happens is when
calling the ResetSystem() EFI runtime service from the reboot/poweroff
code path. For this case alone, there is a substantial amount of FP/SIMD
support code to handle the corner case where an EFI runtime service is
invoked with IRQs disabled.

The only reason kernel mode SIMD is not allowed when IRQs are disabled is
that re-enabling softirqs in this case produces a noisy diagnostic when
lockdep is enabled. The warning is valid, in the sense that delivering
pending softirqs over the back of the call to local_bh_enable() is
problematic when IRQs are disabled.

While the API lacks a facility to simply mask and unmask softirqs without
triggering their delivery, disabling softirqs is not needed to begin with
when IRQs are disabled, given that softirqs are only ever taken
asynchronously over the back of a hard IRQ. So dis/enable softirq
processing conditionally, based on whether IRQs are enabled, and relax
the check in may_use_simd().
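With this change, a caller running with IRQs disabled, such as the
ResetSystem() path mentioned above, can take the ordinary kernel mode
NEON route. An illustrative sketch:

	if (may_use_simd()) {		/* no longer fails merely because IRQs are off */
		kernel_neon_begin();
		/* baseline FP/SIMD work */
		kernel_neon_end();
	}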
Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/simd.h | 2 +- arch/arm64/kernel/fpsimd.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 8e86c9e70e48..abd642c92f86 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -29,7 +29,7 @@ static __must_check inline bool may_use_simd(void) */ return !WARN_ON(!system_capabilities_finalized()) && system_supports_fpsimd() && - !in_hardirq() && !irqs_disabled() && !in_nmi(); + !in_hardirq() && !in_nmi(); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3d848c89604e..6d956ed23bd2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -225,10 +225,21 @@ static void fpsimd_bind_task_to_cpu(void); */ static void get_cpu_fpsimd_context(void) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_disable(); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * The softirq subsystem lacks a true unmask/mask API, and + * re-enabling softirq processing using local_bh_enable() will + * not only unmask softirqs, it will also result in immediate + * delivery of any pending softirqs. + * This is undesirable when running with IRQs disabled, but in + * that case, there is no need to mask softirqs in the first + * place, so only bother doing so when IRQs are enabled. + */ + if (!irqs_disabled()) + local_bh_disable(); + } else { preempt_disable(); + } } /* @@ -240,10 +251,12 @@ static void get_cpu_fpsimd_context(void) */ static void put_cpu_fpsimd_context(void) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - local_bh_enable(); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { + if (!irqs_disabled()) + local_bh_enable(); + } else { preempt_enable(); + } } unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) From 1068cb52e8ef4e21a31095dc932685a543b779bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Oct 2025 22:56:40 +0200 Subject: [PATCH 20/94] arm64/efi: Drop efi_rt_lock spinlock from EFI arch wrapper Since commit 5894cf571e14 ("acpi/prmt: Use EFI runtime sandbox to invoke PRM handlers") all EFI runtime calls on arm64 are routed via the EFI runtime wrappers, which are serialized using the efi_runtime_lock semaphore. This means the efi_rt_lock spinlock in the arm64 arch wrapper code has become redundant, and can be dropped. For robustness, replace it with an assert that the EFI runtime lock is in fact held by 'current'. 
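Any path that still invoked these hooks without holding the runtime
services semaphore would now trip the assertion added two patches earlier;
the resulting setup hook is simply:

	void arch_efi_call_virt_setup(void)
	{
		efi_runtime_assert_lock_held();	/* WARNs unless current holds efi_runtime_lock */
		efi_virtmap_load();
		__efi_fpsimd_begin();
	}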
Signed-off-by: Ard Biesheuvel
Acked-by: Catalin Marinas
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/efi.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 6c371b158b99..0094f5938ba6 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -165,19 +165,16 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
 	return s;
 }
 
-static DEFINE_RAW_SPINLOCK(efi_rt_lock);
-
 void arch_efi_call_virt_setup(void)
 {
+	efi_runtime_assert_lock_held();
 	efi_virtmap_load();
-	raw_spin_lock(&efi_rt_lock);
 	__efi_fpsimd_begin();
 }
 
 void arch_efi_call_virt_teardown(void)
 {
 	__efi_fpsimd_end();
-	raw_spin_unlock(&efi_rt_lock);
 	efi_virtmap_unload();
 }

From 6b9c98e657559408beecde41a532c5bb4cf281bc Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 15 Oct 2025 22:56:41 +0200
Subject: [PATCH 21/94] arm64/efi: Move uaccess en/disable out of efi_set_pgd()

efi_set_pgd() will no longer be called when invoking EFI runtime services
via the efi_rts_wq work queue, but the uaccess enable/disable calls are
still needed when PAN is emulated via TTBR0 switching. So move these into
the callers.

Acked-by: Will Deacon
Signed-off-by: Ard Biesheuvel
Acked-by: Catalin Marinas
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/efi.h | 13 +++----------
 arch/arm64/kernel/efi.c      | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index bcd5622aa096..aa91165ca140 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -126,21 +126,14 @@ static inline void efi_set_pgd(struct mm_struct *mm)
 	if (mm != current->active_mm) {
 		/*
 		 * Update the current thread's saved ttbr0 since it is
-		 * restored as part of a return from exception. Enable
-		 * access to the valid TTBR0_EL1 and invoke the errata
-		 * workaround directly since there is no return from
-		 * exception when invoking the EFI run-time services.
+		 * restored as part of a return from exception.
 		 */
 		update_saved_ttbr0(current, mm);
-		uaccess_ttbr0_enable();
-		post_ttbr_update_workaround();
 	} else {
 		/*
-		 * Defer the switch to the current thread's TTBR0_EL1
-		 * until uaccess_enable(). Restore the current
-		 * thread's saved ttbr0 corresponding to its active_mm
+		 * Restore the current thread's saved ttbr0
+		 * corresponding to its active_mm
 		 */
-		uaccess_ttbr0_disable();
 		update_saved_ttbr0(current, current->active_mm);
 	}
 }
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 0094f5938ba6..85f65d5c863c 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -169,12 +169,30 @@ void arch_efi_call_virt_setup(void)
 {
 	efi_runtime_assert_lock_held();
 	efi_virtmap_load();
+
+	/*
+	 * Enable access to the valid TTBR0_EL1 and invoke the errata
+	 * workaround directly since there is no return from exception when
+	 * invoking the EFI run-time services.
+	 */
+	uaccess_ttbr0_enable();
+	post_ttbr_update_workaround();
+
 	__efi_fpsimd_begin();
 }
 
 void arch_efi_call_virt_teardown(void)
 {
 	__efi_fpsimd_end();
+
+	/*
+	 * Defer the switch to the current thread's TTBR0_EL1 until
+	 * uaccess_enable(). Do so before efi_virtmap_unload() updates the
+	 * saved TTBR0 value, so the userland page tables are not activated
+	 * inadvertently over the back of an exception.
+ */ + uaccess_ttbr0_disable(); + efi_virtmap_unload(); } From a5baf582f4c026c25a206ac121bceade926aec74 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Oct 2025 22:56:42 +0200 Subject: [PATCH 22/94] arm64/efi: Call EFI runtime services without disabling preemption The only remaining reason why EFI runtime services are invoked with preemption disabled is the fact that the mm is swapped out behind the back of the context switching code. The kernel no longer disables preemption in kernel_neon_begin(). Furthermore, the EFI spec is being clarified to explicitly state that only baseline FP/SIMD is permitted in EFI runtime service implementations, and so the existing kernel mode NEON context switching code is sufficient to preserve and restore the execution context of an in-progress EFI runtime service call. Most EFI calls are made from the efi_rts_wq, which is serviced by a kthread. As kthreads never return to user space, they usually don't have an mm, and so we can use the existing infrastructure to swap in the efi_mm while the EFI call is in progress. This is visible to the scheduler, which will therefore reactivate the selected mm when switching out the kthread and back in again. Given that the EFI spec explicitly permits runtime services to be called with interrupts enabled, firmware code is already required to tolerate interruptions. So rather than disable preemption, disable only migration so that EFI runtime services are less likely to cause scheduling delays. To avoid potential issues where runtime services are interrupted while polling the secure firmware for async completions, keep migration disabled so that a runtime service invocation does not resume on a different CPU from the one it was started on. Note, though, that the firmware executes at the same privilege level as the kernel, and is therefore able to disable interrupts altogether. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/kernel/efi.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 85f65d5c863c..a81cb4aa4738 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -168,7 +169,20 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) void arch_efi_call_virt_setup(void) { efi_runtime_assert_lock_held(); - efi_virtmap_load(); + + if (preemptible() && (current->flags & PF_KTHREAD)) { + /* + * Disable migration to ensure that a preempted EFI runtime + * service call will be resumed on the same CPU. This avoids + * potential issues with EFI runtime calls that are preempted + * while polling for an asynchronous completion of a secure + * firmware call, which may not permit the CPU to change. 
+ */ + migrate_disable(); + kthread_use_mm(&efi_mm); + } else { + efi_virtmap_load(); + } /* * Enable access to the valid TTBR0_EL1 and invoke the errata @@ -193,7 +207,12 @@ void arch_efi_call_virt_teardown(void) */ uaccess_ttbr0_disable(); - efi_virtmap_unload(); + if (preemptible() && (current->flags & PF_KTHREAD)) { + kthread_unuse_mm(&efi_mm); + migrate_enable(); + } else { + efi_virtmap_unload(); + } } asmlinkage u64 *efi_rt_stack_top __ro_after_init; From e2e21a9757b9035d9f649e464c0b254cbbe8a148 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 31 Oct 2025 07:12:55 +0000 Subject: [PATCH 23/94] arm64/mm: Ensure PGD_SIZE is aligned to 64 bytes when PA_BITS = 52 Although the comment clearly states the PGD table's alignment requirement (when PA_BITS = 52), the subsequent BUILD_BUG_ON() tests the table's size against 64 bytes instead. So change it into an actual alignment test. Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 8160cff35089..bf5110b91e2f 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -56,7 +56,7 @@ void __init pgtable_cache_init(void) * With 52-bit physical addresses, the architecture requires the * top-level table to be aligned to at least 64 bytes. */ - BUILD_BUG_ON(PGD_SIZE < 64); + BUILD_BUG_ON(!IS_ALIGNED(PGD_SIZE, 64)); #endif /* From 555827a0645641ec3fadfeb0bc3155ab79a84b11 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 5 Nov 2025 15:27:01 +0100 Subject: [PATCH 24/94] arm64: entry: Clean out some indirection The conversion to generic IRQ entry left some functions in the EL1 (kernel) IRQ entry path very shallow, so drop the __inner_functions() where appropriate, saving some time and stack. This is not a fix but an optimization. Drop stale comments about irqentry_enter/exit() while we are at it. Signed-off-by: Linus Walleij Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-common.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..6e8e1e620221 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -34,20 +34,12 @@ * Handle IRQ/context state management when entering from kernel mode. * Before this function is called it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. - * - * This is intended to match the logic in irqentry_enter(), handling the kernel - * mode transitions only. */ -static __always_inline irqentry_state_t __enter_from_kernel_mode(struct pt_regs *regs) -{ - return irqentry_enter(regs); -} - static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs) { irqentry_state_t state; - state = __enter_from_kernel_mode(regs); + state = irqentry_enter(regs); mte_check_tfsr_entry(); mte_disable_tco_entry(current); @@ -58,21 +50,12 @@ static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs) * Handle IRQ/context state management when exiting to kernel mode. * After this function returns it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception.
- * - * This is intended to match the logic in irqentry_exit(), handling the kernel - * mode transitions only, and with preemption handled elsewhere. */ -static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs, - irqentry_state_t state) -{ - irqentry_exit(regs, state); -} - static void noinstr exit_to_kernel_mode(struct pt_regs *regs, irqentry_state_t state) { mte_check_tfsr_exit(); - __exit_to_kernel_mode(regs, state); + irqentry_exit(regs, state); } /* @@ -80,17 +63,12 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs, * Before this function is called it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. */ -static __always_inline void __enter_from_user_mode(struct pt_regs *regs) +static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs); mte_disable_tco_entry(current); } -static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) -{ - __enter_from_user_mode(regs); -} - /* * Handle IRQ/context state management when exiting to user mode. * After this function returns it is not safe to call regular kernel code, From 420cab0155033c14c5cb308d5078f93e3e8bf9b3 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Mon, 10 Nov 2025 23:21:01 +0200 Subject: [PATCH 25/94] arm64: acpi: add newline to deferred APEI warning The warning printed via pr_warn_ratelimited() in apei_claim_sea() is missing its trailing newline; add it. Signed-off-by: Osama Abdelkader Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..aab07d179787 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -439,7 +439,7 @@ int apei_claim_sea(struct pt_regs *regs) irq_work_run(); __irq_exit(); } else { - pr_warn_ratelimited("APEI work queued but not completed"); + pr_warn_ratelimited("APEI work queued but not completed\n"); err = -EINPROGRESS; } } From 639f08fc20c92c2cc373b2b4d065185daa9633e3 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 10 Oct 2025 15:01:15 +0200 Subject: [PATCH 26/94] arm64: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. Signed-off-by: Thomas Huth Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/kvm.h | 2 +- arch/arm64/include/uapi/asm/ptrace.h | 4 ++-- arch/arm64/include/uapi/asm/sigcontext.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ed5f3892674c..a792a599b9d6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -31,7 +31,7 @@ #define KVM_SPSR_FIQ 4 #define KVM_NR_SPSR 5 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 0f39ba4f3efd..6fed93fb2536 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -80,7 +80,7 @@ #define PTRACE_PEEKMTETAGS 33 #define PTRACE_POKEMTETAGS 34 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * User structures for general purpose, floating point and debug registers.
@@ -332,6 +332,6 @@ struct user_gcs { __u64 gcspr_el0; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index d42f7a92238b..e29bf3e2d0cc 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -17,7 +17,7 @@ #ifndef _UAPI__ASM_SIGCONTEXT_H #define _UAPI__ASM_SIGCONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -192,7 +192,7 @@ struct gcs_context { __u64 reserved; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #include From 287d163322b743a50adcad25c851600c004f59e3 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 10 Oct 2025 15:01:16 +0200 Subject: [PATCH 27/94] arm64: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize now on the __ASSEMBLER__ macro that is provided by the compilers. This is a mostly mechanical patch (done with a simple "sed -i" statement), except for the following files where comments with mis-spelled macros were tweaked manually: arch/arm64/include/asm/stacktrace/frame.h arch/arm64/include/asm/kvm_ptrauth.h arch/arm64/include/asm/debug-monitors.h arch/arm64/include/asm/esr.h arch/arm64/include/asm/scs.h arch/arm64/include/asm/memory.h Signed-off-by: Thomas Huth Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 8 ++++---- arch/arm64/include/asm/alternative.h | 4 ++-- arch/arm64/include/asm/arch_gicv3.h | 4 ++-- arch/arm64/include/asm/asm-extable.h | 6 +++--- arch/arm64/include/asm/assembler.h | 2 +- arch/arm64/include/asm/barrier.h | 4 ++-- arch/arm64/include/asm/cache.h | 4 ++-- arch/arm64/include/asm/cpucaps.h | 4 ++-- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/cputype.h | 4 ++-- arch/arm64/include/asm/current.h | 4 ++-- arch/arm64/include/asm/debug-monitors.h | 4 ++-- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/elf.h | 4 ++-- arch/arm64/include/asm/esr.h | 4 ++-- arch/arm64/include/asm/fixmap.h | 4 ++-- arch/arm64/include/asm/fpsimd.h | 2 +- arch/arm64/include/asm/ftrace.h | 6 +++--- arch/arm64/include/asm/gpr-num.h | 6 +++--- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/include/asm/image.h | 4 ++-- arch/arm64/include/asm/insn.h | 4 ++-- arch/arm64/include/asm/jump_label.h | 4 ++-- arch/arm64/include/asm/kasan.h | 2 +- arch/arm64/include/asm/kexec.h | 4 ++-- arch/arm64/include/asm/kgdb.h | 4 ++-- arch/arm64/include/asm/kvm_asm.h | 4 ++-- arch/arm64/include/asm/kvm_mmu.h | 4 ++-- arch/arm64/include/asm/kvm_mte.h | 4 ++-- arch/arm64/include/asm/kvm_ptrauth.h | 6 +++--- arch/arm64/include/asm/linkage.h | 2 +- arch/arm64/include/asm/memory.h | 4 ++-- arch/arm64/include/asm/mmu.h | 4 ++-- arch/arm64/include/asm/mmu_context.h | 4 ++-- arch/arm64/include/asm/mte-kasan.h | 4 ++-- arch/arm64/include/asm/mte.h | 4 ++-- arch/arm64/include/asm/page.h | 4 ++-- arch/arm64/include/asm/pgtable-prot.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/include/asm/proc-fns.h | 4 ++-- arch/arm64/include/asm/processor.h | 4 ++-- arch/arm64/include/asm/ptrace.h | 4 ++-- arch/arm64/include/asm/rsi_smc.h | 4 ++-- 
arch/arm64/include/asm/rwonce.h | 4 ++-- arch/arm64/include/asm/scs.h | 4 ++-- arch/arm64/include/asm/sdei.h | 4 ++-- arch/arm64/include/asm/smp.h | 4 ++-- arch/arm64/include/asm/spectre.h | 4 ++-- arch/arm64/include/asm/stacktrace/frame.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 10 +++++----- arch/arm64/include/asm/system_misc.h | 4 ++-- arch/arm64/include/asm/thread_info.h | 2 +- arch/arm64/include/asm/tlbflush.h | 2 +- arch/arm64/include/asm/vdso.h | 4 ++-- arch/arm64/include/asm/vdso/compat_barrier.h | 4 ++-- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 4 ++-- arch/arm64/include/asm/vdso/getrandom.h | 4 ++-- arch/arm64/include/asm/vdso/gettimeofday.h | 4 ++-- arch/arm64/include/asm/vdso/processor.h | 4 ++-- arch/arm64/include/asm/vdso/vsyscall.h | 4 ++-- arch/arm64/include/asm/virt.h | 4 ++-- tools/arch/arm64/include/asm/cputype.h | 4 ++-- tools/arch/arm64/include/asm/esr.h | 4 ++-- tools/arch/arm64/include/asm/gpr-num.h | 6 +++--- tools/arch/arm64/include/asm/sysreg.h | 10 +++++----- tools/arch/arm64/include/uapi/asm/kvm.h | 2 +- 66 files changed, 136 insertions(+), 136 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index c8c77f9e36d6..862416624852 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -19,7 +19,7 @@ #error "cpucaps have overflown ARM64_CB_BIT" #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -207,7 +207,7 @@ alternative_endif #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); @@ -219,7 +219,7 @@ alternative_endif #define ALTERNATIVE(oldinstr, newinstr, ...) 
\ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -263,6 +263,6 @@ alternative_has_cap_unlikely(const unsigned long cpucap) return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ALTERNATIVE_MACROS_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..607a21e7dd9c 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -34,5 +34,5 @@ static inline void apply_alternatives_module(void *start, size_t length) { } void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 9e96f024b2f1..d20b03931a8d 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -9,7 +9,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -188,5 +188,5 @@ static inline bool gic_has_relaxed_pmr_sync(void) return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 292f2687a12e..d67e2fdd1aee 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -27,7 +27,7 @@ /* Data fields for EX_TYPE_UACCESS_CPY */ #define EX_DATA_UACCESS_WRITE BIT(0) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -77,7 +77,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write) .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #include @@ -132,6 +132,6 @@ EX_DATA_REG(ADDR, addr) \ ")") -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 23be85d93348..b2d633081709 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -5,7 +5,7 @@ * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #error "Only include this from assembly code" #endif diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index f5801b0ba9e9..9495c4441a46 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -7,7 +7,7 @@ #ifndef __ASM_BARRIER_H #define __ASM_BARRIER_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -221,6 +221,6 @@ do { \ #include -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_BARRIER_H */ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 09963004ceea..dd2c8586a725 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -35,7 +35,7 @@ #define ARCH_DMA_MINALIGN (128) #define ARCH_KMALLOC_MINALIGN (8) -#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) +#if !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) #include #include @@ -135,6 +135,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) return ctr; } -#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */ +#endif /* !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) */ #endif diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 9d769291a306..2c8029472ad4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -5,7 +5,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* * Check whether a cpucap is possible at compiletime. @@ -77,6 +77,6 @@ cpucap_is_possible(const unsigned int cap) return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e223cbf350e4..88f3d618605e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -19,7 +19,7 @@ #define ARM64_SW_FEATURE_OVERRIDE_HVHE 4 #define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -1078,6 +1078,6 @@ static inline bool cpu_has_lpa2(void) #endif } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9b00b75acbf2..024b4fa976a8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -249,7 +249,7 @@ #define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) #define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -328,6 +328,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index 54ceae0874c7..c92912eaf186 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -4,7 +4,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; @@ -23,7 +23,7 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_CURRENT_H */ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index f5e3ed2420ce..8d5f92418838 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -48,7 +48,7 @@ #define AARCH32_BREAK_THUMB2_LO 0xf7f0 #define 
AARCH32_BREAK_THUMB2_HI 0xa000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ @@ -88,5 +88,5 @@ static inline bool try_step_suspended_breakpoints(struct pt_regs *regs) bool try_handle_aarch32_break(struct pt_regs *regs); -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..892761d3db45 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -7,7 +7,7 @@ #ifndef __ARM_KVM_INIT_H__ #define __ARM_KVM_INIT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #error Assembly-only header #endif diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 3f93f4eef953..d2779d604c7b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -133,7 +133,7 @@ #define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) #endif /* CONFIG_ARM64_FORCE_52BIT */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -293,6 +293,6 @@ static inline int arch_check_elf(void *ehdr, bool has_interp, return 0; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index e1deed824464..4975a92cbd17 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -431,7 +431,7 @@ #define ESR_ELx_IT_GCSPOPCX 6 #define ESR_ELx_IT_GCSPOPX 7 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include static inline unsigned long esr_brk_comment(unsigned long esr) @@ -534,6 +534,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr) } const char *esr_get_class_string(unsigned long esr); -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 635a43c4ec85..65555284446e 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -15,7 +15,7 @@ #ifndef _ASM_ARM64_FIXMAP_H #define _ASM_ARM64_FIXMAP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include @@ -117,5 +117,5 @@ extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t pr #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_ARM64_FIXMAP_H */ diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b8cf0ea43cc0..1d2e33559bd5 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -12,7 +12,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index ba7cf7fec5e9..1621c84f44b3 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -37,7 +37,7 @@ */ #define ARCH_FTRACE_SHIFT_STACK_TRACER 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include extern void _mcount(unsigned long); @@ -217,9 +217,9 @@ static inline bool arch_syscall_match_sym_name(const char *sym, */ return !strcmp(sym + 8, name); } -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_FUNCTION_GRAPH_TRACER void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h index 05da4a7c5788..a114e4f8209b 100644 --- a/arch/arm64/include/asm/gpr-num.h +++ 
b/arch/arm64/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .equ .L__gpr_num_x\num, \num @@ -11,7 +11,7 @@ .equ .L__gpr_num_xzr, 31 .equ .L__gpr_num_wzr, 31 -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -21,6 +21,6 @@ " .equ .L__gpr_num_xzr, 31\n" \ " .equ .L__gpr_num_wzr, 31\n" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d567265467c..1f63814ae6c4 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -46,7 +46,7 @@ #define COMPAT_HWCAP2_SB (1 << 5) #define COMPAT_HWCAP2_SSBS (1 << 6) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index c09cf942dc92..9ba85173f857 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -20,7 +20,7 @@ #define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3 #define ARM64_IMAGE_FLAG_PHYS_BASE 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define arm64_image_flag_field(flags, field) \ (((flags) >> field##_SHIFT) & field##_MASK) @@ -54,6 +54,6 @@ struct arm64_image_header { __le32 res5; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 18c7811774d3..e1d30ba99d01 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -12,7 +12,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, @@ -730,6 +730,6 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn); typedef bool (pstate_check_t)(unsigned long); extern pstate_check_t * const aarch32_opcode_cond_checks[16]; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 424ed421cd97..0cb211d3607d 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -8,7 +8,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -58,5 +58,5 @@ static __always_inline bool arch_static_branch_jump(struct static_key * const ke return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index e1b57c13f8a4..b167e9d3da91 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 4d9cc7a76d9c..892e5bebda95 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -25,7 +25,7 @@ #define KEXEC_ARCH KEXEC_ARCH_AARCH64 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /** * crash_setup_regs() - save registers for the panic kernel @@ -130,6 +130,6 @@ extern int load_other_segments(struct kimage *image, char *cmdline); #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git 
a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index 3184f5d1e3ae..67ef1c5532ae 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -14,7 +14,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void arch_kgdb_breakpoint(void) { @@ -36,7 +36,7 @@ static inline int kgdb_single_step_handler(struct pt_regs *regs, } #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * gdb remote procotol (well most versions of it) expects the following diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9da54d4ee49e..4b34f7b7ed2f 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -46,7 +46,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -303,7 +303,7 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt, void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ .macro get_host_ctxt reg, tmp adr_this_cpu \reg, kvm_host_data, \tmp diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e4069f2ce642..2dc5e6e742bb 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -49,7 +49,7 @@ * mappings, and none of this applies in that case. */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include @@ -396,5 +396,5 @@ void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} #endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h index de002636eb1f..3171963ad25c 100644 --- a/arch/arm64/include/asm/kvm_mte.h +++ b/arch/arm64/include/asm/kvm_mte.h @@ -5,7 +5,7 @@ #ifndef __ASM_KVM_MTE_H #define __ASM_KVM_MTE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include @@ -62,5 +62,5 @@ alternative_else_nop_endif .endm #endif /* CONFIG_ARM64_MTE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_KVM_MTE_H */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6199c9f7ec6e..e50987b32483 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -8,7 +8,7 @@ #ifndef __ASM_KVM_PTRAUTH_H #define __ASM_KVM_PTRAUTH_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include @@ -100,7 +100,7 @@ alternative_else_nop_endif .endm #endif /* CONFIG_ARM64_PTR_AUTH */ -#else /* !__ASSEMBLY */ +#else /* !__ASSEMBLER__ */ #define __ptrauth_save_key(ctxt, key) \ do { \ @@ -120,5 +120,5 @@ alternative_else_nop_endif __ptrauth_save_key(ctxt, APGA); \ } while(0) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index d3acd9c87509..40bd17add539 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -1,7 +1,7 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1505c4acb38..433513f590a3 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -207,7 +207,7 @@ */ #define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE) -#ifndef 
__ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -422,7 +422,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) }) void dump_mem_limit(void); -#endif /* !ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * Given that the GIC architecture permits ITS implementations that can only be diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 78a4dbf75e60..137a173df1ff 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,7 +12,7 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -112,5 +112,5 @@ void kpti_install_ng_mappings(void); static inline void kpti_install_ng_mappings(void) {} #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0dbe3b29049b..b4b361a4ec62 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -8,7 +8,7 @@ #ifndef __ASM_MMU_CONTEXT_H #define __ASM_MMU_CONTEXT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -330,6 +330,6 @@ static inline void deactivate_mm(struct task_struct *tsk, #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 0f9b08e8fb8d..352139271918 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -9,7 +9,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -259,6 +259,6 @@ static inline int mte_enable_kernel_store_only(void) #endif /* CONFIG_ARM64_MTE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_MTE_KASAN_H */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3b5069f4683d..6d4a78b9dc3e 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -8,7 +8,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -282,5 +282,5 @@ static inline void mte_check_tfsr_exit(void) } #endif /* CONFIG_KASAN_HW_TAGS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 2312e6ee595f..0370a1534abc 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -10,7 +10,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* for READ_IMPLIES_EXEC */ #include /* for gfp_t */ @@ -45,7 +45,7 @@ int pfn_is_map_memory(unsigned long pfn); #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 85dceb1c66f4..28460c826298 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -62,7 +62,7 @@ #define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) #define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -127,7 +127,7 @@ static inline bool __pure lpa2_is_enabled(void) #define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC) #define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #define pte_pi_index(pte) ( \ ((pte & 
BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..26bb1421e63f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -30,7 +30,7 @@ #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -1948,6 +1948,6 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, #endif /* CONFIG_ARM64_CONTPTE */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 0d5d1f0525eb..ab78a78821a2 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -9,7 +9,7 @@ #ifndef __ASM_PROCFNS_H #define __ASM_PROCFNS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -21,5 +21,5 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 61d62bfd5a7b..5acce7962228 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -25,7 +25,7 @@ #define MTE_CTRL_STORE_ONLY (1UL << 19) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -437,5 +437,5 @@ int set_tsc_mode(unsigned int val); #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 65b053a24d82..39582511ad72 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -94,7 +94,7 @@ */ #define NO_SYSCALL (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -361,5 +361,5 @@ static inline void procedure_link_pointer_set(struct pt_regs *regs, extern unsigned long profile_pc(struct pt_regs *regs); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h index 6cb070eca9e9..e19253f96c94 100644 --- a/arch/arm64/include/asm/rsi_smc.h +++ b/arch/arm64/include/asm/rsi_smc.h @@ -122,7 +122,7 @@ */ #define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct realm_config { union { @@ -142,7 +142,7 @@ struct realm_config { */ } __aligned(0x1000); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Read configuration for the current Realm. 
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 97d9256d33c9..78beceec10cd 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -5,7 +5,7 @@ #ifndef __ASM_RWONCE_H #define __ASM_RWONCE_H -#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__) +#if defined(CONFIG_LTO) && !defined(__ASSEMBLER__) #include #include @@ -62,7 +62,7 @@ }) #endif /* !BUILD_VDSO */ -#endif /* CONFIG_LTO && !__ASSEMBLY__ */ +#endif /* CONFIG_LTO && !__ASSEMBLER__ */ #include diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..d31b128f683f 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -2,7 +2,7 @@ #ifndef _ASM_SCS_H #define _ASM_SCS_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include #include @@ -55,6 +55,6 @@ enum { int __pi_scs_patch(const u8 eh_frame[], int size); -#endif /* __ASSEMBLY __ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 484cb6972e99..b2248bd3cb58 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -9,7 +9,7 @@ #define SDEI_STACK_SIZE IRQ_STACK_SIZE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -49,5 +49,5 @@ unsigned long do_sdei_event(struct pt_regs *regs, unsigned long sdei_arch_get_entry_point(int conduit); #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SDEI_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d48ef6d5abcc..10ea4f543069 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -23,7 +23,7 @@ #define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) #define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -155,6 +155,6 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); -#endif /* ifndef __ASSEMBLY__ */ +#endif /* ifndef __ASSEMBLER__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..0527c53b0ec5 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -12,7 +12,7 @@ #define BP_HARDEN_EL2_SLOTS 4 #define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -118,5 +118,5 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h index 0ee0f6ba0fd8..796797b8db7e 100644 --- a/arch/arm64/include/asm/stacktrace/frame.h +++ b/arch/arm64/include/asm/stacktrace/frame.h @@ -25,7 +25,7 @@ #define FRAME_META_TYPE_FINAL 1 #define FRAME_META_TYPE_PT_REGS 2 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * A standard AAPCS64 frame record. 
*/ @@ -43,6 +43,6 @@ struct frame_record_meta { struct frame_record record; u64 type; }; -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_STACKTRACE_FRAME_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..9c9a96643412 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -52,7 +52,7 @@ #ifndef CONFIG_BROKEN_GAS_INST -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ // The space separator is omitted so that __emit_inst(x) can be parsed as // either an assembler directive or an assembler macro argument. #define __emit_inst(x) .inst(x) @@ -71,11 +71,11 @@ (((x) >> 24) & 0x000000ff)) #endif /* CONFIG_CPU_BIG_ENDIAN */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __emit_inst(x) .long __INSTR_BSWAP(x) -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_BROKEN_GAS_INST */ @@ -1131,7 +1131,7 @@ #define ARM64_FEATURE_FIELD_BITS 4 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro mrs_s, rt, sreg __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 344b1c1a4bbb..d316a804eb38 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -7,7 +7,7 @@ #ifndef __ASM_SYSTEM_MISC_H #define __ASM_SYSTEM_MISC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -28,6 +28,6 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, struct mm_struct; extern void __show_regs(struct pt_regs *); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f241b8601ebd..a803b887b0b4 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -10,7 +10,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct task_struct; diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 18a5dc0c9a54..ef61b68df347 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -8,7 +8,7 @@ #ifndef __ASM_TLBFLUSH_H #define __ASM_TLBFLUSH_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 61679070f595..232b46969088 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -7,7 +7,7 @@ #define __VDSO_PAGES 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -19,6 +19,6 @@ extern char vdso_start[], vdso_end[]; extern char vdso32_start[], vdso32_end[]; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 6d75e03d3827..d7ebe7ceefa0 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -5,7 +5,7 @@ #ifndef __COMPAT_BARRIER_H #define __COMPAT_BARRIER_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Warning: This code is meant to be used from the compat vDSO only. 
*/ @@ -31,6 +31,6 @@ #define smp_rmb() aarch32_smp_rmb() #define smp_wmb() aarch32_smp_wmb() -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __COMPAT_BARRIER_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7d1a116549b1..0d513f924321 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H #define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -161,6 +161,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) } #define vdso_clocksource_ok vdso_clocksource_ok -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h index a2197da1951b..da1d58bbfabe 100644 --- a/arch/arm64/include/asm/vdso/getrandom.h +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -3,7 +3,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -33,6 +33,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index c59e84105b43..3658a757e255 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifdef __aarch64__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -96,7 +96,7 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( #define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data #endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #else /* !__aarch64__ */ diff --git a/arch/arm64/include/asm/vdso/processor.h b/arch/arm64/include/asm/vdso/processor.h index ff830b766ad2..7abb0cc81cd6 100644 --- a/arch/arm64/include/asm/vdso/processor.h +++ b/arch/arm64/include/asm/vdso/processor.h @@ -5,13 +5,13 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 417aae5763a8..3f3c8eb74e2e 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,7 +2,7 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -22,6 +22,6 @@ void __arch_update_vdso_clock(struct vdso_clock *vc) /* The asm-generic header needs to be included after the definitions above */ #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index aa280f356b96..530af9620fdb 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -56,7 +56,7 @@ */ #define BOOT_CPU_FLAG_E2H BIT_ULL(32) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -161,6 +161,6 @@ static inline bool 
is_hyp_nvhe(void) return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 139d5e87dc95..b35d954d50c3 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -245,7 +245,7 @@ #define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) #define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -338,6 +338,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h index bd592ca81571..bbfbd1497a2f 100644 --- a/tools/arch/arm64/include/asm/esr.h +++ b/tools/arch/arm64/include/asm/esr.h @@ -385,7 +385,7 @@ #define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) #define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include static inline unsigned long esr_brk_comment(unsigned long esr) @@ -450,6 +450,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr) } const char *esr_get_class_string(unsigned long esr); -#endif /* __ASSEMBLY */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ESR_H */ diff --git a/tools/arch/arm64/include/asm/gpr-num.h b/tools/arch/arm64/include/asm/gpr-num.h index 05da4a7c5788..a114e4f8209b 100644 --- a/tools/arch/arm64/include/asm/gpr-num.h +++ b/tools/arch/arm64/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .equ .L__gpr_num_x\num, \num @@ -11,7 +11,7 @@ .equ .L__gpr_num_xzr, 31 .equ .L__gpr_num_wzr, 31 -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -21,6 +21,6 @@ " .equ .L__gpr_num_xzr, 31\n" \ " .equ .L__gpr_num_wzr, 31\n" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 65f2759ea27a..10c457d468e8 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -51,7 +51,7 @@ #ifndef CONFIG_BROKEN_GAS_INST -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ // The space separator is omitted so that __emit_inst(x) can be parsed as // either an assembler directive or an assembler macro argument. 
#define __emit_inst(x) .inst(x) @@ -70,11 +70,11 @@ (((x) >> 24) & 0x000000ff)) #endif /* CONFIG_CPU_BIG_ENDIAN */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __emit_inst(x) .long __INSTR_BSWAP(x) -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_BROKEN_GAS_INST */ @@ -1080,7 +1080,7 @@ #define ARM64_FEATURE_FIELD_BITS 4 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro mrs_s, rt, sreg __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ed5f3892674c..a792a599b9d6 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -31,7 +31,7 @@ #define KVM_SPSR_FIQ 4 #define KVM_NR_SPSR 5 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include #include From df717b9564c8370e9198c9cd5d62e277a18563fb Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 19 Sep 2025 11:33:27 +0800 Subject: [PATCH 28/94] arm64: add unlikely hint to MTE async fault check in el0_svc_common Add unlikely() hint to the _TIF_MTE_ASYNC_FAULT flag check in el0_svc_common() since asynchronous MTE faults are expected to be rare occurrences during normal system call execution. This optimization helps the compiler to improve instruction caching and branch prediction for the common case where no asynchronous MTE faults are pending, while maintaining correct behavior for the exceptional case where such faults need to be handled prior to system call execution. Signed-off-by: Li Qiang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index aba7ca6bca2d..c062badd1a56 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -96,7 +96,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) */ - if (flags & _TIF_MTE_ASYNC_FAULT) { + if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) { /* * Process the asynchronous tag check fault before the actual * syscall. do_notify_resume() will send a signal to userspace From 96ac403ea2b4ddfc0aa99815476c3ff13a23935e Mon Sep 17 00:00:00 2001 From: mrigendrachaubey Date: Thu, 6 Nov 2025 19:26:55 +0530 Subject: [PATCH 29/94] arm64: Fix typos and spelling errors in comments This patch corrects several minor typographical and spelling errors in comments across multiple arm64 source files. No functional changes. 
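As an aside, most of these slips can be caught mechanically before they are committed, for example via checkpatch's codespell integration (assuming the codespell dictionary is installed on the host):

  ./scripts/checkpatch.pl --codespell -f arch/arm64/kernel/smp.c

which should flag misspellings such as "aleady" in the file's comments.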
Signed-off-by: mrigendrachaubey Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 4 ++-- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/include/asm/suspend.h | 2 +- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/ftrace.c | 2 +- arch/arm64/kernel/machine_kexec.c | 2 +- arch/arm64/kernel/probes/uprobes.c | 2 +- arch/arm64/kernel/sdei.c | 2 +- arch/arm64/kernel/smp.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/hyp/nvhe/ffa.c | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/nested.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 2 +- 18 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 23be85d93348..b8b1229e05e5 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -371,7 +371,7 @@ alternative_endif * [start, end) with dcache line size explicitly provided. * * op: operation passed to dc instruction - * domain: domain used in dsb instruciton + * domain: domain used in dsb instruction * start: starting virtual address of the region * end: end virtual address of the region * linesz: dcache line size @@ -412,7 +412,7 @@ alternative_endif * [start, end) * * op: operation passed to dc instruction - * domain: domain used in dsb instruciton + * domain: domain used in dsb instruction * start: starting virtual address of the region * end: end virtual address of the region * fixup: optional label to branch to on user fault diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e223cbf350e4..71ba4a47ab18 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -199,7 +199,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * registers (e.g, SCTLR, TCR etc.) or patching the kernel via * alternatives. The kernel patching is batched and performed at later * point. The actions are always initiated only after the capability - * is finalised. This is usally denoted by "enabling" the capability. + * is finalised. This is usually denoted by "enabling" the capability. * The actions are initiated as follows : * a) Action is triggered on all online CPUs, after the capability is * finalised, invoked within the stop_machine() context from @@ -251,7 +251,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) #define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) /* - * The capabilitiy is detected on the Boot CPU and is used by kernel + * The capability is detected on the Boot CPU and is used by kernel * during early boot. i.e, the capability should be "detected" and * "enabled" as early as possibly on all booting CPUs. */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..f593ce79fe15 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -28,7 +28,7 @@ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but * don't advertise it (they predate this relaxation). * - * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H + * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. 
*/ mrs_s x1, SYS_ID_AA64MMFR4_EL1 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..36bf4655bc3d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -432,7 +432,7 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new); * 1 0 | 1 0 1 * 1 1 | 0 1 x * - * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via + * When hardware DBM is not present, the software PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) @@ -598,7 +598,7 @@ static inline int pte_protnone(pte_t pte) /* * pte_present_invalid() tells us that the pte is invalid from HW * perspective but present from SW perspective, so the fields are to be - * interpretted as per the HW layout. The second 2 checks are the unique + * interpreted as per the HW layout. The second 2 checks are the unique * encoding that we use for PROT_NONE. It is insufficient to only use * the first check because we share the same encoding scheme with pmds * which support pmd_mkinvalid(), so can be present-invalid without diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 0cde2f473971..e65f33edf9d6 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -23,7 +23,7 @@ struct cpu_suspend_ctx { * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter(). * This data must survive until cpu_resume() is called. * - * This struct desribes the size and the layout of the saved cpu state. + * This struct describes the size and the layout of the saved cpu state. * The layout of the callee_saved_regs is defined by the implementation * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..d1ac0e58651c 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -133,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void) /* * FADT is required on arm64; retrieve it to check its presence - * and carry out revision and ACPI HW reduced compliancy tests + * and carry out revision and ACPI HW reduced compliance tests */ status = acpi_get_table(ACPI_SIG_FADT, 0, &table); if (ACPI_FAILURE(status)) { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..77654643b952 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1002,7 +1002,7 @@ static void __init sort_ftr_regs(void) /* * Initialise the CPU feature register from Boot CPU values. - * Also initiliases the strict_mask for the register. + * Also initialises the strict_mask for the register. * Any bits that are not covered by an arm64_ftr_bits entry are considered * RES0 for the system-wide value, and must strictly match. 
 */
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 5adad37ab4fa..5a1554a44162 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -492,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		return ret;
 
 	/*
-	 * When using mcount, callsites in modules may have been initalized to
+	 * When using mcount, callsites in modules may have been initialized to
 	 * call an arbitrary module PLT (which redirects to the _mcount stub)
 	 * rather than the ftrace PLT we'll use at runtime (which redirects to
 	 * the ftrace trampoline). We can ignore the old PLT when initializing
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 6f121a0164a4..239c16e3d02f 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -251,7 +251,7 @@ void crash_post_resume(void)
  * marked as Reserved as memory was allocated via memblock_reserve().
  *
  * In hibernation, the pages which are Reserved and yet "nosave" are excluded
- * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * from the hibernation image. crash_is_nosave() does this check for crash
  * dump kernel and will reduce the total size of hibernation image.
  */
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2799bdb2fb82..941668800aea 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -131,7 +131,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	struct uprobe_task *utask = current->utask;
 
 	/*
-	 * Task has received a fatal signal, so reset back to probbed
+	 * Task has received a fatal signal, so reset back to probed
 	 * address.
 	 */
 	instruction_pointer_set(regs, utask->vaddr);
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 95169f7b6531..965b03fedfb8 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
 /*
  * do_sdei_event() returns one of:
  *  SDEI_EV_HANDLED -  success, return to the interrupted context.
- *  SDEI_EV_FAILED  -  failure, return this error code to firmare.
+ *  SDEI_EV_FAILED  -  failure, return this error code to firmware.
  *  virtual-address -  success, return to this address.
  */
 unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 68cea3a4a35c..a0bfe624f899 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -350,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 
 	/*
 	 * Now that the dying CPU is beyond the point of no return w.r.t.
-	 * in-kernel synchronisation, try to get the firwmare to help us to
+	 * in-kernel synchronisation, try to get the firmware to help us to
 	 * verify that it has really left the kernel before we consider
 	 * clobbering anything it might still be using.
	 */
@@ -523,7 +523,7 @@ int arch_register_cpu(int cpu)
 
 	/*
 	 * Availability of the acpi handle is sufficient to establish
-	 * that _STA has aleady been checked. No need to recheck here.
+	 * that _STA has already been checked. No need to recheck here.
*/ c->hotpluggable = arch_cpu_is_hotpluggable(cpu); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 681939ef5d16..914282016069 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -922,7 +922,7 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne __show_regs(regs); /* - * We use nmi_panic to limit the potential for recusive overflows, and + * We use nmi_panic to limit the potential for recursive overflows, and * to get a better stack trace. */ nmi_panic(NULL, "kernel stack overflow"); diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..ebcb7f1a55ee 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -815,7 +815,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) tpt = tpc = true; /* - * For the poor sods that could not correctly substract one value + * For the poor sods that could not correctly subtract one value * from another, trap the full virtual timer and counter. */ if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer)) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4e16f9b96f63..0ee7ebb2485e 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -115,7 +115,7 @@ static void ffa_set_retval(struct kvm_cpu_context *ctxt, * * FFA-1.3 introduces 64-bit variants of the CPU cycle management * interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests - * complete with SMC32 direct reponses which *should* allow us use the + * complete with SMC32 direct responses which *should* allow us use the * function ID sent by the caller to determine whether to return x8-x17. * * Note that we also cannot rely on function IDs in the response. diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7cc964af8d30..9a6a80c3fbe5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1755,7 +1755,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, /* * Check if this is non-struct page memory PFN, and cannot support - * CMOs. It could potentially be unsafe to access as cachable. + * CMOs. It could potentially be unsafe to access as cacheable. */ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) { if (is_vma_cacheable) { diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7a045cad6bdf..4c16dbbd6e90 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -85,7 +85,7 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) /* * Let's treat memory allocation failures as benign: If we fail to * allocate anything, return an error and keep the allocated array - * alive. Userspace may try to recover by intializing the vcpu + * alive. Userspace may try to recover by initializing the vcpu * again, and there is no reason to affect the whole VM for this. */ num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ab83089c3d8f..26b93e0f11b1 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -3053,7 +3053,7 @@ bool bpf_jit_supports_exceptions(void) /* We unwind through both kernel frames starting from within bpf_throw * call and BPF frames. Therefore we require FP unwinder to be enabled * to walk kernel frames and reach BPF frames in the stack trace. 
- * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y
+ * ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y
 	 */
 	return true;
 }
From 337f7e3a4b4d60b41d3e9a389675bb9353925183 Mon Sep 17 00:00:00 2001
From: Bo Liu
Date: Wed, 29 Oct 2025 15:17:42 +0800
Subject: [PATCH 30/94] arm64: Fix double word in comments

Remove the repeated word "the" in comments.

Signed-off-by: Bo Liu
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/entry-ftrace.S | 2 +-
 arch/arm64/kvm/arm.c             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 169ccf600066..025140caafe7 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller)
 	stp	x29, x30, [sp, #FREGS_SIZE]
 	add	x29, sp, #FREGS_SIZE
 
-	/* Prepare arguments for the the tracer func */
+	/* Prepare arguments for the tracer func */
 	sub	x0, x30, #AARCH64_INSN_SIZE		// ip (callsite's BL insn)
 	mov	x1, x9					// parent_ip (callsite's LR)
 	mov	x3, sp					// regs
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f21d1b7f20f8..69d92e3e3d1a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2441,7 +2441,7 @@ static void kvm_hyp_init_symbols(void)
 	kvm_nvhe_sym(__icache_flags) = __icache_flags;
 	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
 
-	/* Propagate the FGT state to the the nVHE side */
+	/* Propagate the FGT state to the nVHE side */
 	kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks;
 	kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks;
 	kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks;
From b0a3f0e894f34e01f14770113f86019b1ef96040 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Mon, 13 Oct 2025 10:59:44 +0530
Subject: [PATCH 31/94] arm64/sysreg: Replace TCR_EL1 field macros

This just replaces all used TCR_EL1 field macros with the tools sysreg
variant based fields and subsequently drops them from the header
(pgtable-hwdef.h), while retaining the ones used for KVM (represented
via the sysreg tools format).
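
As an illustration of the conversion (a sketch only, not taken from the
diff below), an open-coded definition such as:

	#define TCR_T0SZ_OFFSET		0
	#define TCR_T0SZ_MASK		(((UL(1) << 6) - 1) << TCR_T0SZ_OFFSET)

is superseded by the generated TCR_EL1_T0SZ_SHIFT/_WIDTH/_MASK
definitions, so C code can manipulate the field with the usual
FIELD_GET() helper from <linux/bitfield.h>, e.g.:

	u64 t0sz = FIELD_GET(TCR_EL1_T0SZ_MASK, read_sysreg(tcr_el1));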
Cc: Will Deacon Cc: Mark Brown Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 6 +- arch/arm64/include/asm/cputype.h | 2 +- arch/arm64/include/asm/mmu_context.h | 4 +- arch/arm64/include/asm/pgtable-hwdef.h | 125 ++++++++----------------- arch/arm64/include/asm/pgtable-prot.h | 2 +- arch/arm64/kernel/cpufeature.c | 4 +- arch/arm64/kernel/pi/map_kernel.c | 8 +- arch/arm64/kernel/vmcore_info.c | 2 +- arch/arm64/mm/proc.S | 36 ++++--- 9 files changed, 75 insertions(+), 114 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 23be85d93348..1392860a3c97 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -325,14 +325,14 @@ alternative_cb_end * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_t0sz, valreg, t0sz - bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH + bfi \valreg, \t0sz, #TCR_EL1_T0SZ_SHIFT, #TCR_EL1_T0SZ_WIDTH .endm /* * tcr_set_t1sz - update TCR.T1SZ */ .macro tcr_set_t1sz, valreg, t1sz - bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH + bfi \valreg, \t1sz, #TCR_EL1_T1SZ_SHIFT, #TCR_EL1_T1SZ_WIDTH .endm /* @@ -589,7 +589,7 @@ alternative_endif .macro offset_ttbr1, ttbr, tmp #if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2) mrs \tmp, tcr_el1 - and \tmp, \tmp, #TCR_T1SZ_MASK + and \tmp, \tmp, #TCR_EL1_T1SZ_MASK cmp \tmp, #TCR_T1SZ(VA_BITS_MIN) orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET csel \ttbr, \tmp, \ttbr, eq diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9b00b75acbf2..f14eb942cb4a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -247,7 +247,7 @@ /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX #define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) -#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_EL1_NFD1 | TCR_EL1_NFD0) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0dbe3b29049b..1b4ac7b23e18 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -73,10 +73,10 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr = read_sysreg(tcr_el1); - if ((tcr & TCR_T0SZ_MASK) == t0sz) + if ((tcr & TCR_EL1_T0SZ_MASK) == t0sz) return; - tcr &= ~TCR_T0SZ_MASK; + tcr &= ~TCR_EL1_T0SZ_MASK; tcr |= t0sz; write_sysreg(tcr, tcr_el1); isb(); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f3b77deedfa2..d49180bb7cb3 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -228,102 +228,53 @@ /* * TCR flags. 
*/ -#define TCR_T0SZ_OFFSET 0 -#define TCR_T1SZ_OFFSET 16 -#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) -#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) -#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) -#define TCR_TxSZ_WIDTH 6 -#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) -#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET) +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_EL1_T0SZ_SHIFT) +#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_EL1_T1SZ_SHIFT) -#define TCR_EPD0_SHIFT 7 -#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) -#define TCR_IRGN0_SHIFT 8 -#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) -#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_T0SZ_MASK TCR_EL1_T0SZ_MASK +#define TCR_T1SZ_MASK TCR_EL1_T1SZ_MASK -#define TCR_EPD1_SHIFT 23 -#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT) -#define TCR_IRGN1_SHIFT 24 -#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT) -#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT) +#define TCR_EPD0_MASK TCR_EL1_EPD0_MASK +#define TCR_EPD1_MASK TCR_EL1_EPD1_MASK -#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC) -#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) -#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT) -#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA) -#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK) +#define TCR_IRGN0_MASK TCR_EL1_IRGN0_MASK +#define TCR_IRGN0_WBWA (TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT) +#define TCR_ORGN0_MASK TCR_EL1_ORGN0_MASK +#define TCR_ORGN0_WBWA (TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT) -#define TCR_ORGN0_SHIFT 10 -#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) -#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_SH0_MASK TCR_EL1_SH0_MASK +#define TCR_SH0_INNER (TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT) -#define TCR_ORGN1_SHIFT 26 -#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT) -#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT) +#define TCR_SH1_MASK TCR_EL1_SH1_MASK -#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC) -#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA) -#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT) -#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA) -#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK) +#define TCR_TG0_SHIFT TCR_EL1_TG0_SHIFT +#define TCR_TG0_MASK TCR_EL1_TG0_MASK +#define TCR_TG0_4K (TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT) +#define TCR_TG0_64K (TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT) +#define TCR_TG0_16K (TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT) -#define TCR_SH0_SHIFT 12 -#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) -#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) +#define TCR_TG1_SHIFT TCR_EL1_TG1_SHIFT +#define TCR_TG1_MASK TCR_EL1_TG1_MASK +#define TCR_TG1_16K (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT) +#define TCR_TG1_4K (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT) +#define TCR_TG1_64K (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT) -#define TCR_SH1_SHIFT 28 -#define TCR_SH1_MASK 
(UL(3) << TCR_SH1_SHIFT) -#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT) -#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER) - -#define TCR_TG0_SHIFT 14 -#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) -#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) -#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) -#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) - -#define TCR_TG1_SHIFT 30 -#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT) -#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT) -#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) -#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) - -#define TCR_IPS_SHIFT 32 -#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) -#define TCR_A1 (UL(1) << 22) -#define TCR_ASID16 (UL(1) << 36) -#define TCR_TBI0 (UL(1) << 37) -#define TCR_TBI1 (UL(1) << 38) -#define TCR_HA (UL(1) << 39) -#define TCR_HD (UL(1) << 40) -#define TCR_HPD0_SHIFT 41 -#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT) -#define TCR_HPD1_SHIFT 42 -#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT) -#define TCR_TBID0 (UL(1) << 51) -#define TCR_TBID1 (UL(1) << 52) -#define TCR_NFD0 (UL(1) << 53) -#define TCR_NFD1 (UL(1) << 54) -#define TCR_E0PD0 (UL(1) << 55) -#define TCR_E0PD1 (UL(1) << 56) -#define TCR_TCMA0 (UL(1) << 57) -#define TCR_TCMA1 (UL(1) << 58) -#define TCR_DS (UL(1) << 59) +#define TCR_IPS_SHIFT TCR_EL1_IPS_SHIFT +#define TCR_IPS_MASK TCR_EL1_IPS_MASK +#define TCR_A1 TCR_EL1_A1 +#define TCR_ASID16 TCR_EL1_AS +#define TCR_TBI0 TCR_EL1_TBI0 +#define TCR_TBI1 TCR_EL1_TBI1 +#define TCR_HA TCR_EL1_HA +#define TCR_HD TCR_EL1_HD +#define TCR_HPD0 TCR_EL1_HPD0 +#define TCR_HPD1 TCR_EL1_HPD1 +#define TCR_TBID0 TCR_EL1_TBID0 +#define TCR_TBID1 TCR_EL1_TBID1 +#define TCR_E0PD0 TCR_EL1_E0PD0 +#define TCR_E0PD1 TCR_EL1_E0PD1 +#define TCR_DS TCR_EL1_DS /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 85dceb1c66f4..21a3d3342283 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -84,7 +84,7 @@ extern unsigned long prot_ns_shared; #else static inline bool __pure lpa2_is_enabled(void) { - return read_tcr() & TCR_DS; + return read_tcr() & TCR_EL1_DS; } #define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 
0 : PTE_SHARED) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..c8e33abfdaef 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1969,7 +1969,7 @@ static struct cpumask dbm_cpus __read_mostly; static inline void __cpu_enable_hw_dbm(void) { - u64 tcr = read_sysreg(tcr_el1) | TCR_HD; + u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD; write_sysreg(tcr, tcr_el1); isb(); @@ -2255,7 +2255,7 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) { if (this_cpu_has_cap(ARM64_HAS_E0PD)) - sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); + sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1); } #endif /* CONFIG_ARM64_E0PD */ diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..8ac26be77685 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -141,13 +141,13 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr) { u64 sctlr = read_sysreg(sctlr_el1); - u64 tcr = read_sysreg(tcr_el1) | TCR_DS; + u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS; u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - tcr &= ~TCR_IPS_MASK; - tcr |= parange << TCR_IPS_SHIFT; + tcr &= ~TCR_EL1_IPS_MASK; + tcr |= parange << TCR_EL1_IPS_SHIFT; asm(" msr sctlr_el1, %0 ;" " isb ;" @@ -263,7 +263,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt) } if (va_bits > VA_BITS_MIN) - sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits)); + sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits)); /* * The virtual KASLR displacement modulo 2MiB is decided by the diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c index b19d5d6cb8b3..9619ece66b79 100644 --- a/arch/arm64/kernel/vmcore_info.c +++ b/arch/arm64/kernel/vmcore_info.c @@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void); static inline u64 get_tcr_el1_t1sz(void) { - return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; + return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT; } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 86818511962b..01e868116448 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,15 +23,18 @@ #include #ifdef CONFIG_ARM64_64K_PAGES -#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT)) #elif defined(CONFIG_ARM64_16K_PAGES) -#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT)) #else /* CONFIG_ARM64_4K_PAGES */ -#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K +#define TCR_TG_FLAGS ((TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT) |\ + (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT)) #endif #ifdef CONFIG_RANDOMIZE_BASE -#define TCR_KASLR_FLAGS TCR_NFD1 +#define TCR_KASLR_FLAGS TCR_EL1_NFD1 #else #define TCR_KASLR_FLAGS 0 #endif @@ -40,23 +43,30 @@ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA #ifdef CONFIG_KASAN_SW_TAGS -#define TCR_KASAN_SW_FLAGS TCR_TBI1 | TCR_TBID1 +#define TCR_KASAN_SW_FLAGS TCR_EL1_TBI1 | TCR_EL1_TBID1 #else #define TCR_KASAN_SW_FLAGS 0 #endif #ifdef CONFIG_KASAN_HW_TAGS 
-#define TCR_MTE_FLAGS	TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS	TCR_EL1_TCMA1 | TCR_EL1_TBI1 | TCR_EL1_TBID1
 #elif defined(CONFIG_ARM64_MTE)
 /*
  * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
  * TBI being enabled at EL1.
  */
-#define TCR_MTE_FLAGS	TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS	TCR_EL1_TBI1 | TCR_EL1_TBID1
 #else
 #define TCR_MTE_FLAGS	0
 #endif
 
+#define TCR_IRGN_WBWA	((TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT) |\
+			 (TCR_EL1_IRGN1_WBWA << TCR_EL1_IRGN1_SHIFT))
+#define TCR_ORGN_WBWA	((TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT) |\
+			 (TCR_EL1_ORGN1_WBWA << TCR_EL1_ORGN1_SHIFT))
+#define TCR_SHARED	((TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT) |\
+			 (TCR_EL1_SH1_INNER << TCR_EL1_SH1_SHIFT))
+
 /*
  * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
  * changed during mte_cpu_setup to Normal Tagged if the system supports MTE.
@@ -129,7 +139,7 @@ SYM_FUNC_START(cpu_do_resume)
 
 	/* Don't change t0sz here, mask those bits when restoring */
 	mrs	x7, tcr_el1
-	bfi	x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+	bfi	x8, x7, TCR_EL1_T0SZ_SHIFT, TCR_EL1_T0SZ_WIDTH
 
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
@@ -481,8 +491,8 @@ SYM_FUNC_START(__cpu_setup)
 	tcr2	.req	x15
 	mov_q	mair, MAIR_EL1_SET
 	mov_q	tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
-		     TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
-		     TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
+		     TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_EL1_AS | \
+		     TCR_EL1_TBI0 | TCR_EL1_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
 	mov	tcr2, xzr
 
 	tcr_clear_errata_bits tcr, x9, x5
@@ -492,7 +502,7 @@ SYM_FUNC_START(__cpu_setup)
 alternative_if ARM64_HAS_VA52
 	tcr_set_t1sz	tcr, x9
 #ifdef CONFIG_ARM64_LPA2
-	orr	tcr, tcr, #TCR_DS
+	orr	tcr, tcr, #TCR_EL1_DS
 #endif
 alternative_else_nop_endif
 #endif
@@ -500,7 +510,7 @@ alternative_else_nop_endif
 	/*
 	 * Set the IPS bits in TCR_EL1.
 	 */
-	tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6
+	tcr_compute_pa_size tcr, #TCR_EL1_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
 	 * Enable hardware update of the Access Flags bit.
@@ -510,7 +520,7 @@ alternative_else_nop_endif
 	mrs	x9, ID_AA64MMFR1_EL1
 	ubfx	x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4
 	cbz	x9, 1f
-	orr	tcr, tcr, #TCR_HA		// hardware Access flag update
+	orr	tcr, tcr, #TCR_EL1_HA		// hardware Access flag update
 #ifdef CONFIG_ARM64_HAFT
 	cmp	x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
 	b.lt	1f
From bfc184cb1ba7226b21ab26f0b220581895c5ac9e Mon Sep 17 00:00:00 2001
From: Chaitanya S Prakash
Date: Fri, 17 Oct 2025 10:44:36 +0530
Subject: [PATCH 32/94] arm64/mm: Allow __create_pgd_mapping() to propagate
 pgtable_alloc() errors

arch_add_memory() is used to hotplug memory into a system, but as part
of its implementation it calls __create_pgd_mapping(), which uses
pgtable_alloc() to build intermediate page tables. As this path was
initially only used during early boot, pgtable_alloc() is designed to
BUG_ON() on failure. However, if memory hotplug is attempted when the
system's memory is extremely tight and the allocation fails, this
panics the system, which is not desirable.

Hence update __create_pgd_mapping() and all its callers to be non-void
and propagate -ENOMEM on allocation failure, allowing the system to fail
gracefully. During early boot, however, an allocation failure should
still be fatal, so create a wrapper around __create_pgd_mapping() called
early_create_pgd_mapping() which panics on a non-zero return value.
All the init calls are updated to use this wrapper rather than the modified __create_pgd_mapping() to restore functionality. Fixes: 4ab215061554 ("arm64: Add memory hotplug support") Reviewed-by: Dev Jain Reviewed-by: Ryan Roberts Reviewed-by: Kevin Brodsky Signed-off-by: Chaitanya S Prakash Signed-off-by: Linu Cherian Reviewed-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 214 ++++++++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 78 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..99555ebbab38 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -49,6 +49,8 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ +#define INVALID_PHYS_ADDR (-1ULL) + DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key); u64 kimage_voffset __ro_after_init; @@ -194,11 +196,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, } while (ptep++, addr += PAGE_SIZE, addr != end); } -static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { unsigned long next; pmd_t pmd = READ_ONCE(*pmdp); @@ -213,6 +215,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(TABLE_PTE); + if (pte_phys == INVALID_PHYS_ADDR) + return -ENOMEM; ptep = pte_set_fixmap(pte_phys); init_clear_pgtable(ptep); ptep += pte_index(addr); @@ -244,11 +248,13 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, * walker. 
*/ pte_clear_fixmap(); + + return 0; } -static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) +static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -269,22 +275,29 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), READ_ONCE(pmd_val(*pmdp)))); } else { - alloc_init_cont_pte(pmdp, addr, next, phys, prot, - pgtable_alloc, flags); + int ret; + + ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + return ret; BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); } phys += next - addr; } while (pmdp++, addr = next, addr != end); + + return 0; } -static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long next; pud_t pud = READ_ONCE(*pudp); pmd_t *pmdp; @@ -301,6 +314,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(TABLE_PMD); + if (pmd_phys == INVALID_PHYS_ADDR) + return -ENOMEM; pmdp = pmd_set_fixmap(pmd_phys); init_clear_pgtable(pmdp); pmdp += pmd_index(addr); @@ -320,20 +335,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + if (ret) + goto out; pmdp += pmd_index(next) - pmd_index(addr); phys += next - addr; } while (addr = next, addr != end); +out: pmd_clear_fixmap(); + + return ret; } -static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret = 0; unsigned long next; p4d_t p4d = READ_ONCE(*p4dp); pud_t *pudp; @@ -346,6 +367,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(TABLE_PUD); + if (pud_phys == INVALID_PHYS_ADDR) + return -ENOMEM; pudp = pud_set_fixmap(pud_phys); init_clear_pgtable(pudp); pudp += pud_index(addr); @@ -375,8 +398,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), READ_ONCE(pud_val(*pudp)))); } else { - alloc_init_cont_pmd(pudp, addr, next, phys, prot, - pgtable_alloc, flags); + ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + goto out; BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); @@ -384,14 +409,18 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, phys += next - addr; } while (pudp++, addr = next, addr != end); 
+out: pud_clear_fixmap(); + + return ret; } -static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long next; pgd_t pgd = READ_ONCE(*pgdp); p4d_t *p4dp; @@ -404,6 +433,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = pgtable_alloc(TABLE_P4D); + if (p4d_phys == INVALID_PHYS_ADDR) + return -ENOMEM; p4dp = p4d_set_fixmap(p4d_phys); init_clear_pgtable(p4dp); p4dp += p4d_index(addr); @@ -418,8 +449,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, next = p4d_addr_end(addr, end); - alloc_init_pud(p4dp, addr, next, phys, prot, - pgtable_alloc, flags); + ret = alloc_init_pud(p4dp, addr, next, phys, prot, + pgtable_alloc, flags); + if (ret) + goto out; BUG_ON(p4d_val(old_p4d) != 0 && p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); @@ -427,15 +460,19 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, phys += next - addr; } while (p4dp++, addr = next, addr != end); +out: p4d_clear_fixmap(); + + return ret; } -static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); @@ -444,7 +481,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, * within a page, we cannot map the region as the caller expects. 
*/ if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) - return; + return -EINVAL; phys &= PAGE_MASK; addr = virt & PAGE_MASK; @@ -452,25 +489,45 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); - alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, - flags); + ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + if (ret) + return ret; phys += next - addr; } while (pgdp++, addr = next, addr != end); + + return 0; } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(enum pgtable_type), - int flags) +static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { + int ret; + mutex_lock(&fixmap_lock); - __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, - pgtable_alloc, flags); + ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); mutex_unlock(&fixmap_lock); + + return ret; } -#define INVALID_PHYS_ADDR (-1ULL) +static void early_create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) +{ + int ret; + + ret = __create_pgd_mapping(pgdir, phys, virt, size, prot, pgtable_alloc, + flags); + if (ret) + panic("Failed to create page tables\n"); +} static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, enum pgtable_type pgtable_type) @@ -511,21 +568,13 @@ try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp) static phys_addr_t __maybe_unused pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa; - - pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type); - BUG_ON(pa == INVALID_PHYS_ADDR); - return pa; + return __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type); } static phys_addr_t pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa; - - pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type); - BUG_ON(pa == INVALID_PHYS_ADDR); - return pa; + return __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type); } static void split_contpte(pte_t *ptep) @@ -903,8 +952,8 @@ void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -918,8 +967,8 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, if (page_mappings_only) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc_special_mm, flags); + early_create_pgd_mapping(mm->pgd, phys, virt, size, prot, + pgd_pgtable_alloc_special_mm, flags); } static void update_mapping_prot(phys_addr_t phys, unsigned long virt, @@ -931,8 +980,8 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); /* flush the TLBs after updating live kernel mappings */ flush_tlb_kernel_range(virt, virt + size); @@ -941,8 +990,8 
@@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, phys_addr_t end, pgprot_t prot, int flags) { - __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, - prot, early_pgtable_alloc, flags); + early_create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); } void __init mark_linear_text_alias_ro(void) @@ -1158,6 +1207,8 @@ static int __init __kpti_install_ng_mappings(void *__unused) remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); if (!cpu) { + int ret; + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); @@ -1178,9 +1229,11 @@ static int __init __kpti_install_ng_mappings(void *__unused) // covers the PTE[] page itself, the remaining entries are free // to be used as a ad-hoc fixmap. // - __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), - KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, - kpti_ng_pgd_alloc, 0); + ret = __create_pgd_mapping_locked(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + if (ret) + panic("Failed to create page tables\n"); } cpu_install_idmap(); @@ -1233,9 +1286,9 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, - entry_tramp_text_size(), prot, - pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); + early_create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) @@ -1877,23 +1930,28 @@ int arch_add_memory(int nid, u64 start, u64 size, if (force_pte_mapping()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, params->pgprot, pgd_pgtable_alloc_init_mm, - flags); + ret = __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), + size, params->pgprot, pgd_pgtable_alloc_init_mm, + flags); + if (ret) + goto err; memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params); if (ret) - __remove_pgd_mapping(swapper_pg_dir, - __phys_to_virt(start), size); - else { - /* Address of hotplugged memory can be smaller */ - max_pfn = max(max_pfn, PFN_UP(start + size)); - max_low_pfn = max_pfn; - } + goto err; + /* Address of hotplugged memory can be smaller */ + max_pfn = max(max_pfn, PFN_UP(start + size)); + max_low_pfn = max_pfn; + + return 0; + +err: + __remove_pgd_mapping(swapper_pg_dir, + __phys_to_virt(start), size); return ret; } From 1b214452b6a725a1451b244bd2baf537954863c5 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Fri, 17 Oct 2025 10:44:37 +0530 Subject: [PATCH 33/94] arm64/mm: Rename try_pgd_pgtable_alloc_init_mm With BUG_ON in pgd_pgtable_alloc_init_mm moved up to higher layer, gfp flags is the only difference between try_pgd_pgtable_alloc_init_mm and pgd_pgtable_alloc_init_mm. Hence rename the "try" version to pgd_pgtable_alloc_init_mm_gfp. 
Reviewed-by: Dev Jain Reviewed-by: Ryan Roberts Reviewed-by: Kevin Brodsky Signed-off-by: Linu Cherian Reviewed-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 99555ebbab38..f604a7983de3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -560,7 +560,7 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp, } static phys_addr_t -try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp) +pgd_pgtable_alloc_init_mm_gfp(enum pgtable_type pgtable_type, gfp_t gfp) { return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type); } @@ -568,7 +568,7 @@ try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp) static phys_addr_t __maybe_unused pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type) { - return __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type); + return pgd_pgtable_alloc_init_mm_gfp(pgtable_type, GFP_PGTABLE_KERNEL); } static phys_addr_t @@ -595,7 +595,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont) pte_t *ptep; int i; - pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp); + pte_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PTE, gfp); if (pte_phys == INVALID_PHYS_ADDR) return -ENOMEM; ptep = (pte_t *)phys_to_virt(pte_phys); @@ -640,7 +640,7 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont) pmd_t *pmdp; int i; - pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp); + pmd_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PMD, gfp); if (pmd_phys == INVALID_PHYS_ADDR) return -ENOMEM; pmdp = (pmd_t *)phys_to_virt(pmd_phys); From c320dbb7c80d93a762c01b4a652d9292629869e7 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Fri, 17 Oct 2025 21:32:51 +0530 Subject: [PATCH 34/94] arm64/mm: Elide TLB flush in certain pte protection transitions Currently arm64 does an unconditional TLB flush in mprotect(). This is not required for some cases, for example, when changing from PROT_NONE to PROT_READ | PROT_WRITE (a real usecase - glibc malloc does this to emulate growing into the non-main heaps), and unsetting uffd-wp in a range. Therefore, implement pte_needs_flush() for arm64, which is already implemented by some other arches as well. Running a userspace program changing permissions back and forth between PROT_NONE and PROT_READ | PROT_WRITE, and measuring the average time taken for the none->rw transition, I get a reduction from 3.2 microseconds to 2.85 microseconds, giving a 12.3% improvement. 
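
For reference, a sketch of the sort of userspace loop used for this
measurement (illustrative only; the mapping size and iteration count are
arbitrary, and this times the full none->rw->none round trip):

	#include <stdio.h>
	#include <sys/mman.h>
	#include <time.h>

	int main(void)
	{
		size_t len = 1UL << 20;
		long i, iters = 100000;
		struct timespec t0, t1;
		char *p = mmap(NULL, len, PROT_NONE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED)
			return 1;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		for (i = 0; i < iters; i++) {
			/* none->rw: old pte is invalid, no flush needed */
			mprotect(p, len, PROT_READ | PROT_WRITE);
			/* rw->none: still flushed as before */
			mprotect(p, len, PROT_NONE);
		}
		clock_gettime(CLOCK_MONOTONIC, &t1);

		printf("%.1f ns per round trip\n",
		       ((t1.tv_sec - t0.tv_sec) * 1e9 +
			(t1.tv_nsec - t0.tv_nsec)) / iters);
		return 0;
	}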
Reviewed-by: Kefeng Wang
Signed-off-by: Dev Jain
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/tlbflush.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 18a5dc0c9a54..8d6c9a867290 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -524,6 +524,33 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
 {
 	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
 }
+
+static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
+{
+	ptdesc_t diff = oldval ^ newval;
+
+	/* invalid to valid transition requires no flush */
+	if (!(oldval & PTE_VALID))
+		return false;
+
+	/* Transition in the SW bits requires no flush */
+	diff &= ~PTE_SWBITS_MASK;
+
+	return diff;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+	return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+	return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
 #endif
 
 #endif
From 37cb0aab9068e8d7907822405fe5545a2cd7af0b Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Thu, 23 Oct 2025 13:44:28 -0700
Subject: [PATCH 35/94] arm64: mm: make linear mapping permission update more
 robust for partial range

The commit fcf8dda8cc48 ("arm64: pageattr: Explicitly bail out when
changing permissions for vmalloc_huge mappings") made permission updates
for partial ranges more robust. But the linear mapping permission update
still assumes the whole range is updated, iterating from the first page
of the area all the way to the last page.

Make it more robust by starting the linear mapping permission update from
the page mapped by the start address and updating only numpages pages.

Reviewed-by: Ryan Roberts
Reviewed-by: Dev Jain
Signed-off-by: Yang Shi
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/pageattr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 5135f2d66958..08ac96b9f846 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -148,7 +148,6 @@ static int change_memory_common(unsigned long addr, int numpages,
 	unsigned long size = PAGE_SIZE * numpages;
 	unsigned long end = start + size;
 	struct vm_struct *area;
-	int i;
 
 	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
@@ -184,8 +183,9 @@ static int change_memory_common(unsigned long addr, int numpages,
 	 */
 	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
 			    pgprot_val(clear_mask) == PTE_RDONLY)) {
-		for (i = 0; i < area->nr_pages; i++) {
-			__change_memory_common((u64)page_address(area->pages[i]),
+		unsigned long idx = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
+		for (; numpages; idx++, numpages--) {
+			__change_memory_common((u64)page_address(area->pages[idx]),
 					       PAGE_SIZE, set_mask, clear_mask);
 		}
 	}
From 0aab5772a53dd006c13ba629e8dc8816b7cd213d Mon Sep 17 00:00:00 2001
From: Sascha Bischoff
Date: Wed, 22 Oct 2025 13:45:36 +0000
Subject: [PATCH 36/94] arm64/sysreg: Fix checks for incomplete sysreg
 definitions

The checks for incomplete sysreg definitions were checking whether
next_bit was greater than 0, which is incorrect and missed occasions
where bit 0 hasn't been defined for a sysreg. The reason is that
next_bit is -1 when all bits have been processed (LSB - 1).
Change the checks to use >= 0 instead. Also, set next_bit in Mapping to
-1 instead of 0 to match these new checks.

There are no changes to the generated sysreg definitions as part of this
change, as conveniently no register lacks a definition for bit 0.

Signed-off-by: Sascha Bischoff
Reviewed-by: Mark Brown
Signed-off-by: Catalin Marinas
---
 arch/arm64/tools/gen-sysreg.awk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index bbbb812603e8..172d04bb6cc3 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -133,7 +133,7 @@ $1 == "SysregFields" && block_current() == "Root" {
 
 $1 == "EndSysregFields" && block_current() == "SysregFields" {
 	expect_fields(1)
-	if (next_bit > 0)
+	if (next_bit >= 0)
 		fatal("Unspecified bits in " reg)
 
 	define(reg "_RES0", "(" res0 ")")
@@ -188,7 +188,7 @@ $1 == "Sysreg" && block_current() == "Root" {
 
 $1 == "EndSysreg" && block_current() == "Sysreg" {
 	expect_fields(1)
-	if (next_bit > 0)
+	if (next_bit >= 0)
 		fatal("Unspecified bits in " reg)
 
 	if (res0 != null)
@@ -225,7 +225,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" {
 	print "/* For " reg " fields see " $2 " */"
 	print ""
 
-	next_bit = 0
+	next_bit = -1
 	res0 = null
 	res1 = null
 	unkn = null
From fe2ef46995d5db49a37337f11fe2c6733676c24c Mon Sep 17 00:00:00 2001
From: Sascha Bischoff
Date: Wed, 22 Oct 2025 13:45:36 +0000
Subject: [PATCH 37/94] arm64/sysreg: Support feature-specific fields with
 'Prefix' descriptor

Some system register field encodings change based on, for example, the
in-use architecture features or the context in which they are accessed.
In order to support these different field encodings, introduce the
Prefix descriptor (Prefix, EndPrefix) for describing such sysregs.

The Prefix descriptor can be used in the following way:

Sysreg	EXAMPLE	0	1	2	3	4
Prefix	FEAT_A
Field	63:0	Foo
EndPrefix
Prefix	FEAT_B
Field	63:1	Bar
Res0	0
EndPrefix
Field	63:0	Baz
EndSysreg

This will generate a single set of system register encodings (REG_,
SYS_, ...), and then generate three sets of field definitions for the
system register called EXAMPLE. The first set is prefixed by FEAT_A,
e.g. FEAT_A_EXAMPLE_Foo. The second set is prefixed by FEAT_B, e.g.
FEAT_B_EXAMPLE_Bar. The third set is not given a prefix at all, e.g.
EXAMPLE_Baz. For each set, a corresponding set of defines for Res0,
Res1, and Unkn is generated.

The intent for the final prefix-less fields is to describe default or
legacy field encodings. This ensures that prefixed encodings can be
added to already-present sysregs without affecting existing legacy code.

Prefixed fields must be defined before those without a prefix, and this
is checked by the generator. This ensures consistent ordering within the
sysreg definitions.

The Prefix descriptor can be used within Sysreg or SysregFields blocks.
Field, Res0, Res1, Unkn, Raz, SignedEnum, UnsignedEnum and Enum can all
be used within a Prefix block. Fields and Mapping cannot. Fields that
vary with features must be described as part of a SysregFields block,
instead. Mappings, which are just a code comment, make little sense in
this context, and have hence not been included.

There are no changes to the generated system register definitions as
part of this change.
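
For illustration, the EXAMPLE register above would produce output
roughly like the following (hypothetical, following the define_field()
format; the per-set RES0/RES1/UNKN masks are omitted here):

	#define FEAT_A_EXAMPLE_Foo		GENMASK(63, 0)
	#define FEAT_A_EXAMPLE_Foo_MASK		GENMASK(63, 0)
	#define FEAT_A_EXAMPLE_Foo_SHIFT	0
	#define FEAT_A_EXAMPLE_Foo_WIDTH	64

	#define FEAT_B_EXAMPLE_Bar		GENMASK(63, 1)
	#define EXAMPLE_Baz			GENMASK(63, 0)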
Signed-off-by: Sascha Bischoff Reviewed-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 126 ++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 38 deletions(-) diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 172d04bb6cc3..5ca81eaac2cb 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -44,21 +44,26 @@ function expect_fields(nf) { # Print a CPP macro definition, padded with spaces so that the macro bodies # line up in a column -function define(name, val) { - printf "%-56s%s\n", "#define " name, val +function define(prefix, name, val) { + printf "%-56s%s\n", "#define " prefix name, val +} + +# Same as above, but without a prefix +function define_reg(name, val) { + define(null, name, val) } # Print standard BITMASK/SHIFT/WIDTH CPP definitions for a field -function define_field(reg, field, msb, lsb) { - define(reg "_" field, "GENMASK(" msb ", " lsb ")") - define(reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")") - define(reg "_" field "_SHIFT", lsb) - define(reg "_" field "_WIDTH", msb - lsb + 1) +function define_field(prefix, reg, field, msb, lsb) { + define(prefix, reg "_" field, "GENMASK(" msb ", " lsb ")") + define(prefix, reg "_" field "_MASK", "GENMASK(" msb ", " lsb ")") + define(prefix, reg "_" field "_SHIFT", lsb) + define(prefix, reg "_" field "_WIDTH", msb - lsb + 1) } # Print a field _SIGNED definition for a field -function define_field_sign(reg, field, sign) { - define(reg "_" field "_SIGNED", sign) +function define_field_sign(prefix, reg, field, sign) { + define(prefix, reg "_" field "_SIGNED", sign) } # Parse a "[:]" string into the global variables @msb and @lsb @@ -128,6 +133,8 @@ $1 == "SysregFields" && block_current() == "Root" { next_bit = 63 + delete seen_prefixes + next } @@ -136,9 +143,9 @@ $1 == "EndSysregFields" && block_current() == "SysregFields" { if (next_bit >= 0) fatal("Unspecified bits in " reg) - define(reg "_RES0", "(" res0 ")") - define(reg "_RES1", "(" res1 ")") - define(reg "_UNKN", "(" unkn ")") + define(prefix, reg "_RES0", "(" res0 ")") + define(prefix, reg "_RES1", "(" res1 ")") + define(prefix, reg "_UNKN", "(" unkn ")") print "" reg = null @@ -170,19 +177,22 @@ $1 == "Sysreg" && block_current() == "Root" { fatal("Duplicate Sysreg definition for " reg) defined_regs[reg] = 1 - define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) - define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") + define_reg("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) + define_reg("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") - define("SYS_" reg "_Op0", op0) - define("SYS_" reg "_Op1", op1) - define("SYS_" reg "_CRn", crn) - define("SYS_" reg "_CRm", crm) - define("SYS_" reg "_Op2", op2) + define_reg("SYS_" reg "_Op0", op0) + define_reg("SYS_" reg "_Op1", op1) + define_reg("SYS_" reg "_CRn", crn) + define_reg("SYS_" reg "_CRm", crm) + define_reg("SYS_" reg "_Op2", op2) print "" + prefix = null next_bit = 63 + delete seen_prefixes + next } @@ -192,11 +202,11 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { fatal("Unspecified bits in " reg) if (res0 != null) - define(reg "_RES0", "(" res0 ")") + define(prefix, reg "_RES0", "(" res0 ")") if (res1 != null) - define(reg "_RES1", "(" res1 ")") + define(prefix, reg "_RES1", "(" res1 ")") if (unkn != null) - define(reg "_UNKN", "(" unkn ")") + define(prefix, reg "_UNKN", "(" unkn ")") if (res0 != null || res1 != null || unkn != null) print "" @@ 
-209,6 +219,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { res0 = null res1 = null unkn = null + prefix = null block_pop() next @@ -233,8 +244,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { next } - -$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "RES0", $2) field = "RES0_" msb "_" lsb @@ -244,7 +254,7 @@ $1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "RES1", $2) field = "RES1_" msb "_" lsb @@ -254,7 +264,7 @@ $1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, "UNKN", $2) field = "UNKN_" msb "_" lsb @@ -264,62 +274,62 @@ $1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields next } -$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) + define_field(prefix, reg, field, msb, lsb) print "" next } -$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { expect_fields(2) parse_bitdef(reg, field, $2) next } -$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) - define_field_sign(reg, field, "true") + define_field(prefix, reg, field, msb, lsb) + define_field_sign(prefix, reg, field, "true") delete seen_enum_vals next } -$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) - define_field_sign(reg, field, "false") + define_field(prefix, reg, field, msb, lsb) + define_field_sign(prefix, reg, field, "false") delete seen_enum_vals next } -$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields" || block_current() == "Prefix") { block_push("Enum") expect_fields(3) field = $3 parse_bitdef(reg, field, $2) - define_field(reg, field, msb, lsb) + define_field(prefix, reg, field, msb, lsb) delete seen_enum_vals @@ -349,7 +359,47 @@ $1 == "EndEnum" && block_current() == "Enum" { fatal("Duplicate Enum value " val " for " name) seen_enum_vals[val] = 1 - define(reg "_" field "_" name, "UL(" val ")") + 
define(prefix, reg "_" field "_" name, "UL(" val ")") + next +} + +$1 == "Prefix" && (block_current() == "Sysreg" || block_current() == "SysregFields") { + block_push("Prefix") + + expect_fields(2) + + if (next_bit < 63) + fatal("Prefixed fields must precede non-prefixed fields (" reg ")") + + prefix = $2 "_" + + if (prefix in seen_prefixes) + fatal("Duplicate prefix " prefix " for " reg) + seen_prefixes[prefix] = 1 + + res0 = "UL(0)" + res1 = "UL(0)" + unkn = "UL(0)" + next_bit = 63 + + next +} + +$1 == "EndPrefix" && block_current() == "Prefix" { + expect_fields(1) + if (next_bit >= 0) + fatal("Unspecified bits in prefix " prefix " for " reg) + + define_resx_unkn(prefix, reg, res0, res1, unkn) + + prefix = null + res0 = "UL(0)" + res1 = "UL(0)" + unkn = "UL(0)" + next_bit = 63 + + block_pop() + next } From a0b130eedde0bc8c2d03932539e6753e2f0f70bc Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 22 Oct 2025 13:45:37 +0000 Subject: [PATCH 38/94] arm64/sysreg: Move generation of RES0/RES1/UNKN to function The RESx and UNKN define generation happens in two places (EndSysreg and EndSysregFields), and was using nearly identical code. Split this out into a function, and call that instead, rather then keeping the dupliated code. There are no changes to the generated sysregs as part of this change. Signed-off-by: Sascha Bischoff Signed-off-by: Catalin Marinas --- arch/arm64/tools/gen-sysreg.awk | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 5ca81eaac2cb..86860ab672dc 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -66,6 +66,18 @@ function define_field_sign(prefix, reg, field, sign) { define(prefix, reg "_" field "_SIGNED", sign) } +# Print the Res0, Res1, Unkn masks +function define_resx_unkn(prefix, reg, res0, res1, unkn) { + if (res0 != null) + define(prefix, reg "_RES0", "(" res0 ")") + if (res1 != null) + define(prefix, reg "_RES1", "(" res1 ")") + if (unkn != null) + define(prefix, reg "_UNKN", "(" unkn ")") + if (res0 != null || res1 != null || unkn != null) + print "" +} + # Parse a "[:]" string into the global variables @msb and @lsb function parse_bitdef(reg, field, bitdef, _bits) { @@ -143,10 +155,7 @@ $1 == "EndSysregFields" && block_current() == "SysregFields" { if (next_bit >= 0) fatal("Unspecified bits in " reg) - define(prefix, reg "_RES0", "(" res0 ")") - define(prefix, reg "_RES1", "(" res1 ")") - define(prefix, reg "_UNKN", "(" unkn ")") - print "" + define_resx_unkn(prefix, reg, res0, res1, unkn) reg = null res0 = null @@ -201,14 +210,7 @@ $1 == "EndSysreg" && block_current() == "Sysreg" { if (next_bit >= 0) fatal("Unspecified bits in " reg) - if (res0 != null) - define(prefix, reg "_RES0", "(" res0 ")") - if (res1 != null) - define(prefix, reg "_RES1", "(" res1 ")") - if (unkn != null) - define(prefix, reg "_UNKN", "(" unkn ")") - if (res0 != null || res1 != null || unkn != null) - print "" + define_resx_unkn(prefix, reg, res0, res1, unkn) reg = null op0 = null From a04fbfb8a175d4904727048b97fcdef12e392ed1 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 22 Oct 2025 13:45:37 +0000 Subject: [PATCH 39/94] arm64/sysreg: Add ICH_VMCR_EL2 Add the ICH_VMCR_EL2 register, which is required for the upcoming GICv5 KVM support. This register has two different field encodings, based on if it is used for GICv3 or GICv5-based VMs. The GICv5-specific field encodings are generated with a FEAT_GCIE prefix. 
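For the prefixed fields added to the sysreg file later in this series, the generator is then expected to emit definitions along these lines (a sketch derived from the awk rules above, not verbatim generator output), shown here for a single-bit EN field at bit 0:

  #define FEAT_GCIE_ICH_VMCR_EL2_EN                       GENMASK(0, 0)
  #define FEAT_GCIE_ICH_VMCR_EL2_EN_MASK                  GENMASK(0, 0)
  #define FEAT_GCIE_ICH_VMCR_EL2_EN_SHIFT                 0
  #define FEAT_GCIE_ICH_VMCR_EL2_EN_WIDTH                 1

The unprefixed GICv3 encodings keep their existing ICH_VMCR_EL2_* names.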
This register is already described in the GICv3 KVM code directly. This will be ported across to use the generated encodings as part of an upcoming change. Signed-off-by: Sascha Bischoff Signed-off-by: Catalin Marinas --- arch/arm64/tools/sysreg | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 1c6cdf9d54bb..8921b51866d6 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -4669,6 +4669,27 @@ Field 1 V3 Field 0 En EndSysreg +Sysreg ICH_VMCR_EL2 3 4 12 11 7 +Prefix FEAT_GCIE +Res0 63:32 +Field 31:27 VPMR +Res0 26:1 +Field 0 EN +EndPrefix +Res0 63:32 +Field 31:24 VPMR +Field 23:21 VBPR0 +Field 20:18 VBPR1 +Res0 17:10 +Field 9 VEOIM +Res0 8:5 +Field 4 VCBPR +Field 3 VFIQEn +Field 2 VAckCtl +Field 1 VENG1 +Field 0 VENG0 +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg From 472800cd5e382ff69c4f9d4179580ed46ab0a436 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Oct 2025 18:56:36 +0100 Subject: [PATCH 40/94] arm64/sme: Support disabling streaming mode via ptrace on SME only systems Currently it is not possible to disable streaming mode via ptrace on SME only systems, the interface for doing this is to write via NT_ARM_SVE but such writes will be rejected on a system without SVE support. Enable this functionality by allowing userspace to write SVE_PT_REGS_FPSIMD format data via NT_ARM_SVE with the vector length set to 0 on SME only systems. Such writes currently error since we require that a vector length is specified which should minimise the risk that existing software is relying on current behaviour. Reads are not supported since I am not aware of any use case for this and there is some risk that an existing userspace application may be confused if it reads NT_ARM_SVE on a system without SVE. Existing kernels will return FPSIMD formatted register state from NT_ARM_SVE if full SVE state is not stored, for example if the task has not used SVE. Returning a vector length of 0 would create a risk that software would try to do things like allocate space for register state with zero sizes, while returning a vector length of 128 bits would look like SVE is supported. It seems safer to just not make the changes to add read support. It remains possible for userspace to detect a SME only system via the ptrace interface only since reads of NT_ARM_SSVE and NT_ARM_ZA will succeed while reads of NT_ARM_SVE will fail. Read/write access to the FPSIMD registers in non-streaming mode is available via REGSET_FPR. sve_set_common() already avoids allocating SVE storage when doing a FPSIMD formatted write and allocating SME storage when doing a NT_ARM_SVE write so we change the function to validate the new case and skip setting a vector length for it. The aim is to make a minimally invasive change, no operation that would previously have succeeded will be affected, and we use a previously defined interface in new circumstances rather than define completely new ABI. 
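For illustration, a minimal userspace sketch of the new write (mirroring the selftest added later in this series; error handling omitted, the helper name is illustrative):

  #include <sys/types.h>
  #include <sys/ptrace.h>
  #include <sys/uio.h>
  #include <linux/elf.h>           /* NT_ARM_SVE */
  #include <asm/ptrace.h>          /* struct user_sve_header, SVE_PT_* */

  /* Exit streaming mode on an SME-only tracee: FPSIMD format, VL == 0 */
  static long exit_streaming_mode(pid_t child)
  {
          char buf[SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)] = { 0 };
          struct user_sve_header *sve = (void *)buf;
          struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

          sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
          sve->flags = SVE_PT_REGS_FPSIMD;  /* no other flags accepted */
          sve->vl = 0;                      /* required without SVE */

          /* FPSIMD payload at SVE_PT_FPSIMD_OFFSET left zeroed here */
          return ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
  }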
Signed-off-by: Mark Brown Reviewed-by: David Spickett Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sve.rst | 5 ++++ arch/arm64/kernel/ptrace.c | 40 ++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst index 28152492c29c..a61c9d0efe4d 100644 --- a/Documentation/arch/arm64/sve.rst +++ b/Documentation/arch/arm64/sve.rst @@ -402,6 +402,11 @@ The regset data starts with struct user_sve_header, containing: streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode if the target was not in streaming mode. +* On systems that do not support SVE it is permitted to use SETREGSET to + write SVE_PT_REGS_FPSIMD formatted data via NT_ARM_SVE, in this case the + vector length should be specified as 0. This allows streaming mode to be + disabled on systems with SME but not SVE. + * If any register data is provided along with SVE_PT_VL_ONEXEC then the registers data will be interpreted with the current vector length, not the vector length configured for use on exec. diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 4b001121c72d..b9bdd83fbbca 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -912,13 +912,39 @@ static int sve_set_common(struct task_struct *target, return -EINVAL; /* - * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by - * vec_set_vector_length(), which will also validate them for us: + * On systems without SVE we accept FPSIMD format writes with + * a VL of 0 to allow exiting streaming mode, otherwise a VL + * is required. */ - ret = vec_set_vector_length(target, type, header.vl, - ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); - if (ret) - return ret; + if (header.vl) { + /* + * If the system does not support SVE we can't + * configure a SVE VL. + */ + if (!system_supports_sve() && type == ARM64_VEC_SVE) + return -EINVAL; + + /* + * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are + * consumed by vec_set_vector_length(), which will + * also validate them for us: + */ + ret = vec_set_vector_length(target, type, header.vl, + ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16); + if (ret) + return ret; + } else { + /* If the system supports SVE we require a VL. */ + if (system_supports_sve()) + return -EINVAL; + + /* + * Only FPSIMD formatted data with no flags set is + * supported. + */ + if (header.flags != SVE_PT_REGS_FPSIMD) + return -EINVAL; + } /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */ if (type == ARM64_VEC_SME) { @@ -1016,7 +1042,7 @@ static int sve_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; return sve_set_common(target, regset, pos, count, kbuf, ubuf, From eb9df6d69a96717fa9320c8d2be8e0033695f353 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Oct 2025 18:56:37 +0100 Subject: [PATCH 41/94] kselftst/arm64: Test NT_ARM_SVE FPSIMD format writes on non-SVE systems In order to allow exiting streaming mode on systems with SME but not SVE we allow writes of FPSIMD format data via NT_ARM_SVE even when SVE is not supported, add a test case that covers this to sve-ptrace. We do not support reads. 
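The new test only runs on SME-only systems, which the selftest detects from the hwcaps roughly as follows (sketch):

  #include <stdbool.h>
  #include <sys/auxv.h>
  #include <asm/hwcap.h>

  static bool sme_only_system(void)
  {
          /* SME present but SVE absent */
          return !(getauxval(AT_HWCAP) & HWCAP_SVE) &&
                  (getauxval(AT_HWCAP2) & HWCAP2_SME);
  }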
Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index e0fc3a001e28..f44d44618575 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -394,6 +394,58 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) free(svebuf); } +/* Write the FPSIMD registers via the SVE regset when SVE is not supported */ +static void ptrace_sve_fpsimd_no_sve(pid_t child) +{ + void *svebuf; + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd, new_fpsimd; + unsigned int i, j; + unsigned char *p; + int ret; + + svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + if (!svebuf) { + ksft_test_result_fail("Failed to allocate FPSIMD buffer\n"); + return; + } + + /* On a system without SVE the VL should be set to 0 */ + memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + sve = svebuf; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD); + sve->vl = 0; + + /* Try to set a known FPSIMD state via PT_REGS_SVE */ + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j) + p[j] = j; + } + + ret = set_sve(child, &vec_types[0], sve); + ksft_test_result(ret == 0, "FPSIMD write via SVE\n"); + if (ret) { + ksft_test_result_skip("Verify FPSIMD write via SVE\n"); + goto out; + } + + /* Verify via the FPSIMD regset */ + if (get_fpsimd(child, &new_fpsimd)) { + ksft_test_result_skip("Verify FPSIMD write via SVE\n"); + goto out; + } + ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0, + "Verify FPSIMD write via SVE\n"); + +out: + free(svebuf); +} + /* Validate attempting to set SVE data and read SVE data */ static void ptrace_set_sve_get_sve_data(pid_t child, const struct vec_type *type, @@ -826,6 +878,15 @@ static int do_parent(pid_t child) } } + /* We support SVE writes of FPSMID format on SME only systems */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE) && + (getauxval(AT_HWCAP2) & HWCAP2_SME)) { + ptrace_sve_fpsimd_no_sve(child); + } else { + ksft_test_result_skip("FPSIMD write via SVE\n"); + ksft_test_result_skip("Verify FPSIMD write via SVE\n"); + } + ret = EXIT_SUCCESS; error: From a0245b42f881be6f0ddf678ce7a0d150362938c8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Oct 2025 18:56:38 +0100 Subject: [PATCH 42/94] kselftest/arm64: Cover disabling streaming mode without SVE in fp-ptrace On a system which support SME but not SVE we can now disable streaming mode via ptrace by writing FPSIMD formatted data through NT_ARM_SVE with a VL of 0. Extend fp-ptrace to cover rather than skip these cases, relax the check for SVE writes of FPSIMD format data to not skip if SME is supported and accept 0 as the VL when performing the ptrace write. 
Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index a85c19e9524e..0114108ab25f 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1071,7 +1071,7 @@ static bool sve_write_supported(struct test_config *config) static bool sve_write_fpsimd_supported(struct test_config *config) { - if (!sve_supported()) + if (!sve_supported() && !sme_supported()) return false; if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA)) @@ -1231,9 +1231,6 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config) vl = vl_expected(config); vq = __sve_vq_from_vl(vl); - if (!vl) - return; - iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { From 79301c7d605a10efea35af08167e0a362d8dffb1 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 14 Nov 2025 16:54:02 +0800 Subject: [PATCH 43/94] mm: add spurious fault fixing support for huge pmd The page faults may be spurious because of the racy access to the page table. For example, a non-populated virtual page is accessed on 2 CPUs simultaneously, thus the page faults are triggered on both CPUs. However, it's possible that one CPU (say CPU A) cannot find the reason for the page fault if the other CPU (say CPU B) has changed the page table before the PTE is checked on CPU A. Most of the time, the spurious page faults can be ignored safely. However, if the page fault is for the write access, it's possible that a stale read-only TLB entry exists in the local CPU and needs to be flushed on some architectures. This is called the spurious page fault fixing. In the current kernel, there is spurious fault fixing support for pte, but not for huge pmd because no architectures need it. But in the next patch in the series, we will change the write protection fault handling logic on arm64, so that some stale huge pmd entries may remain in the TLB. These entries need to be flushed via the huge pmd spurious fault fixing mechanism. 
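An architecture that can leave such stale entries behind overrides the new hook from its asm/pgtable.h, for example (a hypothetical conservative override; the arm64 version in the next patch uses a local, non-notifying flush instead):

  /* The generic fallback is a no-op; an arch override could be: */
  #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
          flush_tlb_page(vma, address)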
Signed-off-by: Huang Ying Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Zi Yan Cc: Will Deacon Cc: Andrew Morton Cc: Vlastimil Babka Cc: Baolin Wang Cc: Ryan Roberts Cc: Yang Shi Cc: Christoph Lameter (Ampere) Cc: Dev Jain Cc: Barry Song Cc: Anshuman Khandual Cc: Kefeng Wang Cc: Kevin Brodsky Cc: Yin Fengwei Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Catalin Marinas --- include/linux/huge_mm.h | 2 +- include/linux/pgtable.h | 4 +++ mm/huge_memory.c | 33 ++++++++++++++-------- mm/internal.h | 2 +- mm/memory.c | 62 ++++++++++++++++++++++++++++++----------- 5 files changed, 73 insertions(+), 30 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f327d62fc985..887a632ce7a0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -11,7 +11,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -void huge_pmd_set_accessed(struct vm_fault *vmf); +bool huge_pmd_set_accessed(struct vm_fault *vmf); int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, struct vm_area_struct *vma); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32e8457ad535..ee3148ef87f6 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1232,6 +1232,10 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address) #endif +#ifndef flush_tlb_fix_spurious_fault_pmd +#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) do { } while (0) +#endif + /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. 
Although no diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1b81680b4225..6a8679907eaa 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1641,17 +1641,30 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, EXPORT_SYMBOL_GPL(vmf_insert_folio_pud); #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -void touch_pmd(struct vm_area_struct *vma, unsigned long addr, +/** + * touch_pmd - Mark page table pmd entry as accessed and dirty (for write) + * @vma: The VMA covering @addr + * @addr: The virtual address + * @pmd: pmd pointer into the page table mapping @addr + * @write: Whether it's a write access + * + * Return: whether the pmd entry is changed + */ +bool touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write) { - pmd_t _pmd; + pmd_t entry; - _pmd = pmd_mkyoung(*pmd); + entry = pmd_mkyoung(*pmd); if (write) - _pmd = pmd_mkdirty(_pmd); + entry = pmd_mkdirty(entry); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, - pmd, _pmd, write)) + pmd, entry, write)) { update_mmu_cache_pmd(vma, addr, pmd); + return true; + } + + return false; } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -1841,18 +1854,14 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -void huge_pmd_set_accessed(struct vm_fault *vmf) +bool huge_pmd_set_accessed(struct vm_fault *vmf) { bool write = vmf->flags & FAULT_FLAG_WRITE; - vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) - goto unlock; + return false; - touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); - -unlock: - spin_unlock(vmf->ptl); + return touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); } static vm_fault_t do_huge_zero_wp_pmd(struct vm_fault *vmf) diff --git a/mm/internal.h b/mm/internal.h index 1561fc2ff5b8..27ad37a41868 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1402,7 +1402,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs, */ void touch_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud, bool write); -void touch_pmd(struct vm_area_struct *vma, unsigned long addr, +bool touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, bool write); /* diff --git a/mm/memory.c b/mm/memory.c index 74b45e258323..6e5a08c4fd2e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6115,6 +6115,45 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) return VM_FAULT_FALLBACK; } +/* + * The page faults may be spurious because of the racy access to the + * page table. For example, a non-populated virtual page is accessed + * on 2 CPUs simultaneously, thus the page faults are triggered on + * both CPUs. However, it's possible that one CPU (say CPU A) cannot + * find the reason for the page fault if the other CPU (say CPU B) has + * changed the page table before the PTE is checked on CPU A. Most of + * the time, the spurious page faults can be ignored safely. However, + * if the page fault is for the write access, it's possible that a + * stale read-only TLB entry exists in the local CPU and needs to be + * flushed on some architectures. This is called the spurious page + * fault fixing. + * + * Note: flush_tlb_fix_spurious_fault() is defined as flush_tlb_page() + * by default and used as such on most architectures, while + * flush_tlb_fix_spurious_fault_pmd() is defined as NOP by default and + * used as such on most architectures. 
+ */ +static void fix_spurious_fault(struct vm_fault *vmf, + enum pgtable_level ptlevel) +{ + /* Skip spurious TLB flush for retried page fault */ + if (vmf->flags & FAULT_FLAG_TRIED) + return; + /* + * This is needed only for protection faults but the arch code + * is not yet telling us if this is a protection fault or not. + * This still avoids useless tlb flushes for .text page faults + * with threads. + */ + if (vmf->flags & FAULT_FLAG_WRITE) { + if (ptlevel == PGTABLE_LEVEL_PTE) + flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, + vmf->pte); + else + flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address, + vmf->pmd); + } +} /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -6196,23 +6235,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, - vmf->flags & FAULT_FLAG_WRITE)) { + vmf->flags & FAULT_FLAG_WRITE)) update_mmu_cache_range(vmf, vmf->vma, vmf->address, vmf->pte, 1); - } else { - /* Skip spurious TLB flush for retried page fault */ - if (vmf->flags & FAULT_FLAG_TRIED) - goto unlock; - /* - * This is needed only for protection faults but the arch code - * is not yet telling us if this is a protection fault or not. - * This still avoids useless tlb flushes for .text page faults - * with threads. - */ - if (vmf->flags & FAULT_FLAG_WRITE) - flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, - vmf->pte); - } + else + fix_spurious_fault(vmf, PGTABLE_LEVEL_PTE); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; @@ -6309,7 +6336,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - huge_pmd_set_accessed(&vmf); + vmf.ptl = pmd_lock(mm, vmf.pmd); + if (!huge_pmd_set_accessed(&vmf)) + fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD); + spin_unlock(vmf.ptl); return 0; } } From cb1fa2e999558fd93b519f7c4c16e75e805af1e6 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 14 Nov 2025 16:54:03 +0800 Subject: [PATCH 44/94] arm64, tlbflush: don't TLBI broadcast if page reused in write fault A multi-thread customer workload with large memory footprint uses fork()/exec() to run some external programs every tens seconds. When running the workload on an arm64 server machine, it's observed that quite some CPU cycles are spent in the TLB flushing functions. While running the workload on the x86_64 server machine, it's not. This causes the performance on arm64 to be much worse than that on x86_64. During the workload running, after fork()/exec() write-protects all pages in the parent process, memory writing in the parent process will cause a write protection fault. Then the page fault handler will make the PTE/PDE writable if the page can be reused, which is almost always true in the workload. On arm64, to avoid the write protection fault on other CPUs, the page fault handler flushes the TLB globally with TLBI broadcast after changing the PTE/PDE. However, this isn't always necessary. Firstly, it's safe to leave some stale read-only TLB entries as long as they will be flushed finally. Secondly, it's quite possible that the original read-only PTE/PDEs aren't cached in remote TLB at all if the memory footprint is large. In fact, on x86_64, the page fault handler doesn't flush the remote TLB in this situation, which benefits the performance a lot. 
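With a local-only flush on page reuse, the cross-CPU interplay this relies on can be sketched as follows (simplified):

  CPU A (makes the page writable)      CPU B (holds a stale RO entry)
  -------------------------------      ------------------------------
  write fault, page can be reused
  set PTE/PMD writable
  local (non-broadcast) TLBI           write hits the stale RO entry
                                       -> spurious write-protect fault
                                       PTE found writable and unchanged
                                       flush_tlb_fix_spurious_fault():
                                       local TLBI, then retry the write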
To improve the performance on arm64, make the write protection fault handler flush the TLB locally instead of globally via TLBI broadcast after making the PTE/PDE writable. If there are stale read-only TLB entries in the remote CPUs, the page fault handler on these CPUs will regard the page fault as spurious and flush the stale TLB entries. To test the patchset, make the usemem.c from vm-scalability (https://git.kernel.org/pub/scm/linux/kernel/git/wfg/vm-scalability.git). support calling fork()/exec() periodically. To mimic the behavior of the customer workload, run usemem with 4 threads, access 100GB memory, and call fork()/exec() every 40 seconds. Test results show that with the patchset the score of usemem improves ~40.6%. The cycles% of TLB flush functions reduces from ~50.5% to ~0.3% in perf profile. Signed-off-by: Huang Ying Reviewed-by: Ryan Roberts Reviewed-by: Barry Song Acked-by: Zi Yan Cc: Will Deacon Cc: Andrew Morton Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Vlastimil Babka Cc: Baolin Wang Cc: Yang Shi Cc: Christoph Lameter (Ampere) Cc: Dev Jain Cc: Anshuman Khandual Cc: Kefeng Wang Cc: Kevin Brodsky Cc: Yin Fengwei Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Reviewed-by: David Hildenbrand (Red Hat) Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 14 +++++--- arch/arm64/include/asm/tlbflush.h | 56 +++++++++++++++++++++++++++++++ arch/arm64/mm/contpte.c | 3 +- arch/arm64/mm/fault.c | 8 +++-- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..25b3c31edb6c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -130,12 +130,16 @@ static inline void arch_leave_lazy_mmu_mode(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * Outside of a few very special situations (e.g. hibernation), we always - * use broadcast TLB invalidation instructions, therefore a spurious page - * fault on one CPU which has been handled concurrently by another CPU - * does not need to perform additional invalidation. + * We use local TLB invalidation instruction when reusing page in + * write protection fault handler to avoid TLBI broadcast in the hot + * path. This will cause spurious page faults if stale read-only TLB + * entries exist. */ -#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) \ + local_flush_tlb_page_nonotify(vma, address) + +#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \ + local_flush_tlb_page_nonotify(vma, address) /* * ZERO_PAGE is a global shared page that is always zero: used diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 18a5dc0c9a54..682a01df71d2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -249,6 +249,19 @@ static inline unsigned long get_trans_granule(void) * cannot be easily determined, the value TLBI_TTL_UNKNOWN will * perform a non-hinted invalidation. * + * local_flush_tlb_page(vma, addr) + * Local variant of flush_tlb_page(). Stale TLB entries may + * remain in remote CPUs. + * + * local_flush_tlb_page_nonotify(vma, addr) + * Same as local_flush_tlb_page() except MMU notifier will not be + * called. 
+ * + * local_flush_tlb_contpte(vma, addr) + * Invalidate the virtual-address range + * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU + * for the user address space corresponding to 'vma->mm'. Stale + * TLB entries may remain in remote CPUs. * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented * on top of these routines, since that is our interface to the mmu_gather @@ -282,6 +295,33 @@ static inline void flush_tlb_mm(struct mm_struct *mm) mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } +static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm, + unsigned long uaddr) +{ + unsigned long addr; + + dsb(nshst); + addr = __TLBI_VADDR(uaddr, ASID(mm)); + __tlbi(vale1, addr); + __tlbi_user(vale1, addr); +} + +static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + dsb(nsh); +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); + dsb(nsh); +} + static inline void __flush_tlb_page_nosync(struct mm_struct *mm, unsigned long uaddr) { @@ -472,6 +512,22 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } +static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, + unsigned long addr) +{ + unsigned long asid; + + addr = round_down(addr, CONT_PTE_SIZE); + + dsb(nshst); + asid = ASID(vma->vm_mm); + __flush_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, + 3, true, lpa2_is_enabled()); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr, + addr + CONT_PTE_SIZE); + dsb(nsh); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index c0557945939c..589bcf878938 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -622,8 +622,7 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma, __ptep_set_access_flags(vma, addr, ptep, entry, 0); if (dirty) - __flush_tlb_range(vma, start_addr, addr, - PAGE_SIZE, true, 3); + local_flush_tlb_contpte(vma, start_addr); } else { __contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte); __ptep_set_access_flags(vma, addr, ptep, entry, dirty); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d816ff44faff..4ecdfa6bcdbb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -233,9 +233,13 @@ int __ptep_set_access_flags(struct vm_area_struct *vma, pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); - /* Invalidate a stale read-only entry */ + /* + * Invalidate the local stale read-only entry. Remote stale entries + * may still cause page faults and be invalidated via + * flush_tlb_fix_spurious_fault(). + */ if (dirty) - flush_tlb_page(vma, address); + local_flush_tlb_page(vma, address); return 1; } From 796e29b857aed89f83f70f2c199585c45db5dc0f Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:31 +0000 Subject: [PATCH 45/94] ACPI / PPTT: Add a helper to fill a cpumask from a processor container The ACPI MPAM table uses the UID of a processor container specified in the PPTT to indicate the subset of CPUs and cache topology that can access each MPAM System Component (MSC). 
This information is not directly useful to the kernel. The equivalent cpumask is needed instead. Add a helper to find the processor container by its id, then walk the possible CPUs to fill a cpumask with the CPUs that have this processor container as a parent. CC: Dave Martin Reviewed-by: Sudeep Holla Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 3 ++ 2 files changed, 87 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 54676e3d82dd..b8248c0092fe 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -817,3 +817,87 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu) return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, ACPI_PPTT_ACPI_IDENTICAL); } + +/** + * acpi_pptt_get_child_cpus() - Find all the CPUs below a PPTT + * processor hierarchy node + * + * @table_hdr: A reference to the PPTT table + * @parent_node: A pointer to the processor hierarchy node in the + * table_hdr + * @cpus: A cpumask to fill with the CPUs below @parent_node + * + * Walks up the PPTT from every possible CPU to find if the provided + * @parent_node is a parent of this CPU. + */ +static void acpi_pptt_get_child_cpus(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *parent_node, + cpumask_t *cpus) +{ + struct acpi_pptt_processor *cpu_node; + u32 acpi_id; + int cpu; + + cpumask_clear(cpus); + + for_each_possible_cpu(cpu) { + acpi_id = get_acpi_id_for_cpu(cpu); + cpu_node = acpi_find_processor_node(table_hdr, acpi_id); + + while (cpu_node) { + if (cpu_node == parent_node) { + cpumask_set_cpu(cpu, cpus); + break; + } + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } + } +} + +/** + * acpi_pptt_get_cpus_from_container() - Populate a cpumask with all CPUs in a + * processor container + * @acpi_cpu_id: The UID of the processor container + * @cpus: The resulting CPU mask + * + * Find the specified Processor Container, and fill @cpus with all the cpus + * below it. + * + * Not all 'Processor Hierarchy' entries in the PPTT are either a CPU + * or a Processor Container, they may exist purely to describe a + * Private resource. CPUs have to be leaves, so a Processor Container + * is a non-leaf that has the 'ACPI Processor ID valid' flag set. 
+ */ +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) +{ + struct acpi_table_header *table_hdr; + struct acpi_subtable_header *entry; + unsigned long table_end; + u32 proc_sz; + + cpumask_clear(cpus); + + table_hdr = acpi_get_pptt(); + if (!table_hdr) + return; + + table_end = (unsigned long)table_hdr + table_hdr->length; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + proc_sz = sizeof(struct acpi_pptt_processor); + while ((unsigned long)entry + proc_sz <= table_end) { + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR) { + struct acpi_pptt_processor *cpu_node; + + cpu_node = (struct acpi_pptt_processor *)entry; + if (cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID && + !acpi_pptt_leaf_node(table_hdr, cpu_node) && + cpu_node->acpi_processor_id == acpi_cpu_id) { + acpi_pptt_get_child_cpus(table_hdr, cpu_node, cpus); + break; + } + } + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + } +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5ff5d99f6ead..4752ebd48132 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1541,6 +1541,7 @@ int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1562,6 +1563,8 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } +static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, + cpumask_t *cpus) { } #endif void acpi_arch_init(void); From eeec7845e966f9278973c02573e3587e6733a4dd Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:32 +0000 Subject: [PATCH 46/94] ACPI / PPTT: Stop acpi_count_levels() expecting callers to clear levels In acpi_count_levels(), the initial value of *levels passed by the caller is really an implementation detail of acpi_count_levels(), so it is unreasonable to expect the callers of this function to know what to pass in for this parameter. The only sensible initial value is 0, which is what the only upstream caller (acpi_get_cache_info()) passes. Use a local variable for the starting cache level in acpi_count_levels(), and pass the result back to the caller via the function return value. Get rid of the levels parameter, which has no remaining purpose. Fix acpi_get_cache_info() to match. Suggested-by: Jonathan Cameron Signed-off-by: James Morse Reviewed-by: Lorenzo Pieralisi Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index b8248c0092fe..2856254e29d7 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -177,14 +177,14 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, } /** - * acpi_count_levels() - Given a PPTT table, and a CPU node, count the cache - * levels and split cache levels (data/instruction). 
+ * acpi_count_levels() - Given a PPTT table, and a CPU node, count the + * total number of levels and split cache levels (data/instruction). * @table_hdr: Pointer to the head of the PPTT table * @cpu_node: processor node we wish to count caches for - * @levels: Number of levels if success. * @split_levels: Number of split cache levels (data/instruction) if * success. Can by NULL. * + * Return: number of levels. * Given a processor node containing a processing unit, walk into it and count * how many levels exist solely for it, and then walk up each level until we hit * the root node (ignore the package level because it may be possible to have @@ -192,14 +192,18 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, * split cache levels (data/instruction) that exist at each level on the way * up. */ -static void acpi_count_levels(struct acpi_table_header *table_hdr, - struct acpi_pptt_processor *cpu_node, - unsigned int *levels, unsigned int *split_levels) +static int acpi_count_levels(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node, + unsigned int *split_levels) { + int current_level = 0; + do { - acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0); + acpi_find_cache_level(table_hdr, cpu_node, ¤t_level, split_levels, 0, 0); cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); } while (cpu_node); + + return current_level; } /** @@ -645,7 +649,7 @@ int acpi_get_cache_info(unsigned int cpu, unsigned int *levels, if (!cpu_node) return -ENOENT; - acpi_count_levels(table, cpu_node, levels, split_levels); + *levels = acpi_count_levels(table, cpu_node, split_levels); pr_debug("Cache Setup: last_level=%d split_levels=%d\n", *levels, split_levels ? *split_levels : -1); From cfc085af8398479e855b86236a21e1d870d51184 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:33 +0000 Subject: [PATCH 47/94] ACPI / PPTT: Add acpi_pptt_cache_v1_full to use pptt cache as one structure In actbl2.h, acpi_pptt_cache describes the fields in the original Cache Type Structure. In PPTT table version 3 a new field was added at the end, cache_id. This is described in acpi_pptt_cache_v1 but rather than including all v1 fields it just includes this one. In lieu of this being fixed in acpica, introduce acpi_pptt_cache_v1_full to contain all the fields of the Cache Type Structure . Update the existing code to use this new struct. This simplifies the code and removes a non-standard use of ACPI_ADD_PTR. Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 2856254e29d7..ef39b176dc00 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -21,6 +21,25 @@ #include #include +/* + * The acpi_pptt_cache_v1 in actbl2.h, which is imported from acpica, + * only contains the cache_id field rather than all the fields of the + * Cache Type Structure. Use this alternative structure until it is + * resolved in acpica. 
+ */ +struct acpi_pptt_cache_v1_full { + struct acpi_subtable_header header; + u16 reserved; + u32 flags; + u32 next_level_of_cache; + u32 size; + u32 number_of_sets; + u8 associativity; + u8 attributes; + u16 line_size; + u32 cache_id; +} __packed; + static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, u32 pptt_ref) { @@ -56,6 +75,18 @@ static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_ return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref); } +static struct acpi_pptt_cache_v1_full *upgrade_pptt_cache(struct acpi_pptt_cache *cache) +{ + if (cache->header.length < sizeof(struct acpi_pptt_cache_v1_full)) + return NULL; + + /* No use for v1 if the only additional field is invalid */ + if (!(cache->flags & ACPI_PPTT_CACHE_ID_VALID)) + return NULL; + + return (struct acpi_pptt_cache_v1_full *)cache; +} + static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr, struct acpi_pptt_processor *node, int resource) @@ -355,7 +386,6 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta * @this_leaf: Kernel cache info structure being updated * @found_cache: The PPTT node describing this cache instance * @cpu_node: A unique reference to describe this cache instance - * @revision: The revision of the PPTT table * * The ACPI spec implies that the fields in the cache structures are used to * extend and correct the information probed from the hardware. Lets only @@ -365,10 +395,9 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta */ static void update_cache_properties(struct cacheinfo *this_leaf, struct acpi_pptt_cache *found_cache, - struct acpi_pptt_processor *cpu_node, - u8 revision) + struct acpi_pptt_processor *cpu_node) { - struct acpi_pptt_cache_v1* found_cache_v1; + struct acpi_pptt_cache_v1_full *found_cache_v1; this_leaf->fw_token = cpu_node; if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) @@ -418,9 +447,8 @@ static void update_cache_properties(struct cacheinfo *this_leaf, found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) this_leaf->type = CACHE_TYPE_UNIFIED; - if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) { - found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, - found_cache, sizeof(struct acpi_pptt_cache)); + found_cache_v1 = upgrade_pptt_cache(found_cache); + if (found_cache_v1) { this_leaf->id = found_cache_v1->cache_id; this_leaf->attributes |= CACHE_ID; } @@ -445,8 +473,7 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table, pr_debug("found = %p %p\n", found_cache, cpu_node); if (found_cache) update_cache_properties(this_leaf, found_cache, - ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)), - table->revision); + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table))); index++; } From 41a7bb39fede8ecc053c261b86cdfadea45b7b10 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:34 +0000 Subject: [PATCH 48/94] ACPI / PPTT: Find cache level by cache-id The MPAM table identifies caches by id. The MPAM driver also wants to know the cache level to determine if the platform is of the shape that can be managed via resctrl. Cacheinfo has this information, but only for CPUs that are online. Waiting for all CPUs to come online is a problem for platforms where CPUs are brought online late by user-space. Add a helper that walks every possible cache, until it finds the one identified by cache-id, then return the level. 
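A caller such as the MPAM driver later in this series is then expected to use the helper roughly as (sketch):

  int level = find_acpi_cache_level_from_id(cache_id);

  if (level <= 0)
          return -EINVAL;   /* no rev-3 PPTT, or cache_id not found */
  pr_debug("cache id %u is an L%d cache\n", cache_id, level);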
Signed-off-by: James Morse Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 5 ++++ 2 files changed, 71 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index ef39b176dc00..da49b56a1ef2 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -932,3 +932,69 @@ void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) entry->length); } } + +/** + * find_acpi_cache_level_from_id() - Get the level of the specified cache + * @cache_id: The id field of the cache + * + * Determine the level relative to any CPU for the cache identified by + * cache_id. This allows the property to be found even if the CPUs are offline. + * + * The returned level can be used to group caches that are peers. + * + * The PPTT table must be rev 3 or later. + * + * If one CPU's L2 is shared with another CPU as L3, this function will return + * an unpredictable value. + * + * Return: -ENOENT if the PPTT doesn't exist, the revision isn't supported or + * the cache cannot be found. + * Otherwise returns a value which represents the level of the specified cache. + */ +int find_acpi_cache_level_from_id(u32 cache_id) +{ + int cpu; + struct acpi_table_header *table; + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + bool empty; + int level = 1; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_cache *cache; + struct acpi_pptt_processor *cpu_node; + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + do { + int cache_type[] = {CACHE_TYPE_INST, CACHE_TYPE_DATA, CACHE_TYPE_UNIFIED}; + + empty = true; + for (int i = 0; i < ARRAY_SIZE(cache_type); i++) { + struct acpi_pptt_cache_v1_full *cache_v1; + + cache = acpi_find_cache_node(table, acpi_cpu_id, cache_type[i], + level, &cpu_node); + if (!cache) + continue; + + empty = false; + + cache_v1 = upgrade_pptt_cache(cache); + if (cache_v1 && cache_v1->cache_id == cache_id) + return level; + } + level++; + } while (!empty); + } + + return -ENOENT; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4752ebd48132..be074bdfd4d1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1542,6 +1542,7 @@ int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); +int find_acpi_cache_level_from_id(u32 cache_id); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1565,6 +1566,10 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) } static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) { } +static inline int find_acpi_cache_level_from_id(u32 cache_id) +{ + return -ENOENT; +} #endif void acpi_arch_init(void); From a39a723a6f1ed9a1602ccf8dd56392402afa7339 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:35 +0000 Subject: [PATCH 49/94] ACPI / PPTT: Add a helper to fill a cpumask from a cache_id MPAM identifies CPUs by the cache_id in the PPTT cache structure. 
The driver needs to know which CPUs are associated with the cache. The CPUs may not all be online, so cacheinfo does not have the information. Add a helper to pull this information out of the PPTT. CC: Rohit Mathew Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 6 ++++ 2 files changed, 71 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index da49b56a1ef2..de5f8c018333 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -998,3 +998,68 @@ int find_acpi_cache_level_from_id(u32 cache_id) return -ENOENT; } + +/** + * acpi_pptt_get_cpumask_from_cache_id() - Get the cpus associated with the + * specified cache + * @cache_id: The id field of the cache + * @cpus: Where to build the cpumask + * + * Determine which CPUs are below this cache in the PPTT. This allows the property + * to be found even if the CPUs are offline. + * + * The PPTT table must be rev 3 or later, + * + * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. + * Otherwise returns 0 and sets the cpus in the provided cpumask. + */ +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) +{ + int cpu; + struct acpi_table_header *table; + + cpumask_clear(cpus); + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + bool empty; + int level = 1; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_cache *cache; + struct acpi_pptt_processor *cpu_node; + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + do { + int cache_type[] = {CACHE_TYPE_INST, CACHE_TYPE_DATA, CACHE_TYPE_UNIFIED}; + + empty = true; + for (int i = 0; i < ARRAY_SIZE(cache_type); i++) { + struct acpi_pptt_cache_v1_full *cache_v1; + + cache = acpi_find_cache_node(table, acpi_cpu_id, cache_type[i], + level, &cpu_node); + + if (!cache) + continue; + + empty = false; + + cache_v1 = upgrade_pptt_cache(cache); + if (cache_v1 && cache_v1->cache_id == cache_id) + cpumask_set_cpu(cpu, cpus); + } + level++; + } while (!empty); + } + + return 0; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index be074bdfd4d1..a9dbacabdf89 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1543,6 +1543,7 @@ int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); int find_acpi_cache_level_from_id(u32 cache_id); +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1570,6 +1571,11 @@ static inline int find_acpi_cache_level_from_id(u32 cache_id) { return -ENOENT; } +static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, + cpumask_t *cpus) +{ + return -ENOENT; +} #endif void acpi_arch_init(void); From d8bf01d80919e81a06dca77556dcfb351fa99b0c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:36 +0000 Subject: [PATCH 50/94] arm64: kconfig: Add Kconfig entry for MPAM The bulk of the MPAM driver lives outside the arch code because it 
largely manages MMIO devices that generate interrupts. The driver needs a Kconfig symbol to enable it. As MPAM is only found on arm64 platforms, the arm64 tree is the most natural home for the Kconfig option. This Kconfig option will later be used by the arch code to enable or disable the MPAM context-switch code, and to register properties of CPUs with the MPAM driver. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo CC: Dave Martin Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6663ffd23f25..67015d51f7b5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2023,6 +2023,29 @@ config ARM64_TLB_RANGE ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a range of input addresses. +config ARM64_MPAM + bool "Enable support for MPAM" + help + Memory System Resource Partitioning and Monitoring (MPAM) is an + optional extension to the Arm architecture that allows each + transaction issued to the memory system to be labelled with a + Partition identifier (PARTID) and Performance Monitoring Group + identifier (PMG). + + Memory system components, such as the caches, can be configured with + policies to control how much of various physical resources (such as + memory bandwidth or cache memory) the transactions labelled with each + PARTID can consume. Depending on the capabilities of the hardware, + the PARTID and PMG can also be used as filtering criteria to measure + the memory system resource consumption of different parts of a + workload. + + Use of this extension requires CPU support, support in the + Memory System Components (MSC), and a description from firmware + of where the MSCs are in the address space. + + MPAM is exposed to user-space via the resctrl pseudo filesystem. + endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" From f5915600cc4ca0338a37d5a8a4032e25d939156b Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:37 +0000 Subject: [PATCH 51/94] platform: Define platform_device_put cleanup handler Define a cleanup helper for use with __free to destroy platform devices automatically when the pointer goes out of scope. This is only intended to be used in error cases and so should be used with return_ptr() or no_free_ptr() directly to avoid the automatic destruction on success. A first use of this is introduced in a subsequent commit. 
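The intended pattern looks roughly like this (sketch; the function and device names are illustrative only):

  #include <linux/cleanup.h>
  #include <linux/err.h>
  #include <linux/platform_device.h>

  static struct platform_device *create_example_pdev(int id)
  {
          struct platform_device *pdev __free(platform_device_put) =
                          platform_device_alloc("example-dev", id);
          int err;

          if (!pdev)
                  return ERR_PTR(-ENOMEM);

          err = platform_device_add(pdev);
          if (err)
                  return ERR_PTR(err);    /* pdev put automatically */

          return no_free_ptr(pdev);       /* success: keep the reference */
  }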
Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 074754c23d33..23a30ada2d4c 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -232,6 +232,7 @@ extern int platform_device_add_data(struct platform_device *pdev, extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); +DEFINE_FREE(platform_device_put, struct platform_device *, if (_T) platform_device_put(_T)) struct platform_driver { int (*probe)(struct platform_device *); From 96f4a4d53e6660d9b62e8d739388267fbb660e9f Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:38 +0000 Subject: [PATCH 52/94] ACPI: Define acpi_put_table cleanup handler and acpi_get_table_pointer() helper Define a cleanup helper for use with __free to release the acpi table when the pointer goes out of scope. Also, introduce the helper acpi_get_table_pointer() to simplify a commonly used pattern involving acpi_get_table(). These are first used in a subsequent commit. Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a9dbacabdf89..ac8797f95236 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -8,6 +8,7 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#include #include #include /* for struct resource */ #include @@ -221,6 +222,17 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); +static inline struct acpi_table_header *acpi_get_table_pointer(char *signature, u32 instance) +{ + struct acpi_table_header *table; + int status = acpi_get_table(signature, instance, &table); + + if (ACPI_FAILURE(status)) + return ERR_PTR(-ENOENT); + return table; +} +DEFINE_FREE(acpi_put_table, struct acpi_table_header *, if (!IS_ERR_OR_NULL(_T)) acpi_put_table(_T)) + int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, From 115c5325beae7199219ab7c12ec2a2af8dea6c3c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:39 +0000 Subject: [PATCH 53/94] ACPI / MPAM: Parse the MPAM table Add code to parse the arm64 specific MPAM table, looking up the cache level from the PPTT and feeding the end result into the MPAM driver. This happens in two stages. Platform devices are created first for the MSC devices. Once the driver probes it calls acpi_mpam_parse_resources() to discover the RIS entries the MSC contains. For now the MPAM hook mpam_ris_create() is stubbed out, but will update the MPAM driver with optional discovered data about the RIS entries. 
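Based on the call sites in the parsing code below, the stub is expected to have roughly this shape (a sketch with inferred parameter names and types; the real declaration lives in the new include/linux/arm_mpam.h):

  /* Sketch only: no driver support yet, filled in by later patches */
  static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
                                    enum mpam_class_types type,
                                    u8 class_id, int component_id)
  {
          return 0;
  }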
CC: Carl Worth Link: https://developer.arm.com/documentation/den0065/3-0bet/?lang=en Reviewed-by: Lorenzo Pieralisi Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + drivers/acpi/arm64/Kconfig | 3 + drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/mpam.c | 411 ++++++++++++++++++++++++++++++++++++ drivers/acpi/tables.c | 2 +- include/linux/arm_mpam.h | 47 +++++ 6 files changed, 464 insertions(+), 1 deletion(-) create mode 100644 drivers/acpi/arm64/mpam.c create mode 100644 include/linux/arm_mpam.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 67015d51f7b5..c5e66d5d72cd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2025,6 +2025,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" + select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) is an optional extension to the Arm architecture that allows each diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index b3ed6212244c..f2fd79f22e7d 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -21,3 +21,6 @@ config ACPI_AGDI config ACPI_APMT bool + +config ACPI_MPAM + bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 05ecde9eaabe..9390b57cb564 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ACPI_FFH) += ffh.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_IORT) += iort.o +obj-$(CONFIG_ACPI_MPAM) += mpam.o obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c new file mode 100644 index 000000000000..84963a20c3e7 --- /dev/null +++ b/drivers/acpi/arm64/mpam.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ + +#define pr_fmt(fmt) "ACPI MPAM: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +/* + * Flags for acpi_table_mpam_msc.*_interrupt_flags. + * See 2.1.1 Interrupt Flags, Table 5, of DEN0065B_MPAM_ACPI_3.0-bet. + */ +#define ACPI_MPAM_MSC_IRQ_MODE BIT(0) +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK GENMASK(2, 1) +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK BIT(3) +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER 1 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) + +/* + * Encodings for the MSC node body interface type field. + * See 2.1 MPAM MSC node, Table 4 of DEN0065B_MPAM_ACPI_3.0-bet. 
+ */ +#define ACPI_MPAM_MSC_IFACE_MMIO 0x00 +#define ACPI_MPAM_MSC_IFACE_PCC 0x0a + +static bool _is_ppi_partition(u32 flags) +{ + u32 aff_type, is_ppi; + bool ret; + + is_ppi = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_VALID, flags); + if (!is_ppi) + return false; + + aff_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK, flags); + ret = (aff_type == ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER); + if (ret) + pr_err_once("Partitioned interrupts not supported\n"); + + return ret; +} + +static int acpi_mpam_register_irq(struct platform_device *pdev, + u32 intid, u32 flags) +{ + int irq; + u32 int_type; + int trigger; + + if (!intid) + return -EINVAL; + + if (_is_ppi_partition(flags)) + return -EINVAL; + + trigger = FIELD_GET(ACPI_MPAM_MSC_IRQ_MODE, flags); + int_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_TYPE_MASK, flags); + if (int_type != ACPI_MPAM_MSC_IRQ_TYPE_WIRED) + return -EINVAL; + + irq = acpi_register_gsi(&pdev->dev, intid, trigger, ACPI_ACTIVE_HIGH); + if (irq < 0) + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", intid); + + return irq; +} + +static void acpi_mpam_parse_irqs(struct platform_device *pdev, + struct acpi_mpam_msc_node *tbl_msc, + struct resource *res, int *res_idx) +{ + u32 flags, intid; + int irq; + + intid = tbl_msc->overflow_interrupt; + flags = tbl_msc->overflow_interrupt_flags; + irq = acpi_mpam_register_irq(pdev, intid, flags); + if (irq > 0) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "overflow"); + + intid = tbl_msc->error_interrupt; + flags = tbl_msc->error_interrupt_flags; + irq = acpi_mpam_register_irq(pdev, intid, flags); + if (irq > 0) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "error"); +} + +static int acpi_mpam_parse_resource(struct mpam_msc *msc, + struct acpi_mpam_resource_node *res) +{ + int level, nid; + u32 cache_id; + + switch (res->locator_type) { + case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: + cache_id = res->locator.cache_locator.cache_reference; + level = find_acpi_cache_level_from_id(cache_id); + if (level <= 0) { + pr_err_once("Bad level (%d) for cache with id %u\n", level, cache_id); + return -EINVAL; + } + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, + level, cache_id); + case ACPI_MPAM_LOCATION_TYPE_MEMORY: + nid = pxm_to_node(res->locator.memory_locator.proximity_domain); + if (nid == NUMA_NO_NODE) { + pr_debug("Bad proximity domain %lld, using node 0 instead\n", + res->locator.memory_locator.proximity_domain); + nid = 0; + } + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY, + MPAM_CLASS_ID_DEFAULT, nid); + default: + /* These get discovered later and are treated as unknown */ + return 0; + } +} + +int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc) +{ + int i, err; + char *ptr, *table_end; + struct acpi_mpam_resource_node *resource; + + table_end = (char *)tbl_msc + tbl_msc->length; + ptr = (char *)(tbl_msc + 1); + for (i = 0; i < tbl_msc->num_resource_nodes; i++) { + u64 max_deps, remaining_table; + + if (ptr + sizeof(*resource) > table_end) + return -EINVAL; + + resource = (struct acpi_mpam_resource_node *)ptr; + + remaining_table = table_end - ptr; + max_deps = remaining_table / sizeof(struct acpi_mpam_func_deps); + if (resource->num_functional_deps > max_deps) { + pr_debug("MSC has impossible number of functional dependencies\n"); + return -EINVAL; + } + + err = acpi_mpam_parse_resource(msc, resource); + if (err) + return err; + + ptr += sizeof(*resource); + ptr += resource->num_functional_deps * sizeof(struct acpi_mpam_func_deps); + } + 
+ return 0; +} + +/* + * Creates the device power management link and returns true if the + * acpi id is valid and usable for cpu affinity. This is the case + * when the linked device is a processor or a processor container. + */ +static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, + struct platform_device *pdev, + u32 *acpi_id) +{ + char hid[sizeof(tbl_msc->hardware_id_linked_device) + 1] = { 0 }; + bool acpi_id_valid = false; + struct acpi_device *buddy; + char uid[11]; + int len; + + memcpy(hid, &tbl_msc->hardware_id_linked_device, + sizeof(tbl_msc->hardware_id_linked_device)); + + if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { + *acpi_id = tbl_msc->instance_id_linked_device; + acpi_id_valid = true; + } + + len = snprintf(uid, sizeof(uid), "%u", + tbl_msc->instance_id_linked_device); + if (len >= sizeof(uid)) { + pr_debug("Failed to convert uid of device for power management."); + return acpi_id_valid; + } + + buddy = acpi_dev_get_first_match_dev(hid, uid, -1); + if (buddy) { + device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); + acpi_dev_put(buddy); + } + + return acpi_id_valid; +} + +static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc, + enum mpam_msc_iface *iface) +{ + switch (tbl_msc->interface_type) { + case ACPI_MPAM_MSC_IFACE_MMIO: + *iface = MPAM_IFACE_MMIO; + return 0; + case ACPI_MPAM_MSC_IFACE_PCC: + *iface = MPAM_IFACE_PCC; + return 0; + default: + return -EINVAL; + } +} + +static struct platform_device * __init acpi_mpam_parse_msc(struct acpi_mpam_msc_node *tbl_msc) +{ + struct platform_device *pdev __free(platform_device_put) = + platform_device_alloc("mpam_msc", tbl_msc->identifier); + int next_res = 0, next_prop = 0, err; + /* pcc, nrdy, affinity and a sentinel */ + struct property_entry props[4] = { 0 }; + /* mmio, 2xirq, no sentinel. */ + struct resource res[3] = { 0 }; + struct acpi_device *companion; + enum mpam_msc_iface iface; + char uid[16]; + u32 acpi_id; + + if (!pdev) + return ERR_PTR(-ENOMEM); + + /* Some power management is described in the namespace: */ + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); + if (err > 0 && err < sizeof(uid)) { + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); + if (companion) { + ACPI_COMPANION_SET(&pdev->dev, companion); + acpi_dev_put(companion); + } else { + pr_debug("MSC.%u: missing namespace entry\n", tbl_msc->identifier); + } + } + + if (decode_interface_type(tbl_msc, &iface)) { + pr_debug("MSC.%u: unknown interface type\n", tbl_msc->identifier); + return ERR_PTR(-EINVAL); + } + + if (iface == MPAM_IFACE_MMIO) { + res[next_res++] = DEFINE_RES_MEM_NAMED(tbl_msc->base_address, + tbl_msc->mmio_size, + "MPAM:MSC"); + } else if (iface == MPAM_IFACE_PCC) { + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", + tbl_msc->base_address); + } + + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); + + WARN_ON_ONCE(next_res > ARRAY_SIZE(res)); + err = platform_device_add_resources(pdev, res, next_res); + if (err) + return ERR_PTR(err); + + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", + tbl_msc->max_nrdy_usec); + + /* + * The MSC's CPU affinity is described via its linked power + * management device, but only if it points at a Processor or + * Processor Container. 
+ */ + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", acpi_id); + + WARN_ON_ONCE(next_prop > ARRAY_SIZE(props) - 1); + err = device_create_managed_software_node(&pdev->dev, props, NULL); + if (err) + return ERR_PTR(err); + + /* + * Stash the table entry for acpi_mpam_parse_resources() to discover + * what this MSC controls. + */ + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); + if (err) + return ERR_PTR(err); + + err = platform_device_add(pdev); + if (err) + return ERR_PTR(err); + + return_ptr(pdev); +} + +static int __init acpi_mpam_parse(void) +{ + char *table_end, *table_offset; + struct acpi_mpam_msc_node *tbl_msc; + struct platform_device *pdev; + + if (acpi_disabled || !system_supports_mpam()) + return 0; + + struct acpi_table_header *table __free(acpi_put_table) = + acpi_get_table_pointer(ACPI_SIG_MPAM, 0); + + if (IS_ERR(table)) + return 0; + + if (table->revision < 1) { + pr_debug("MPAM ACPI table revision %d not supported\n", table->revision); + return 0; + } + + table_offset = (char *)(table + 1); + table_end = (char *)table + table->length; + + while (table_offset < table_end) { + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; + if (table_offset + sizeof(*tbl_msc) > table_end || + table_offset + tbl_msc->length > table_end) { + pr_err("MSC entry overlaps end of ACPI table\n"); + return -EINVAL; + } + table_offset += tbl_msc->length; + + /* + * If any of the reserved fields are set, make no attempt to + * parse the MSC structure. This MSC will still be counted by + * acpi_mpam_count_msc(), meaning the MPAM driver can't probe + * against all MSC, and will never be enabled. There is no way + * to enable it safely, because we cannot determine safe + * system-wide partid and pmg ranges in this situation. + */ + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) { + pr_err_once("Unrecognised MSC, MPAM not usable\n"); + pr_debug("MSC.%u: reserved field set\n", tbl_msc->identifier); + continue; + } + + if (!tbl_msc->mmio_size) { + pr_debug("MSC.%u: marked as disabled\n", tbl_msc->identifier); + continue; + } + + pdev = acpi_mpam_parse_msc(tbl_msc); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + } + + return 0; +} + +/** + * acpi_mpam_count_msc() - Count the number of MSC described by firmware. + * + * Returns the number of MSCs, or zero for an error. + * + * This can be called before or in parallel with acpi_mpam_parse(). + */ +int acpi_mpam_count_msc(void) +{ + char *table_end, *table_offset; + struct acpi_mpam_msc_node *tbl_msc; + int count = 0; + + if (acpi_disabled || !system_supports_mpam()) + return 0; + + struct acpi_table_header *table __free(acpi_put_table) = + acpi_get_table_pointer(ACPI_SIG_MPAM, 0); + + if (IS_ERR(table)) + return 0; + + if (table->revision < 1) + return 0; + + table_offset = (char *)(table + 1); + table_end = (char *)table + table->length; + + while (table_offset < table_end) { + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; + + if (table_offset + sizeof(*tbl_msc) > table_end) + return -EINVAL; + if (tbl_msc->length < sizeof(*tbl_msc)) + return -EINVAL; + if (tbl_msc->length > table_end - table_offset) + return -EINVAL; + table_offset += tbl_msc->length; + + if (!tbl_msc->mmio_size) + continue; + + count++; + } + + return count; +} + +/* + * Call after ACPI devices have been created, which happens behind acpi_scan_init() + * called from subsys_initcall(). PCC requires the mailbox driver, which is + * initialised from postcore_initcall(). 
+ */ +subsys_initcall_sync(acpi_mpam_parse); diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 57fc8bc56166..4286e4af1092 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -408,7 +408,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __nonstring_array __initconst ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI, - ACPI_SIG_NBFT, ACPI_SIG_SWFT}; + ACPI_SIG_NBFT, ACPI_SIG_SWFT, ACPI_SIG_MPAM}; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h new file mode 100644 index 000000000000..4b7f335181e0 --- /dev/null +++ b/include/linux/arm_mpam.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Arm Ltd. */ + +#ifndef __LINUX_ARM_MPAM_H +#define __LINUX_ARM_MPAM_H + +#include +#include + +struct mpam_msc; + +enum mpam_msc_iface { + MPAM_IFACE_MMIO, /* a real MPAM MSC */ + MPAM_IFACE_PCC, /* a fake MPAM MSC */ +}; + +enum mpam_class_types { + MPAM_CLASS_CACHE, /* Caches, e.g. L2, L3 */ + MPAM_CLASS_MEMORY, /* Main memory */ + MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */ +}; + +#define MPAM_CLASS_ID_DEFAULT 255 + +#ifdef CONFIG_ACPI_MPAM +int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc); + +int acpi_mpam_count_msc(void); +#else +static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc) +{ + return -EINVAL; +} + +static inline int acpi_mpam_count_msc(void) { return -EINVAL; } +#endif + +static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, + int component_id) +{ + return -EINVAL; +} + +#endif /* __LINUX_ARM_MPAM_H */ From f04046f2577a5c76167333ca99d3903ee5331ba0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:40 +0000 Subject: [PATCH 54/94] arm_mpam: Add probe/remove for mpam msc driver and kbuild boiler plate Probing MPAM is convoluted. MSCs that are integrated with a CPU may only be accessible from those CPUs, and they may not be online. Touching the hardware early is pointless as MPAM can't be used until the system-wide common values for num_partid and num_pmg have been discovered. Start with driver probe/remove and mapping the MSC. 
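Mapping the MSC uses the usual devm pattern. Condensed from the probe
path this patch adds, for the MMIO flavour of MSC:

          struct resource *msc_res;
          void __iomem *io;

          io = devm_platform_get_and_ioremap_resource(pdev, 0, &msc_res);
          if (IS_ERR(io))
                  return PTR_ERR(io);
          msc->mapped_hwpage = io;
          msc->mapped_hwpage_sz = msc_res->end - msc_res->start;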
Cc: Carl Worth Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/resctrl/Kconfig | 15 +++ drivers/resctrl/Makefile | 4 + drivers/resctrl/mpam_devices.c | 190 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 49 ++++++++ 7 files changed, 262 insertions(+) create mode 100644 drivers/resctrl/Kconfig create mode 100644 drivers/resctrl/Makefile create mode 100644 drivers/resctrl/mpam_devices.c create mode 100644 drivers/resctrl/mpam_internal.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c5e66d5d72cd..004d58cfbff8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2025,6 +2025,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" + select ARM64_MPAM_DRIVER if EXPERT # does nothing yet select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) is an diff --git a/drivers/Kconfig b/drivers/Kconfig index 4915a63866b0..3054b50a2f4c 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/resctrl/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 8e1ffa4358d5..20eb17596b89 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -194,6 +194,7 @@ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ obj-$(CONFIG_DPLL) += dpll/ +obj-y += resctrl/ obj-$(CONFIG_DIBS) += dibs/ obj-$(CONFIG_S390) += s390/ diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig new file mode 100644 index 000000000000..5f7f748e611e --- /dev/null +++ b/drivers/resctrl/Kconfig @@ -0,0 +1,15 @@ +menuconfig ARM64_MPAM_DRIVER + bool "MPAM driver" + depends on ARM64 && ARM64_MPAM && EXPERT + help + Memory System Resource Partitioning and Monitoring (MPAM) driver for + System IP, e.g. caches and memory controllers. + +if ARM64_MPAM_DRIVER + +config ARM64_MPAM_DRIVER_DEBUG + bool "Enable debug messages from the MPAM driver" + help + Say yes here to enable debug messages from the MPAM driver. + +endif diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile new file mode 100644 index 000000000000..898199dcf80d --- /dev/null +++ b/drivers/resctrl/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o +mpam-y += mpam_devices.o + +ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c new file mode 100644 index 000000000000..e097e852f9c3 --- /dev/null +++ b/drivers/resctrl/mpam_devices.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpam_internal.h" + +/* + * mpam_list_lock protects the SRCU lists when writing. Once the + * mpam_enabled key is enabled these lists are read-only, + * unless the error interrupt disables the driver. + */ +static DEFINE_MUTEX(mpam_list_lock); +static LIST_HEAD(mpam_all_msc); + +struct srcu_struct mpam_srcu; + +/* + * Number of MSCs that have been probed. 
Once all MSCs have been probed MPAM + * can be enabled. + */ +static atomic_t mpam_num_msc; + +/* + * An MSC can control traffic from a set of CPUs, but may only be accessible + * from a (hopefully wider) set of CPUs. The common reason for this is power + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the + * corresponding cache may also be powered off. By making accesses from + * one of those CPUs, we ensure we don't access a cache that's powered off. + */ +static void update_msc_accessibility(struct mpam_msc *msc) +{ + u32 affinity_id; + int err; + + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", + &affinity_id); + if (err) + cpumask_copy(&msc->accessibility, cpu_possible_mask); + else + acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility); +} + +static void mpam_msc_destroy(struct mpam_msc *msc) +{ + struct platform_device *pdev = msc->pdev; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&msc->all_msc_list); + platform_set_drvdata(pdev, NULL); +} + +static void mpam_msc_drv_remove(struct platform_device *pdev) +{ + struct mpam_msc *msc = platform_get_drvdata(pdev); + + mutex_lock(&mpam_list_lock); + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + + synchronize_srcu(&mpam_srcu); +} + +static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + u32 tmp; + struct mpam_msc *msc; + struct resource *msc_res; + struct device *dev = &pdev->dev; + + lockdep_assert_held(&mpam_list_lock); + + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); + if (!msc) + return ERR_PTR(-ENOMEM); + + err = devm_mutex_init(dev, &msc->probe_lock); + if (err) + return ERR_PTR(err); + + err = devm_mutex_init(dev, &msc->part_sel_lock); + if (err) + return ERR_PTR(err); + + msc->id = pdev->id; + msc->pdev = pdev; + INIT_LIST_HEAD_RCU(&msc->all_msc_list); + INIT_LIST_HEAD_RCU(&msc->ris); + + update_msc_accessibility(msc); + if (cpumask_empty(&msc->accessibility)) { + dev_err_once(dev, "MSC is not accessible from any CPU!"); + return ERR_PTR(-EINVAL); + } + + if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp)) + msc->iface = MPAM_IFACE_MMIO; + else + msc->iface = MPAM_IFACE_PCC; + + if (msc->iface == MPAM_IFACE_MMIO) { + void __iomem *io; + + io = devm_platform_get_and_ioremap_resource(pdev, 0, + &msc_res); + if (IS_ERR(io)) { + dev_err_once(dev, "Failed to map MSC base address\n"); + return ERR_CAST(io); + } + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; + msc->mapped_hwpage = io; + } else { + return ERR_PTR(-EINVAL); + } + + list_add_rcu(&msc->all_msc_list, &mpam_all_msc); + platform_set_drvdata(pdev, msc); + + return msc; +} + +static int fw_num_msc; + +static int mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + struct mpam_msc *msc = NULL; + void *plat_data = pdev->dev.platform_data; + + mutex_lock(&mpam_list_lock); + msc = do_mpam_msc_drv_probe(pdev); + mutex_unlock(&mpam_list_lock); + + if (IS_ERR(msc)) + return PTR_ERR(msc); + + /* Create RIS entries described by firmware */ + err = acpi_mpam_parse_resources(msc, plat_data); + if (err) { + mpam_msc_drv_remove(pdev); + return err; + } + + if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc) + pr_info("Discovered all MSCs\n"); + + return 0; +} + +static struct platform_driver mpam_msc_driver = { + .driver = { + .name = "mpam_msc", + }, + .probe = mpam_msc_drv_probe, + .remove = mpam_msc_drv_remove, +}; + +static int __init mpam_msc_driver_init(void) +{ + if (!system_supports_mpam()) + return -EOPNOTSUPP; + + 
init_srcu_struct(&mpam_srcu); + + fw_num_msc = acpi_mpam_count_msc(); + if (fw_num_msc <= 0) { + pr_err("No MSC devices found in firmware\n"); + return -EINVAL; + } + + return platform_driver_register(&mpam_msc_driver); +} +subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h new file mode 100644 index 000000000000..540066903eca --- /dev/null +++ b/drivers/resctrl/mpam_internal.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2025 Arm Ltd. + +#ifndef MPAM_INTERNAL_H +#define MPAM_INTERNAL_H + +#include +#include +#include +#include +#include + +struct platform_device; + +struct mpam_msc { + /* member of mpam_all_msc */ + struct list_head all_msc_list; + + int id; + struct platform_device *pdev; + + /* Not modified after mpam_is_enabled() becomes true */ + enum mpam_msc_iface iface; + u32 nrdy_usec; + cpumask_t accessibility; + + /* + * probe_lock is only taken during discovery. After discovery these + * properties become read-only and the lists are protected by SRCU. + */ + struct mutex probe_lock; + unsigned long ris_idxs; + u32 ris_max; + + /* mpam_msc_ris of this component */ + struct list_head ris; + + /* + * part_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary + * by RIS). + * If needed, take msc->probe_lock first. + */ + struct mutex part_sel_lock; + + void __iomem *mapped_hwpage; + size_t mapped_hwpage_sz; +}; +#endif /* MPAM_INTERNAL_H */ From 01fb4b8224726aa0f2170b63e4685cf0eec85d8d Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:41 +0000 Subject: [PATCH 55/94] arm_mpam: Add the class and component structures for firmware described ris An MSC is a container of resources, each identified by their RIS index. Some RIS are described by firmware to provide their position in the system. Others are discovered when the driver probes the hardware. To configure a resource it needs to be found by its class, e.g. 'L2'. There are two kinds of grouping, a class is a set of components, which are visible to user-space as there are likely to be multiple instances of the L2 cache. (e.g. one per cluster or package) Add support for creating and destroying structures to allow a hierarchy of resources to be created. Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 392 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 94 ++++++++ include/linux/arm_mpam.h | 5 + 3 files changed, 490 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index e097e852f9c3..f1dcf9bb14f2 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -36,6 +36,383 @@ struct srcu_struct mpam_srcu; */ static atomic_t mpam_num_msc; +/* + * An MSC is a physical container for controls and monitors, each identified by + * their RIS index. These share a base-address, interrupts and some MMIO + * registers. A vMSC is a virtual container for RIS in an MSC that control or + * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but + * not all RIS in an MSC share a vMSC. 
+ * + * Components are a group of vMSC that control or monitor the same thing but + * are from different MSC, so have different base-address, interrupts etc. + * Classes are the set components of the same type. + * + * The features of a vMSC is the union of the RIS it contains. + * The features of a Class and Component are the common subset of the vMSC + * they contain. + * + * e.g. The system cache may have bandwidth controls on multiple interfaces, + * for regulating traffic from devices independently of traffic from CPUs. + * If these are two RIS in one MSC, they will be treated as controlling + * different things, and will not share a vMSC/component/class. + * + * e.g. The L2 may have one MSC and two RIS, one for cache-controls another + * for bandwidth. These two RIS are members of the same vMSC. + * + * e.g. The set of RIS that make up the L2 are grouped as a component. These + * are sometimes termed slices. They should be configured the same, as if there + * were only one. + * + * e.g. The SoC probably has more than one L2, each attached to a distinct set + * of CPUs. All the L2 components are grouped as a class. + * + * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list, + * then linked via struct mpam_ris to a vmsc, component and class. + * The same MSC may exist under different class->component->vmsc paths, but the + * RIS index will be unique. + */ +LIST_HEAD(mpam_classes); + +/* List of all objects that can be free()d after synchronise_srcu() */ +static LLIST_HEAD(mpam_garbage); + +static inline void init_garbage(struct mpam_garbage *garbage) +{ + init_llist_node(&garbage->llist); +} + +#define add_to_garbage(x) \ +do { \ + __typeof__(x) _x = (x); \ + _x->garbage.to_free = _x; \ + llist_add(&_x->garbage.llist, &mpam_garbage); \ +} while (0) + +static void mpam_free_garbage(void) +{ + struct mpam_garbage *iter, *tmp; + struct llist_node *to_free = llist_del_all(&mpam_garbage); + + if (!to_free) + return; + + synchronize_srcu(&mpam_srcu); + + llist_for_each_entry_safe(iter, tmp, to_free, llist) { + if (iter->pdev) + devm_kfree(&iter->pdev->dev, iter->to_free); + else + kfree(iter->to_free); + } +} + +static struct mpam_class * +mpam_class_alloc(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + class = kzalloc(sizeof(*class), GFP_KERNEL); + if (!class) + return ERR_PTR(-ENOMEM); + init_garbage(&class->garbage); + + INIT_LIST_HEAD_RCU(&class->components); + /* Affinity is updated when ris are added */ + class->level = level_idx; + class->type = type; + INIT_LIST_HEAD_RCU(&class->classes_list); + + list_add_rcu(&class->classes_list, &mpam_classes); + + return class; +} + +static void mpam_class_destroy(struct mpam_class *class) +{ + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&class->classes_list); + add_to_garbage(class); +} + +static struct mpam_class * +mpam_class_find(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + if (class->type == type && class->level == level_idx) + return class; + } + + return mpam_class_alloc(level_idx, type); +} + +static struct mpam_component * +mpam_component_alloc(struct mpam_class *class, int id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + comp = kzalloc(sizeof(*comp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); + init_garbage(&comp->garbage); + + comp->comp_id = id; + 
INIT_LIST_HEAD_RCU(&comp->vmsc); + /* Affinity is updated when RIS are added */ + INIT_LIST_HEAD_RCU(&comp->class_list); + comp->class = class; + + list_add_rcu(&comp->class_list, &class->components); + + return comp; +} + +static void mpam_component_destroy(struct mpam_component *comp) +{ + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&comp->class_list); + add_to_garbage(comp); + + if (list_empty(&class->components)) + mpam_class_destroy(class); +} + +static struct mpam_component * +mpam_component_find(struct mpam_class *class, int id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(comp, &class->components, class_list) { + if (comp->comp_id == id) + return comp; + } + + return mpam_component_alloc(class, id); +} + +static struct mpam_vmsc * +mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL); + if (!vmsc) + return ERR_PTR(-ENOMEM); + init_garbage(&vmsc->garbage); + + INIT_LIST_HEAD_RCU(&vmsc->ris); + INIT_LIST_HEAD_RCU(&vmsc->comp_list); + vmsc->comp = comp; + vmsc->msc = msc; + + list_add_rcu(&vmsc->comp_list, &comp->vmsc); + + return vmsc; +} + +static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc) +{ + struct mpam_component *comp = vmsc->comp; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&vmsc->comp_list); + add_to_garbage(vmsc); + + if (list_empty(&comp->vmsc)) + mpam_component_destroy(comp); +} + +static struct mpam_vmsc * +mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + if (vmsc->msc->id == msc->id) + return vmsc; + } + + return mpam_vmsc_alloc(comp, msc); +} + +/* + * The cacheinfo structures are only populated when CPUs are online. + * This helper walks the acpi tables to include offline CPUs too. + */ +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity) +{ + return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity); +} + +/* + * cpumask_of_node() only knows about online CPUs. This can't tell us whether + * a class is represented on all possible CPUs. 
+ */ +static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (node_id == cpu_to_node(cpu)) + cpumask_set_cpu(cpu, affinity); + } +} + +static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, + enum mpam_class_types type, + struct mpam_class *class, + struct mpam_component *comp) +{ + int err; + + switch (type) { + case MPAM_CLASS_CACHE: + err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level, + affinity); + if (err) { + dev_warn_once(&msc->pdev->dev, + "Failed to determine CPU affinity\n"); + return err; + } + + if (cpumask_empty(affinity)) + dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n"); + + break; + case MPAM_CLASS_MEMORY: + get_cpumask_from_node_id(comp->comp_id, affinity); + /* affinity may be empty for CPU-less memory nodes */ + break; + case MPAM_CLASS_UNKNOWN: + return 0; + } + + cpumask_and(affinity, affinity, &msc->accessibility); + + return 0; +} + +static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, + int component_id) +{ + int err; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class; + struct mpam_component *comp; + struct platform_device *pdev = msc->pdev; + + lockdep_assert_held(&mpam_list_lock); + + if (ris_idx > MPAM_MSC_MAX_NUM_RIS) + return -EINVAL; + + if (test_and_set_bit(ris_idx, &msc->ris_idxs)) + return -EBUSY; + + ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL); + if (!ris) + return -ENOMEM; + init_garbage(&ris->garbage); + ris->garbage.pdev = pdev; + + class = mpam_class_find(class_id, type); + if (IS_ERR(class)) + return PTR_ERR(class); + + comp = mpam_component_find(class, component_id); + if (IS_ERR(comp)) { + if (list_empty(&class->components)) + mpam_class_destroy(class); + return PTR_ERR(comp); + } + + vmsc = mpam_vmsc_find(comp, msc); + if (IS_ERR(vmsc)) { + if (list_empty(&comp->vmsc)) + mpam_component_destroy(comp); + return PTR_ERR(vmsc); + } + + err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp); + if (err) { + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); + return err; + } + + ris->ris_idx = ris_idx; + INIT_LIST_HEAD_RCU(&ris->msc_list); + INIT_LIST_HEAD_RCU(&ris->vmsc_list); + ris->vmsc = vmsc; + + cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity); + cpumask_or(&class->affinity, &class->affinity, &ris->affinity); + list_add_rcu(&ris->vmsc_list, &vmsc->ris); + list_add_rcu(&ris->msc_list, &msc->ris); + + return 0; +} + +static void mpam_ris_destroy(struct mpam_msc_ris *ris) +{ + struct mpam_vmsc *vmsc = ris->vmsc; + struct mpam_msc *msc = vmsc->msc; + struct mpam_component *comp = vmsc->comp; + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + /* + * It is assumed affinities don't overlap. If they do the class becomes + * unusable immediately. 
+ */ + cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity); + cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity); + clear_bit(ris->ris_idx, &msc->ris_idxs); + list_del_rcu(&ris->msc_list); + list_del_rcu(&ris->vmsc_list); + add_to_garbage(ris); + + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); +} + +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, int component_id) +{ + int err; + + mutex_lock(&mpam_list_lock); + err = mpam_ris_create_locked(msc, ris_idx, type, class_id, + component_id); + mutex_unlock(&mpam_list_lock); + if (err) + mpam_free_garbage(); + + return err; +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. The common reason for this is power @@ -56,14 +433,25 @@ static void update_msc_accessibility(struct mpam_msc *msc) acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility); } +/* + * There are two ways of reaching a struct mpam_msc_ris. Via the + * class->component->vmsc->ris, or via the msc. + * When destroying the msc, the other side needs unlinking and cleaning up too. + */ static void mpam_msc_destroy(struct mpam_msc *msc) { struct platform_device *pdev = msc->pdev; + struct mpam_msc_ris *ris, *tmp; lockdep_assert_held(&mpam_list_lock); + list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list) + mpam_ris_destroy(ris); + list_del_rcu(&msc->all_msc_list); platform_set_drvdata(pdev, NULL); + + add_to_garbage(msc); } static void mpam_msc_drv_remove(struct platform_device *pdev) @@ -74,7 +462,7 @@ static void mpam_msc_drv_remove(struct platform_device *pdev) mpam_msc_destroy(msc); mutex_unlock(&mpam_list_lock); - synchronize_srcu(&mpam_srcu); + mpam_free_garbage(); } static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) @@ -90,6 +478,8 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); if (!msc) return ERR_PTR(-ENOMEM); + init_garbage(&msc->garbage); + msc->garbage.pdev = pdev; err = devm_mutex_init(dev, &msc->probe_lock); if (err) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 540066903eca..8f7a28d2c021 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -7,11 +7,30 @@ #include #include #include +#include #include +#include #include +#define MPAM_MSC_MAX_NUM_RIS 16 + struct platform_device; +/* + * Structures protected by SRCU may not be freed for a surprising amount of + * time (especially if perf is running). To ensure the MPAM error interrupt can + * tear down all the structures, build a list of objects that can be garbage + * collected once synchronize_srcu() has returned. + * If pdev is non-NULL, use devm_kfree(). 
+ */ +struct mpam_garbage { + /* member of mpam_garbage */ + struct llist_node llist; + + void *to_free; + struct platform_device *pdev; +}; + struct mpam_msc { /* member of mpam_all_msc */ struct list_head all_msc_list; @@ -45,5 +64,80 @@ struct mpam_msc { void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; + + struct mpam_garbage garbage; }; + +struct mpam_class { + /* mpam_components in this class */ + struct list_head components; + + cpumask_t affinity; + + u8 level; + enum mpam_class_types type; + + /* member of mpam_classes */ + struct list_head classes_list; + + struct mpam_garbage garbage; +}; + +struct mpam_component { + u32 comp_id; + + /* mpam_vmsc in this component */ + struct list_head vmsc; + + cpumask_t affinity; + + /* member of mpam_class:components */ + struct list_head class_list; + + /* parent: */ + struct mpam_class *class; + + struct mpam_garbage garbage; +}; + +struct mpam_vmsc { + /* member of mpam_component:vmsc_list */ + struct list_head comp_list; + + /* mpam_msc_ris in this vmsc */ + struct list_head ris; + + /* All RIS in this vMSC are members of this MSC */ + struct mpam_msc *msc; + + /* parent: */ + struct mpam_component *comp; + + struct mpam_garbage garbage; +}; + +struct mpam_msc_ris { + u8 ris_idx; + + cpumask_t affinity; + + /* member of mpam_vmsc:ris */ + struct list_head vmsc_list; + + /* member of mpam_msc:ris */ + struct list_head msc_list; + + /* parent: */ + struct mpam_vmsc *vmsc; + + struct mpam_garbage garbage; +}; + +/* List of all classes - protected by srcu*/ +extern struct srcu_struct mpam_srcu; +extern struct list_head mpam_classes; + +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity); + #endif /* MPAM_INTERNAL_H */ diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 4b7f335181e0..13a8ac5c2cbd 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -37,11 +37,16 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, static inline int acpi_mpam_count_msc(void) { return -EINVAL; } #endif +#ifdef CONFIG_ARM64_MPAM_DRIVER +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, int component_id); +#else static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, int component_id) { return -EINVAL; } +#endif #endif /* __LINUX_ARM_MPAM_H */ From aa64b9e110515610b6498df0f8fce9b1c6c44f72 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:42 +0000 Subject: [PATCH 56/94] arm_mpam: Add MPAM MSC register layout definitions Memory Partitioning and Monitoring (MPAM) has memory mapped devices (MSCs) with an identity/configuration page. Add the definitions for these registers as offset within the page(s). 
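For example, a later patch in this series uses these definitions to
check that an MSC implements MPAM architecture v1.x before probing it
any further:

          u64 idr = __mpam_read_reg(msc, MPAMF_AIDR);

          if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1)
                  return -EIO;    /* not an MPAM v1.x MSC */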
Link: https://developer.arm.com/documentation/ihi0099/aa/
Signed-off-by: James Morse
Reviewed-by: Ben Horgan
Reviewed-by: Fenghua Yu
Reviewed-by: Shaopeng Tan
Reviewed-by: Jonathan Cameron
Reviewed-by: Gavin Shan
Tested-by: Fenghua Yu
Tested-by: Shaopeng Tan
Tested-by: Peter Newman
Tested-by: Carl Worth
Tested-by: Gavin Shan
Tested-by: Zeng Heng
Tested-by: Hanjun Guo
Signed-off-by: Ben Horgan
Signed-off-by: Catalin Marinas
---
 drivers/resctrl/mpam_internal.h | 267 ++++++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)

diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index 8f7a28d2c021..51f791cc207b 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -140,4 +140,271 @@ extern struct list_head mpam_classes;
 int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
 			cpumask_t *affinity);
 
+/*
+ * MPAM MSCs have the following register layout. See:
+ * Arm Memory System Resource Partitioning and Monitoring (MPAM) System
+ * Component Specification.
+ * https://developer.arm.com/documentation/ihi0099/aa/
+ */
+#define MPAM_ARCHITECTURE_V1 0x10
+
+/* Memory mapped control pages */
+/* ID Register offsets in the memory mapped page */
+#define MPAMF_IDR 0x0000 /* features id register */
+#define MPAMF_IIDR 0x0018 /* implementer id register */
+#define MPAMF_AIDR 0x0020 /* architectural id register */
+#define MPAMF_IMPL_IDR 0x0028 /* imp-def partitioning */
+#define MPAMF_CPOR_IDR 0x0030 /* cache-portion partitioning */
+#define MPAMF_CCAP_IDR 0x0038 /* cache-capacity partitioning */
+#define MPAMF_MBW_IDR 0x0040 /* mem-bw partitioning */
+#define MPAMF_PRI_IDR 0x0048 /* priority partitioning */
+#define MPAMF_MSMON_IDR 0x0080 /* performance monitoring features */
+#define MPAMF_CSUMON_IDR 0x0088 /* cache-usage monitor */
+#define MPAMF_MBWUMON_IDR 0x0090 /* mem-bw usage monitor */
+#define MPAMF_PARTID_NRW_IDR 0x0050 /* partid-narrowing */
+
+/* Configuration and Status Register offsets in the memory mapped page */
+#define MPAMCFG_PART_SEL 0x0100 /* partid to configure */
+#define MPAMCFG_CPBM 0x1000 /* cache-portion config */
+#define MPAMCFG_CMAX 0x0108 /* cache-capacity config */
+#define MPAMCFG_CMIN 0x0110 /* cache-capacity config */
+#define MPAMCFG_CASSOC 0x0118 /* cache-associativity config */
+#define MPAMCFG_MBW_MIN 0x0200 /* min mem-bw config */
+#define MPAMCFG_MBW_MAX 0x0208 /* max mem-bw config */
+#define MPAMCFG_MBW_WINWD 0x0220 /* mem-bw accounting window config */
+#define MPAMCFG_MBW_PBM 0x2000 /* mem-bw portion bitmap config */
+#define MPAMCFG_PRI 0x0400 /* priority partitioning config */
+#define MPAMCFG_MBW_PROP 0x0500 /* mem-bw stride config */
+#define MPAMCFG_INTPARTID 0x0600 /* partid-narrowing config */
+
+#define MSMON_CFG_MON_SEL 0x0800 /* monitor selector */
+#define MSMON_CFG_CSU_FLT 0x0810 /* cache-usage monitor filter */
+#define MSMON_CFG_CSU_CTL 0x0818 /* cache-usage monitor config */
+#define MSMON_CFG_MBWU_FLT 0x0820 /* mem-bw monitor filter */
+#define MSMON_CFG_MBWU_CTL 0x0828 /* mem-bw monitor config */
+#define MSMON_CSU 0x0840 /* current cache-usage */
+#define MSMON_CSU_CAPTURE 0x0848 /* last cache-usage value captured */
+#define MSMON_MBWU 0x0860 /* current mem-bw usage value */
+#define MSMON_MBWU_CAPTURE 0x0868 /* last mem-bw value captured */
+#define MSMON_MBWU_L 0x0880 /* current long mem-bw usage value */
+#define MSMON_MBWU_L_CAPTURE 0x0890 /* last long mem-bw value captured */
+#define MSMON_CAPT_EVNT 0x0808 /* signal a capture event */
+#define MPAMF_ESR 0x00F8 /* error status register */
+#define MPAMF_ECR 0x00F0 /* error control register */
+
+/* MPAMF_IDR - MPAM features ID register */
+#define MPAMF_IDR_PARTID_MAX GENMASK(15, 0)
+#define MPAMF_IDR_PMG_MAX GENMASK(23, 16)
+#define MPAMF_IDR_HAS_CCAP_PART BIT(24)
+#define MPAMF_IDR_HAS_CPOR_PART BIT(25)
+#define MPAMF_IDR_HAS_MBW_PART BIT(26)
+#define MPAMF_IDR_HAS_PRI_PART BIT(27)
+#define MPAMF_IDR_EXT BIT(28)
+#define MPAMF_IDR_HAS_IMPL_IDR BIT(29)
+#define MPAMF_IDR_HAS_MSMON BIT(30)
+#define MPAMF_IDR_HAS_PARTID_NRW BIT(31)
+#define MPAMF_IDR_HAS_RIS BIT(32)
+#define MPAMF_IDR_HAS_EXTD_ESR BIT(38)
+#define MPAMF_IDR_HAS_ESR BIT(39)
+#define MPAMF_IDR_RIS_MAX GENMASK(59, 56)
+
+/* MPAMF_MSMON_IDR - MPAM performance monitoring ID register */
+#define MPAMF_MSMON_IDR_MSMON_CSU BIT(16)
+#define MPAMF_MSMON_IDR_MSMON_MBWU BIT(17)
+#define MPAMF_MSMON_IDR_HAS_LOCAL_CAPT_EVNT BIT(31)
+
+/* MPAMF_CPOR_IDR - MPAM features cache portion partitioning ID register */
+#define MPAMF_CPOR_IDR_CPBM_WD GENMASK(15, 0)
+
+/* MPAMF_CCAP_IDR - MPAM features cache capacity partitioning ID register */
+#define MPAMF_CCAP_IDR_CMAX_WD GENMASK(5, 0)
+#define MPAMF_CCAP_IDR_CASSOC_WD GENMASK(12, 8)
+#define MPAMF_CCAP_IDR_HAS_CASSOC BIT(28)
+#define MPAMF_CCAP_IDR_HAS_CMIN BIT(29)
+#define MPAMF_CCAP_IDR_NO_CMAX BIT(30)
+#define MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM BIT(31)
+
+/* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */
+#define MPAMF_MBW_IDR_BWA_WD GENMASK(5, 0)
+#define MPAMF_MBW_IDR_HAS_MIN BIT(10)
+#define MPAMF_MBW_IDR_HAS_MAX BIT(11)
+#define MPAMF_MBW_IDR_HAS_PBM BIT(12)
+#define MPAMF_MBW_IDR_HAS_PROP BIT(13)
+#define MPAMF_MBW_IDR_WINDWR BIT(14)
+#define MPAMF_MBW_IDR_BWPBM_WD GENMASK(28, 16)
+
+/* MPAMF_PRI_IDR - MPAM features priority partitioning ID register */
+#define MPAMF_PRI_IDR_HAS_INTPRI BIT(0)
+#define MPAMF_PRI_IDR_INTPRI_0_IS_LOW BIT(1)
+#define MPAMF_PRI_IDR_INTPRI_WD GENMASK(9, 4)
+#define MPAMF_PRI_IDR_HAS_DSPRI BIT(16)
+#define MPAMF_PRI_IDR_DSPRI_0_IS_LOW BIT(17)
+#define MPAMF_PRI_IDR_DSPRI_WD GENMASK(25, 20)
+
+/* MPAMF_CSUMON_IDR - MPAM cache storage usage monitor ID register */
+#define MPAMF_CSUMON_IDR_NUM_MON GENMASK(15, 0)
+#define MPAMF_CSUMON_IDR_HAS_OFLOW_CAPT BIT(24)
+#define MPAMF_CSUMON_IDR_HAS_CEVNT_OFLW BIT(25)
+#define MPAMF_CSUMON_IDR_HAS_OFSR BIT(26)
+#define MPAMF_CSUMON_IDR_HAS_OFLOW_LNKG BIT(27)
+#define MPAMF_CSUMON_IDR_HAS_XCL BIT(29)
+#define MPAMF_CSUMON_IDR_CSU_RO BIT(30)
+#define MPAMF_CSUMON_IDR_HAS_CAPTURE BIT(31)
+
+/* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */
+#define MPAMF_MBWUMON_IDR_NUM_MON GENMASK(15, 0)
+#define MPAMF_MBWUMON_IDR_HAS_RWBW BIT(28)
+#define MPAMF_MBWUMON_IDR_LWD BIT(29)
+#define MPAMF_MBWUMON_IDR_HAS_LONG BIT(30)
+#define MPAMF_MBWUMON_IDR_HAS_CAPTURE BIT(31)
+
+/* MPAMF_PARTID_NRW_IDR - MPAM PARTID narrowing ID register */
+#define MPAMF_PARTID_NRW_IDR_INTPARTID_MAX GENMASK(15, 0)
+
+/* MPAMF_IIDR - MPAM implementation ID register */
+#define MPAMF_IIDR_IMPLEMENTER GENMASK(11, 0)
+#define MPAMF_IIDR_REVISION GENMASK(15, 12)
+#define MPAMF_IIDR_VARIANT GENMASK(19, 16)
+#define MPAMF_IIDR_PRODUCTID GENMASK(31, 20)
+
+/* MPAMF_AIDR - MPAM architecture ID register */
+#define MPAMF_AIDR_ARCH_MINOR_REV GENMASK(3, 0)
+#define MPAMF_AIDR_ARCH_MAJOR_REV GENMASK(7, 4)
+
+/* MPAMCFG_PART_SEL - MPAM partition configuration selection register */
+#define MPAMCFG_PART_SEL_PARTID_SEL GENMASK(15, 0)
+#define MPAMCFG_PART_SEL_INTERNAL BIT(16)
+#define MPAMCFG_PART_SEL_RIS GENMASK(27, 24)
+
+/* MPAMCFG_CASSOC - MPAM cache maximum associativity partition configuration register */
+#define MPAMCFG_CASSOC_CASSOC GENMASK(15, 0)
+
+/* MPAMCFG_CMAX - MPAM cache capacity configuration register */
+#define MPAMCFG_CMAX_SOFTLIM BIT(31)
+#define MPAMCFG_CMAX_CMAX GENMASK(15, 0)
+
+/* MPAMCFG_CMIN - MPAM cache capacity configuration register */
+#define MPAMCFG_CMIN_CMIN GENMASK(15, 0)
+
+/*
+ * MPAMCFG_MBW_MIN - MPAM memory minimum bandwidth partitioning configuration
+ * register
+ */
+#define MPAMCFG_MBW_MIN_MIN GENMASK(15, 0)
+
+/*
+ * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration
+ * register
+ */
+#define MPAMCFG_MBW_MAX_MAX GENMASK(15, 0)
+#define MPAMCFG_MBW_MAX_HARDLIM BIT(31)
+
+/*
+ * MPAMCFG_MBW_WINWD - MPAM memory bandwidth partitioning window width
+ * register
+ */
+#define MPAMCFG_MBW_WINWD_US_FRAC GENMASK(7, 0)
+#define MPAMCFG_MBW_WINWD_US_INT GENMASK(23, 8)
+
+/* MPAMCFG_PRI - MPAM priority partitioning configuration register */
+#define MPAMCFG_PRI_INTPRI GENMASK(15, 0)
+#define MPAMCFG_PRI_DSPRI GENMASK(31, 16)
+
+/*
+ * MPAMCFG_MBW_PROP - Memory bandwidth proportional stride partitioning
+ * configuration register
+ */
+#define MPAMCFG_MBW_PROP_STRIDEM1 GENMASK(15, 0)
+#define MPAMCFG_MBW_PROP_EN BIT(31)
+
+/*
+ * MPAMCFG_INTPARTID - MPAM internal partition narrowing configuration register
+ */
+#define MPAMCFG_INTPARTID_INTPARTID GENMASK(15, 0)
+#define MPAMCFG_INTPARTID_INTERNAL BIT(16)
+
+/* MSMON_CFG_MON_SEL - Memory system performance monitor selection register */
+#define MSMON_CFG_MON_SEL_MON_SEL GENMASK(15, 0)
+#define MSMON_CFG_MON_SEL_RIS GENMASK(27, 24)
+
+/* MPAMF_ESR - MPAM Error Status Register */
+#define MPAMF_ESR_PARTID_MON GENMASK(15, 0)
+#define MPAMF_ESR_PMG GENMASK(23, 16)
+#define MPAMF_ESR_ERRCODE GENMASK(27, 24)
+#define MPAMF_ESR_OVRWR BIT(31)
+#define MPAMF_ESR_RIS GENMASK(35, 32)
+
+/* MPAMF_ECR - MPAM Error Control Register */
+#define MPAMF_ECR_INTEN BIT(0)
+
+/* Error conditions in accessing memory mapped registers */
+#define MPAM_ERRCODE_NONE 0
+#define MPAM_ERRCODE_PARTID_SEL_RANGE 1
+#define MPAM_ERRCODE_REQ_PARTID_RANGE 2
+#define MPAM_ERRCODE_MSMONCFG_ID_RANGE 3
+#define MPAM_ERRCODE_REQ_PMG_RANGE 4
+#define MPAM_ERRCODE_MONITOR_RANGE 5
+#define MPAM_ERRCODE_INTPARTID_RANGE 6
+#define MPAM_ERRCODE_UNEXPECTED_INTERNAL 7
+#define MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL 8
+#define MPAM_ERRCODE_RIS_NO_CONTROL 9
+#define MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL 10
+#define MPAM_ERRCODE_RIS_NO_MONITOR 11
+
+/*
+ * MSMON_CFG_CSU_CTL - Memory system performance monitor configure cache storage
+ * usage monitor control register
+ * MSMON_CFG_MBWU_CTL - Memory system performance monitor configure memory
+ * bandwidth usage monitor control register
+ */
+#define MSMON_CFG_x_CTL_TYPE GENMASK(7, 0)
+#define MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L BIT(15)
+#define MSMON_CFG_x_CTL_MATCH_PARTID BIT(16)
+#define MSMON_CFG_x_CTL_MATCH_PMG BIT(17)
+#define MSMON_CFG_MBWU_CTL_SCLEN BIT(19)
+#define MSMON_CFG_x_CTL_SUBTYPE GENMASK(22, 20)
+#define MSMON_CFG_x_CTL_OFLOW_FRZ BIT(24)
+#define MSMON_CFG_x_CTL_OFLOW_INTR BIT(25)
+#define MSMON_CFG_x_CTL_OFLOW_STATUS BIT(26)
+#define MSMON_CFG_x_CTL_CAPT_RESET BIT(27)
+#define MSMON_CFG_x_CTL_CAPT_EVNT GENMASK(30, 28)
+#define MSMON_CFG_x_CTL_EN BIT(31)
+
+#define MSMON_CFG_MBWU_CTL_TYPE_MBWU 0x42
+#define MSMON_CFG_CSU_CTL_TYPE_CSU 0x43
+
+/*
+ * MSMON_CFG_CSU_FLT - Memory system performance monitor configure cache storage
+ * usage monitor filter register
+ * MSMON_CFG_MBWU_FLT - Memory system performance monitor configure memory
+ * bandwidth usage monitor filter register
+ */
+#define MSMON_CFG_x_FLT_PARTID GENMASK(15, 0)
+#define MSMON_CFG_x_FLT_PMG GENMASK(23, 16)
+
+#define MSMON_CFG_MBWU_FLT_RWBW GENMASK(31, 30)
+#define MSMON_CFG_CSU_FLT_XCL BIT(31)
+
+/*
+ * MSMON_CSU - Memory system performance monitor cache storage usage monitor
+ * register
+ * MSMON_CSU_CAPTURE - Memory system performance monitor cache storage usage
+ * capture register
+ * MSMON_MBWU - Memory system performance monitor memory bandwidth usage
+ * monitor register
+ * MSMON_MBWU_CAPTURE - Memory system performance monitor memory bandwidth usage
+ * capture register
+ */
+#define MSMON___VALUE GENMASK(30, 0)
+#define MSMON___NRDY BIT(31)
+#define MSMON___L_NRDY BIT(63)
+#define MSMON___L_VALUE GENMASK(43, 0)
+#define MSMON___LWD_VALUE GENMASK(62, 0)
+
+/*
+ * MSMON_CAPT_EVNT - Memory system performance monitoring capture event
+ * generation register
+ */
+#define MSMON_CAPT_EVNT_NOW BIT(0)
+
 #endif /* MPAM_INTERNAL_H */

From 8f8d0ac1da7885c0d619636f93e0983239dc145c Mon Sep 17 00:00:00 2001
From: James Morse
Date: Wed, 19 Nov 2025 12:22:43 +0000
Subject: [PATCH 57/94] arm_mpam: Add cpuhp callbacks to probe MSC hardware

Because an MSC can only be accessed from the CPUs in its cpu-affinity
set, we need to be running on one of those CPUs to probe the MSC
hardware. Do this work in the cpuhp callback.

Probing the hardware only happens before MPAM is enabled: as each CPU's
online call is made, walk all the MSCs and probe those we can reach
that haven't already been probed. This adds the low-level MSC register
read accessors.

Once all MSCs reported by the firmware have been probed from a CPU in
their respective cpu-affinity set, the probe-time cpuhp callbacks are
replaced. The replacement callbacks will ultimately need to handle
save/restore of the runtime MSC state across power transitions, but for
now there is nothing to do in them: so do nothing.

The architecture's context switch code will be enabled by a static-key;
this can be set by mpam_enable(), but must be done from process
context, not a cpuhp callback, because both take the cpuhp lock.

Whenever a new MSC has been probed, the mpam_enable() work is scheduled
to test if all the MSCs have been probed. If probing fails,
mpam_disable() is scheduled to unregister the cpuhp callbacks and free
memory.
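Both generations of callbacks are registered through the same helper
using a dynamic cpuhp state, roughly:

          mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                               "mpam:drv_probe",
                                               mpam_discovery_cpu_online,
                                               NULL);

Once everything has been probed, this state is removed and
"mpam:online" is registered with the (for now, empty) runtime callbacks
in its place.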
CC: Lecopzer Chen Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 176 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 5 + 2 files changed, 180 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f1dcf9bb14f2..51284f55ae9b 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -4,8 +4,10 @@ #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ #include +#include #include #include +#include #include #include #include @@ -17,6 +19,7 @@ #include #include #include +#include #include "mpam_internal.h" @@ -36,6 +39,25 @@ struct srcu_struct mpam_srcu; */ static atomic_t mpam_num_msc; +static int mpam_cpuhp_state; +static DEFINE_MUTEX(mpam_cpuhp_state_lock); + +/* + * mpam is enabled once all devices have been probed from CPU online callbacks, + * scheduled via this work_struct. If access to an MSC depends on a CPU that + * was not brought online at boot, this can happen surprisingly late. + */ +static DECLARE_WORK(mpam_enable_work, &mpam_enable); + +/* + * All mpam error interrupts indicate a software bug. On receipt, disable the + * driver. + */ +static DECLARE_WORK(mpam_broken_work, &mpam_disable); + +/* When mpam is disabled, the printed reason to aid debugging */ +static char *mpam_disable_reason; + /* * An MSC is a physical container for controls and monitors, each identified by * their RIS index. 
These share a base-address, interrupts and some MMIO @@ -106,6 +128,21 @@ static void mpam_free_garbage(void) } } +static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) +{ + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + return readl_relaxed(msc->mapped_hwpage + reg); +} + +static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + return __mpam_read_reg(msc, reg); +} + +#define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg) + static struct mpam_class * mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { @@ -413,6 +450,86 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; } +static int mpam_msc_hw_probe(struct mpam_msc *msc) +{ + u64 idr; + struct device *dev = &msc->pdev->dev; + + lockdep_assert_held(&msc->probe_lock); + + idr = __mpam_read_reg(msc, MPAMF_AIDR); + if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) { + dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n"); + return -EIO; + } + + msc->probed = true; + + return 0; +} + +static int mpam_cpu_online(unsigned int cpu) +{ + return 0; +} + +/* Before mpam is enabled, try to probe new MSC */ +static int mpam_discovery_cpu_online(unsigned int cpu) +{ + int err = 0; + struct mpam_msc *msc; + bool new_device_probed = false; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + mutex_lock(&msc->probe_lock); + if (!msc->probed) + err = mpam_msc_hw_probe(msc); + mutex_unlock(&msc->probe_lock); + + if (err) + break; + new_device_probed = true; + } + + if (new_device_probed && !err) + schedule_work(&mpam_enable_work); + if (err) { + mpam_disable_reason = "error during probing"; + schedule_work(&mpam_broken_work); + } + + return err; +} + +static int mpam_cpu_offline(unsigned int cpu) +{ + return 0; +} + +static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online), + int (*offline)(unsigned int offline), + char *name) +{ + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + + mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online, + offline); + if (mpam_cpuhp_state <= 0) { + pr_err("Failed to register cpuhp callbacks"); + mpam_cpuhp_state = 0; + } + mutex_unlock(&mpam_cpuhp_state_lock); +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. 
The common reason for this is power @@ -549,7 +666,8 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) } if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc) - pr_info("Discovered all MSCs\n"); + mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL, + "mpam:drv_probe"); return 0; } @@ -562,6 +680,62 @@ static struct platform_driver mpam_msc_driver = { .remove = mpam_msc_drv_remove, }; +static void mpam_enable_once(void) +{ + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, + "mpam:online"); + + pr_info("MPAM enabled\n"); +} + +void mpam_disable(struct work_struct *ignored) +{ + struct mpam_msc *msc, *tmp; + + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + mutex_unlock(&mpam_cpuhp_state_lock); + + mutex_lock(&mpam_list_lock); + list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list) + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + mpam_free_garbage(); + + pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason); +} + +/* + * Enable mpam once all devices have been probed. + * Scheduled by mpam_discovery_cpu_online() once all devices have been created. + * Also scheduled when new devices are probed when new CPUs come online. + */ +void mpam_enable(struct work_struct *work) +{ + static atomic_t once; + struct mpam_msc *msc; + bool all_devices_probed = true; + + /* Have we probed all the hw devices? */ + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + mutex_lock(&msc->probe_lock); + if (!msc->probed) + all_devices_probed = false; + mutex_unlock(&msc->probe_lock); + + if (!all_devices_probed) + break; + } + + if (all_devices_probed && !atomic_fetch_inc(&once)) + mpam_enable_once(); +} + static int __init mpam_msc_driver_init(void) { if (!system_supports_mpam()) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 51f791cc207b..4e1538d29783 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -48,6 +48,7 @@ struct mpam_msc { * properties become read-only and the lists are protected by SRCU. */ struct mutex probe_lock; + bool probed; unsigned long ris_idxs; u32 ris_max; @@ -137,6 +138,10 @@ struct mpam_msc_ris { extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; +/* Scheduled work callback to enable mpam once all MSC have been probed */ +void mpam_enable(struct work_struct *work); +void mpam_disable(struct work_struct *work); + int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From bd221f9f82afb616887e0b88b43fbb937479d744 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:44 +0000 Subject: [PATCH 58/94] arm_mpam: Probe hardware to find the supported partid/pmg values CPUs can generate traffic with a range of PARTID and PMG values, but each MSC may also have its own maximum size for these fields. Before MPAM can be used, the driver needs to probe each RIS on each MSC, to find the system-wide smallest value that can be used. The limits from requestors (e.g. CPUs) also need taking into account. While doing this, RIS entries that firmware didn't describe are created under MPAM_CLASS_UNKNOWN. This adds the low level MSC write accessors. While we're here, implement the mpam_register_requestor() call for the arch code to register the CPU limits. Future callers of this will tell us about the SMMU and ITS. 
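A sketch of the arch-side call this enables (illustrative only: the helper body below is not part of this patch, and the MPAMIDR_EL1 field macros assume the usual arm64 generated sysreg definitions):

static int __init arm64_mpam_register_cpus(void)
{
	u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
	u16 partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX_MASK, idr);
	u8 pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX_MASK, idr);

	/* Record the smallest PARTID/PMG space every CPU can generate */
	return mpam_register_requestor(partid_max, pmg_max);
}
arch_initcall(arm64_mpam_register_cpus);

The driver keeps the min() of all registered values, and returns -EBUSY to a requestor that would lower either value after they have been published to user-space.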
Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 148 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 6 ++ include/linux/arm_mpam.h | 14 +++ 3 files changed, 167 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 51284f55ae9b..3d9b87a9727a 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,15 @@ static atomic_t mpam_num_msc; static int mpam_cpuhp_state; static DEFINE_MUTEX(mpam_cpuhp_state_lock); +/* + * The smallest common values for any CPU or MSC in the system. + * Generating traffic outside this range will result in screaming interrupts. + */ +u16 mpam_partid_max; +u8 mpam_pmg_max; +static bool partid_max_init, partid_max_published; +static DEFINE_SPINLOCK(partid_max_lock); + /* * mpam is enabled once all devices have been probed from CPU online callbacks, * scheduled via this work_struct. If access to an MSC depends on a CPU that @@ -143,6 +153,70 @@ static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg) #define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg) +static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + writel_relaxed(val, msc->mapped_hwpage + reg); +} + +static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + __mpam_write_reg(msc, reg, val); +} + +#define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val) + +static u64 mpam_msc_read_idr(struct mpam_msc *msc) +{ + u64 idr_high = 0, idr_low; + + lockdep_assert_held(&msc->part_sel_lock); + + idr_low = mpam_read_partsel_reg(msc, IDR); + if (FIELD_GET(MPAMF_IDR_EXT, idr_low)) + idr_high = mpam_read_partsel_reg(msc, IDR + 4); + + return (idr_high << 32) | idr_low; +} + +static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc) +{ + lockdep_assert_held(&msc->part_sel_lock); + + mpam_write_partsel_reg(msc, PART_SEL, partsel); +} + +static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid); + + __mpam_part_sel_raw(partsel, msc); +} + +int mpam_register_requestor(u16 partid_max, u8 pmg_max) +{ + guard(spinlock)(&partid_max_lock); + if (!partid_max_init) { + mpam_partid_max = partid_max; + mpam_pmg_max = pmg_max; + partid_max_init = true; + } else if (!partid_max_published) { + mpam_partid_max = min(mpam_partid_max, partid_max); + mpam_pmg_max = min(mpam_pmg_max, pmg_max); + } else { + /* New requestors can't lower the values */ + if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max) + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL(mpam_register_requestor); + static struct mpam_class * mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { @@ -450,9 +524,35 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; 
} +static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, + u8 ris_idx) +{ + int err; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + + if (!test_bit(ris_idx, &msc->ris_idxs)) { + err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN, + 0, 0); + if (err) + return ERR_PTR(err); + } + + list_for_each_entry(ris, &msc->ris, msc_list) { + if (ris->ris_idx == ris_idx) + return ris; + } + + return ERR_PTR(-ENOENT); +} + static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; + u16 partid_max; + u8 ris_idx, pmg_max; + struct mpam_msc_ris *ris; struct device *dev = &msc->pdev->dev; lockdep_assert_held(&msc->probe_lock); @@ -463,6 +563,40 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return -EIO; } + /* Grab an IDR value to find out how many RIS there are */ + mutex_lock(&msc->part_sel_lock); + idr = mpam_msc_read_idr(msc); + mutex_unlock(&msc->part_sel_lock); + + msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); + + /* Use these values so partid/pmg always starts with a valid value */ + msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + + for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) { + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + idr = mpam_msc_read_idr(msc); + mutex_unlock(&msc->part_sel_lock); + + partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + msc->partid_max = min(msc->partid_max, partid_max); + msc->pmg_max = min(msc->pmg_max, pmg_max); + + mutex_lock(&mpam_list_lock); + ris = mpam_get_or_create_ris(msc, ris_idx); + mutex_unlock(&mpam_list_lock); + if (IS_ERR(ris)) + return PTR_ERR(ris); + } + + spin_lock(&partid_max_lock); + mpam_partid_max = min(mpam_partid_max, msc->partid_max); + mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); + spin_unlock(&partid_max_lock); + msc->probed = true; return 0; @@ -682,10 +816,20 @@ static struct platform_driver mpam_msc_driver = { static void mpam_enable_once(void) { + /* + * Once the cpuhp callbacks have been changed, mpam_partid_max can no + * longer change. + */ + spin_lock(&partid_max_lock); + partid_max_published = true; + spin_unlock(&partid_max_lock); + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); - pr_info("MPAM enabled\n"); + /* Use printk() to avoid the pr_fmt adding the function name. 
*/ + printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n", + mpam_partid_max + 1, mpam_pmg_max + 1); } void mpam_disable(struct work_struct *ignored) @@ -751,4 +895,6 @@ static int __init mpam_msc_driver_init(void) return platform_driver_register(&mpam_msc_driver); } + +/* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 4e1538d29783..768a58a3ab27 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -49,6 +49,8 @@ struct mpam_msc { */ struct mutex probe_lock; bool probed; + u16 partid_max; + u8 pmg_max; unsigned long ris_idxs; u32 ris_max; @@ -138,6 +140,10 @@ struct mpam_msc_ris { extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; +/* System-wide partid/pmg values */ +extern u16 mpam_partid_max; +extern u8 mpam_pmg_max; + /* Scheduled work callback to enable mpam once all MSC have been probed */ void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 13a8ac5c2cbd..7f00c5285a32 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -49,4 +49,18 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, } #endif +/** + * mpam_register_requestor() - Register a requestor with the MPAM driver + * @partid_max: The maximum PARTID value the requestor can generate. + * @pmg_max: The maximum PMG value the requestor can generate. + * + * Registers a requestor with the MPAM driver to ensure the chosen system-wide + * minimum PARTID and PMG values will allow the requestor's features to be used. + * + * Returns an error if the registration is too late, and a larger PARTID/PMG + * value has been advertised to user-space. In this case the requestor should + * not use its MPAM features. Returns 0 on success. + */ +int mpam_register_requestor(u16 partid_max, u8 pmg_max); + #endif /* __LINUX_ARM_MPAM_H */ From d02beb06ca2a624e17004659c79d26a23484aa8b Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:45 +0000 Subject: [PATCH 59/94] arm_mpam: Add helpers for managing the locking around the mon_sel registers The MSC MON_SEL register needs to be accessed from hardirq for the overflow interrupt, and when taking an IPI to access these registers on platforms where MSC are not accessible from every CPU. This makes an irqsave spinlock the obvious lock to protect these registers. On systems with SCMI or PCC mailboxes the access must be able to sleep, meaning a mutex must be used. The SCMI or PCC platforms can't support an overflow interrupt, and can't access the registers from hardirq context. Clearly these two can't exist for one MSC at the same time. Add helpers for the MON_SEL locking. For now, use an irqsave spinlock and only support 'real' MMIO platforms. In the future this lock will be split in two, allowing SCMI/PCC platforms to take a mutex. Because there are contexts where the SCMI/PCC platforms can't make an access, mpam_mon_sel_lock() needs to be able to fail. Do this now, so that all the error handling on these paths is present. This allows the relevant paths to fail if they are needed on a platform where this isn't possible, instead of having to make explicit checks of the interface type.
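A minimal sketch of the intended call pattern, using only the helpers added here (the caller and its error code are illustrative; the monitor register accessors arrive in a later patch):

static int example_read_monsel(struct mpam_msc *msc, u16 reg, u32 *val)
{
	if (!mpam_mon_sel_lock(msc))
		return -EOPNOTSUPP;	/* wrong context for this interface type */

	*val = __mpam_read_reg(msc, reg);
	mpam_mon_sel_unlock(msc);

	return 0;
}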
Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 2 ++ drivers/resctrl/mpam_internal.h | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 3d9b87a9727a..dcbc9cf5581d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -740,6 +741,7 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) if (err) return ERR_PTR(err); + mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; INIT_LIST_HEAD_RCU(&msc->all_msc_list); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 768a58a3ab27..97f02cf92d7a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define MPAM_MSC_MAX_NUM_RIS 16 @@ -65,12 +66,52 @@ struct mpam_msc { */ struct mutex part_sel_lock; + /* + * mon_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_MON_SEL, and the mbwu_state. + * Access to mon_sel is needed from both process and interrupt contexts, + * but is complicated by firmware-backed platforms that can't make any + * access unless they can sleep. + * Always use the mpam_mon_sel_lock() helpers. + * Accesses to mon_sel need to be able to fail if they occur in the wrong + * context. + * If needed, take msc->probe_lock first. + */ + raw_spinlock_t _mon_sel_lock; + unsigned long _mon_sel_flags; + void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; struct mpam_garbage garbage; }; +/* Returning false here means accesses to mon_sel must fail and report an error. */ +static inline bool __must_check mpam_mon_sel_lock(struct mpam_msc *msc) +{ + /* Locking will require updating to support a firmware backed interface */ + if (WARN_ON_ONCE(msc->iface != MPAM_IFACE_MMIO)) + return false; + + raw_spin_lock_irqsave(&msc->_mon_sel_lock, msc->_mon_sel_flags); + return true; +} + +static inline void mpam_mon_sel_unlock(struct mpam_msc *msc) +{ + raw_spin_unlock_irqrestore(&msc->_mon_sel_lock, msc->_mon_sel_flags); +} + +static inline void mpam_mon_sel_lock_held(struct mpam_msc *msc) +{ + lockdep_assert_held_once(&msc->_mon_sel_lock); +} + +static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) +{ + raw_spin_lock_init(&msc->_mon_sel_lock); +} + struct mpam_class { /* mpam_components in this class */ struct list_head components; From 8c90dc68a5de4349ef9ba51449fb0a29cd690547 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:46 +0000 Subject: [PATCH 60/94] arm_mpam: Probe the hardware features resctrl supports Expand the probing support with the control and monitor types we can use with resctrl. 
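For reference, a sketch of how a later consumer might inspect the probed properties (the function and its pr_debug() output are illustrative, not part of this patch):

static void example_dump_ris(struct mpam_msc_ris *ris)
{
	struct mpam_props *props = &ris->props;

	if (mpam_has_feature(mpam_feat_cpor_part, props))
		pr_debug("ris %u: cache portion bitmap of %u bits\n",
			 ris->ris_idx, props->cpbm_wd);

	if (mpam_has_feature(mpam_feat_msmon_csu, props))
		pr_debug("ris %u: %u CSU monitors\n",
			 ris->ris_idx, props->num_csu_mon);
}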
CC: Dave Martin Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 149 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 33 +++++++ 2 files changed, 182 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index dcbc9cf5581d..ff561a08cd0d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -170,6 +170,22 @@ static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 va #define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val) +static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg) +{ + mpam_mon_sel_lock_held(msc); + return __mpam_read_reg(msc, reg); +} + +#define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg) + +static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + mpam_mon_sel_lock_held(msc); + __mpam_write_reg(msc, reg, val); +} + +#define mpam_write_monsel_reg(msc, reg, val) _mpam_write_monsel_reg(msc, MSMON_##reg, val) + static u64 mpam_msc_read_idr(struct mpam_msc *msc) { u64 idr_high = 0, idr_low; @@ -548,6 +564,133 @@ static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, return ERR_PTR(-ENOENT); } +/* + * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour + * of NRDY, software can use this bit for any purpose" - so hardware might not + * implement this - but it isn't RES0. + * + * Try and see what values stick in this bit. If we can write either value, + * it's probably not implemented by hardware.
+ */ +static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) +{ + u32 now; + u64 mon_sel; + bool can_set, can_clear; + struct mpam_msc *msc = ris->vmsc->msc; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + return false; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + _mpam_write_monsel_reg(msc, mon_reg, mon_sel); + + _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_set = now & MSMON___NRDY; + + _mpam_write_monsel_reg(msc, mon_reg, 0); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_clear = !(now & MSMON___NRDY); + mpam_mon_sel_unlock(msc); + + return (!can_set || !can_clear); +} + +#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \ + _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg) + +static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) +{ + int err; + struct mpam_msc *msc = ris->vmsc->msc; + struct device *dev = &msc->pdev->dev; + struct mpam_props *props = &ris->props; + + lockdep_assert_held(&msc->probe_lock); + lockdep_assert_held(&msc->part_sel_lock); + + /* Cache Portion partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { + u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); + + props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features); + if (props->cpbm_wd) + mpam_set_feature(mpam_feat_cpor_part, props); + } + + /* Memory bandwidth partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) { + u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR); + + /* portion bitmap resolution */ + props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features); + if (props->mbw_pbm_bits && + FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features)) + mpam_set_feature(mpam_feat_mbw_part, props); + + props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) + mpam_set_feature(mpam_feat_mbw_max, props); + } + + /* Performance Monitoring */ + if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) { + u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR); + + /* + * If the firmware max-nrdy-us property is missing, the + * CSU counters can't be used. Should we wait forever? + */ + err = device_property_read_u32(&msc->pdev->dev, + "arm,not-ready-us", + &msc->nrdy_usec); + + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) { + u32 csumonidr; + + csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR); + props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr); + if (props->num_csu_mon) { + bool hw_managed; + + mpam_set_feature(mpam_feat_msmon_csu, props); + + /* Is NRDY hardware managed? */ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props); + } + + /* + * Accept the missing firmware property if NRDY appears + * un-implemented. + */ + if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props)) + dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); + } + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { + bool hw_managed; + u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); + + props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); + if (props->num_mbwu_mon) + mpam_set_feature(mpam_feat_msmon_mbwu, props); + + /* Is NRDY hardware managed? 
*/ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + + /* + * Don't warn about any missing firmware property for + * MBWU NRDY - it doesn't make any sense! + */ + } + } +} + static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; @@ -591,6 +734,12 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) mutex_unlock(&mpam_list_lock); if (IS_ERR(ris)) return PTR_ERR(ris); + ris->idr = idr; + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + mpam_ris_hw_probe(ris); + mutex_unlock(&msc->part_sel_lock); } spin_lock(&partid_max_lock); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 97f02cf92d7a..cdaa019367e9 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -5,12 +5,14 @@ #define MPAM_INTERNAL_H #include +#include #include #include #include #include #include #include +#include #include #define MPAM_MSC_MAX_NUM_RIS 16 @@ -112,6 +114,33 @@ static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) raw_spin_lock_init(&msc->_mon_sel_lock); } +/* Bits for mpam features bitmaps */ +enum mpam_device_features { + mpam_feat_cpor_part, + mpam_feat_mbw_part, + mpam_feat_mbw_min, + mpam_feat_mbw_max, + mpam_feat_msmon, + mpam_feat_msmon_csu, + mpam_feat_msmon_csu_hw_nrdy, + mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_hw_nrdy, + MPAM_FEATURE_LAST +}; + +struct mpam_props { + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u16 cpbm_wd; + u16 mbw_pbm_bits; + u16 bwa_wd; + u16 num_csu_mon; + u16 num_mbwu_mon; +}; + +#define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) +#define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -151,6 +180,8 @@ struct mpam_vmsc { /* mpam_msc_ris in this vmsc */ struct list_head ris; + struct mpam_props props; + /* All RIS in this vMSC are members of this MSC */ struct mpam_msc *msc; @@ -162,6 +193,8 @@ struct mpam_vmsc { struct mpam_msc_ris { u8 ris_idx; + u64 idr; + struct mpam_props props; cpumask_t affinity; From c10ca83a778304f976cbea60bbbb2f1fac003f5c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:47 +0000 Subject: [PATCH 61/94] arm_mpam: Merge supported features during mpam_enable() into mpam_class To make a decision about whether to expose an mpam class as a resctrl resource we need to know its overall supported features and properties. Once we've probed all the resources, we can walk the tree and produce overall values by merging the bitmaps. This eliminates features that are only supported by some MSC that make up a component or class. If bitmap properties are mismatched within a component we cannot support the mismatched feature. Care has to be taken as vMSC may hold mismatched RIS. 
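Condensed, the two merge steps below reduce to:

	/* RIS -> vMSC: controls alias the same resource, union of features */
	__props_mismatch(&vmsc->props, &ris->props, true);

	/* vMSC -> class: distinct resources, intersection of features */
	__props_mismatch(&class->props, &vmsc->props, false);

with min() taken for the monitor counts and the bandwidth-fraction width whenever both sides implement them.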
Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 214 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 3 + 2 files changed, 217 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index ff561a08cd0d..f9ac88bf06b7 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -965,6 +965,216 @@ static struct platform_driver mpam_msc_driver = { .remove = mpam_msc_drv_remove, }; +/* Any of these features mean the BWA_WD field is valid. */ +static bool mpam_has_bwa_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_mbw_min, props)) + return true; + if (mpam_has_feature(mpam_feat_mbw_max, props)) + return true; + return false; +} + +#define MISMATCHED_HELPER(parent, child, helper, field, alias) \ + helper(parent) && \ + ((helper(child) && (parent)->field != (child)->field) || \ + (!helper(child) && !(alias))) + +#define MISMATCHED_FEAT(parent, child, feat, field, alias) \ + mpam_has_feature((feat), (parent)) && \ + ((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \ + (!mpam_has_feature((feat), (child)) && !(alias))) + +#define CAN_MERGE_FEAT(parent, child, feat, alias) \ + (alias) && !mpam_has_feature((feat), (parent)) && \ + mpam_has_feature((feat), (child)) + +/* + * Combine two props fields. + * If this is for controls that alias the same resource, it is safe to just + * copy the values over. If two aliasing controls implement the same scheme + * a safe value must be picked. + * For non-aliasing controls, these control different resources, and the + * resulting safe value must be compatible with both. When merging values in + * the tree, all the aliasing resources must be handled first. + * On mismatch, parent is modified. 
+ */ +static void __props_mismatch(struct mpam_props *parent, + struct mpam_props *child, bool alias) +{ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) { + parent->cpbm_wd = child->cpbm_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part, + cpbm_wd, alias)) { + pr_debug("cleared cpor_part\n"); + mpam_clear_feature(mpam_feat_cpor_part, parent); + parent->cpbm_wd = 0; + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) { + parent->mbw_pbm_bits = child->mbw_pbm_bits; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part, + mbw_pbm_bits, alias)) { + pr_debug("cleared mbw_part\n"); + mpam_clear_feature(mpam_feat_mbw_part, parent); + parent->mbw_pbm_bits = 0; + } + + /* bwa_wd is a count of bits, fewer bits means less precision */ + if (alias && !mpam_has_bwa_wd_feature(parent) && + mpam_has_bwa_wd_feature(child)) { + parent->bwa_wd = child->bwa_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature, + bwa_wd, alias)) { + pr_debug("took the min bwa_wd\n"); + parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); + } + + /* For num properties, take the minimum */ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) { + parent->num_csu_mon = child->num_csu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu, + num_csu_mon, alias)) { + pr_debug("took the min num_csu_mon\n"); + parent->num_csu_mon = min(parent->num_csu_mon, + child->num_csu_mon); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) { + parent->num_mbwu_mon = child->num_mbwu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu, + num_mbwu_mon, alias)) { + pr_debug("took the min num_mbwu_mon\n"); + parent->num_mbwu_mon = min(parent->num_mbwu_mon, + child->num_mbwu_mon); + } + + if (alias) { + /* Merge features for aliased resources */ + bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } else { + /* Clear missing features for non aliasing */ + bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } +} + +/* + * If a vmsc doesn't match class feature/configuration, do the right thing(tm). + * For 'num' properties we can just take the minimum. + * For properties where the mismatched unused bits would make a difference, we + * nobble the class feature, as we can't configure all the resources. + * e.g. The L3 cache is composed of two resources with 13 and 17 portion + * bitmaps respectively. + */ +static void +__class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc) +{ + struct mpam_props *cprops = &class->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify class */ + + dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n", + (long)cprops->features, (long)vprops->features); + + /* Take the safe value for any common features */ + __props_mismatch(cprops, vprops, false); +} + +static void +__vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris) +{ + struct mpam_props *rprops = &ris->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */ + + dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n", + (long)vprops->features, (long)rprops->features); + + /* + * Merge mismatched features - Copy any features that aren't common, + * but take the safe value for any common features. 
+ */ + __props_mismatch(vprops, rprops, true); +} + +/* + * Copy the first component's first vMSC's properties and features to the + * class. __class_props_mismatch() will remove conflicts. + * It is not possible to have a class with no components, or a component with + * no resources. The vMSC properties have already been built. + */ +static void mpam_enable_init_class_features(struct mpam_class *class) +{ + struct mpam_vmsc *vmsc; + struct mpam_component *comp; + + comp = list_first_entry(&class->components, + struct mpam_component, class_list); + vmsc = list_first_entry(&comp->vmsc, + struct mpam_vmsc, comp_list); + + class->props = vmsc->props; +} + +static void mpam_enable_merge_vmsc_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + __vmsc_props_mismatch(vmsc, ris); + class->nrdy_usec = max(class->nrdy_usec, + vmsc->msc->nrdy_usec); + } + } +} + +static void mpam_enable_merge_class_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) + __class_props_mismatch(class, vmsc); +} + +/* + * Merge all the common resource features into class. + * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features() + * as the first step so that mpam_enable_init_class_features() can initialise + * the class with a representative set of features. + * Next the mpam_enable_merge_class_features() bitwise-and's all the vmsc + * features to form the class features. + * Other features are the min/max as appropriate. + * + * To avoid walking the whole tree twice, the class->nrdy_usec property is + * updated when working with the vmsc as it is a max(), and doesn't need + * initialising first. 
+ */ +static void mpam_enable_merge_features(struct list_head *all_classes_list) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, all_classes_list, classes_list) { + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_vmsc_features(comp); + + mpam_enable_init_class_features(class); + + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_class_features(comp); + } +} + static void mpam_enable_once(void) { /* @@ -975,6 +1185,10 @@ static void mpam_enable_once(void) partid_max_published = true; spin_unlock(&partid_max_lock); + mutex_lock(&mpam_list_lock); + mpam_enable_merge_features(&mpam_classes); + mutex_unlock(&mpam_list_lock); + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index cdaa019367e9..4749ac223adc 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -140,6 +140,7 @@ struct mpam_props { #define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) +#define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) struct mpam_class { /* mpam_components in this class */ @@ -147,6 +148,8 @@ struct mpam_class { cpumask_t affinity; + struct mpam_props props; + u32 nrdy_usec; u8 level; enum mpam_class_types type; From f188a36ca2416e8090453eacbabd2925b20eb906 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:48 +0000 Subject: [PATCH 62/94] arm_mpam: Reset MSC controls from cpuhp callbacks When a CPU comes online, it may bring a newly accessible MSC with it. Only the default partid has its value reset by hardware, and even then the MSC might not have been reset since its config was previously dirtied. e.g. Kexec. Any in-use partid must have its configuration restored, or reset. In-use partids may be held in caches and evicted later. MSC are also reset when CPUs are taken offline to cover cases where firmware doesn't reset the MSC over reboot using UEFI, or kexec where there is no firmware involvement. If the configuration for a RIS has not been touched since it was brought online, it does not need resetting again. To reset, write the maximum values for all discovered controls. CC: Rohit Mathew Signed-off-by: James Morse Reviewed-by: Fenghua Yu Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Gavin Shan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 109 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 3 + 2 files changed, 112 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f9ac88bf06b7..4bd4d57a3baa 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -752,8 +753,104 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } +static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) +{ + u32 num_words, msb; + u32 bm = ~0; + int i; + + lockdep_assert_held(&msc->part_sel_lock); + + if (wd == 0) + return; + + /* + * Write all ~0 to all but the last 32bit-word, which may + * have fewer bits... 
+ */ + num_words = DIV_ROUND_UP(wd, 32); + for (i = 0; i < num_words - 1; i++, reg += sizeof(bm)) + __mpam_write_reg(msc, reg, bm); + + /* + * ....and then the last (maybe) partial 32bit word. When wd is a + * multiple of 32, msb should be 31 to write a full 32bit word. + */ + msb = (wd - 1) % 32; + bm = GENMASK(msb, 0); + __mpam_write_reg(msc, reg, bm); +} + +static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) +{ + struct mpam_msc *msc = ris->vmsc->msc; + struct mpam_props *rprops = &ris->props; + + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris->ris_idx, partid, msc); + + if (mpam_has_feature(mpam_feat_cpor_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + + if (mpam_has_feature(mpam_feat_mbw_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + + if (mpam_has_feature(mpam_feat_mbw_min, rprops)) + mpam_write_partsel_reg(msc, MBW_MIN, 0); + + if (mpam_has_feature(mpam_feat_mbw_max, rprops)) + mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + + mutex_unlock(&msc->part_sel_lock); +} + +static void mpam_reset_ris(struct mpam_msc_ris *ris) +{ + u16 partid, partid_max; + + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + if (ris->in_reset_state) + return; + + spin_lock(&partid_max_lock); + partid_max = mpam_partid_max; + spin_unlock(&partid_max_lock); + for (partid = 0; partid <= partid_max; partid++) + mpam_reset_ris_partid(ris, partid); +} + +static void mpam_reset_msc(struct mpam_msc *msc, bool online) +{ + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { + mpam_reset_ris(ris); + + /* + * Set in_reset_state when coming online. The reset state + * for non-zero partid may be lost while the CPUs are offline. + */ + ris->in_reset_state = online; + } +} + static int mpam_cpu_online(unsigned int cpu) { + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (atomic_fetch_inc(&msc->online_refs) == 0) + mpam_reset_msc(msc, true); + } + return 0; } @@ -792,6 +889,18 @@ static int mpam_discovery_cpu_online(unsigned int cpu) static int mpam_cpu_offline(unsigned int cpu) { + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (atomic_dec_and_test(&msc->online_refs)) + mpam_reset_msc(msc, false); + } + return 0; } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 4749ac223adc..dec485cd8a91 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -5,6 +5,7 @@ #define MPAM_INTERNAL_H #include +#include #include #include #include @@ -45,6 +46,7 @@ struct mpam_msc { enum mpam_msc_iface iface; u32 nrdy_usec; cpumask_t accessibility; + atomic_t online_refs; /* * probe_lock is only taken during discovery. 
After discovery these @@ -198,6 +200,7 @@ struct mpam_msc_ris { u8 ris_idx; u64 idr; struct mpam_props props; + bool in_reset_state; cpumask_t affinity; From 475228d15dd653584b840b8e6c5828cdc3884b1c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:49 +0000 Subject: [PATCH 63/94] arm_mpam: Add a helper to touch an MSC from any CPU Resetting RIS entries from the cpuhp callback is easy as the callback occurs on the correct CPU. This won't be true for any other caller that wants to reset or configure an MSC. Add a helper that schedules the provided function if necessary. Callers should take the cpuhp lock to prevent the cpuhp callbacks from changing the MSC state. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 37 +++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 4bd4d57a3baa..7941b093396e 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -806,20 +806,51 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) mutex_unlock(&msc->part_sel_lock); } -static void mpam_reset_ris(struct mpam_msc_ris *ris) +/* + * Called via smp_call_on_cpu() to prevent migration, while still being + * pre-emptible. + */ +static int mpam_reset_ris(void *arg) { u16 partid, partid_max; + struct mpam_msc_ris *ris = arg; WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); if (ris->in_reset_state) - return; + return 0; spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); for (partid = 0; partid <= partid_max; partid++) mpam_reset_ris_partid(ris, partid); + + return 0; +} + +/* + * Get the preferred CPU for this MSC. If it is accessible from this CPU, + * this CPU is preferred. This can be preempted/migrated, it will only result + * in more work. + */ +static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc) +{ + int cpu = raw_smp_processor_id(); + + if (cpumask_test_cpu(cpu, &msc->accessibility)) + return cpu; + + return cpumask_first_and(&msc->accessibility, cpu_online_mask); +} + +static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) +{ + lockdep_assert_irqs_enabled(); + lockdep_assert_cpus_held(); + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); } static void mpam_reset_msc(struct mpam_msc *msc, bool online) @@ -827,7 +858,7 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) struct mpam_msc_ris *ris; list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { - mpam_reset_ris(ris); + mpam_touch_msc(msc, &mpam_reset_ris, ris); /* * Set in_reset_state when coming online. The reset state From 3bd04fe7d807bbdcfe75b29ca82fae4e2d7dc524 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:50 +0000 Subject: [PATCH 64/94] arm_mpam: Extend reset logic to allow devices to be reset any time cpuhp callbacks aren't the only time the MSC configuration may need to be reset. Resctrl has an API call to reset a class. If an MPAM error interrupt arrives it indicates the driver has misprogrammed an MSC. 
The safest thing to do is reset all the MSCs and disable MPAM. Add a helper to reset RIS via their class. Call this from mpam_disable(), which can be scheduled from the error interrupt handler. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 57 ++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 7941b093396e..7943d174b3f4 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -808,15 +808,13 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) /* * Called via smp_call_on_cpu() to prevent migration, while still being - * pre-emptible. + * pre-emptible. Caller must hold mpam_srcu. */ static int mpam_reset_ris(void *arg) { u16 partid, partid_max; struct mpam_msc_ris *ris = arg; - WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); - if (ris->in_reset_state) return 0; @@ -1337,8 +1335,55 @@ static void mpam_enable_once(void) mpam_partid_max + 1, mpam_pmg_max + 1); } +static void mpam_reset_component_locked(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!ris->in_reset_state) + mpam_touch_msc(msc, mpam_reset_ris, ris); + ris->in_reset_state = true; + } + } +} + +static void mpam_reset_class_locked(struct mpam_class *class) +{ + struct mpam_component *comp; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_component_locked(comp); +} + +static void mpam_reset_class(struct mpam_class *class) +{ + cpus_read_lock(); + mpam_reset_class_locked(class); + cpus_read_unlock(); +} + +/* + * Called in response to an error IRQ. + * All of MPAMs errors indicate a software bug, restore any modified + * controls to their reset values. + */ void mpam_disable(struct work_struct *ignored) { + int idx; + struct mpam_class *class; struct mpam_msc *msc, *tmp; mutex_lock(&mpam_cpuhp_state_lock); @@ -1348,6 +1393,12 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_class(class); + srcu_read_unlock(&mpam_srcu, idx); + mutex_lock(&mpam_list_lock); list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list) mpam_msc_destroy(msc); From 49aa621c4dcaf8e3cfeb9e73d07a9746b889f9e8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:51 +0000 Subject: [PATCH 65/94] arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. 
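As an illustration of what the handler below reports (the values here are invented; the format string is from this patch), an out-of-range PARTID arriving at RIS 1 of msc:0 would log:

  error irq from msc:0 'Req_PARTID_Range', partid:65, pmg: 0, ris: 1

after which the handler disables MPAMF_ECR and schedules mpam_disable() via mpam_broken_work.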
The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Rohit Mathew Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 280 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 13 ++ 2 files changed, 293 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 7943d174b3f4..21fccc3ff002 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -200,6 +203,35 @@ static u64 mpam_msc_read_idr(struct mpam_msc *msc) return (idr_high << 32) | idr_low; } +static void mpam_msc_clear_esr(struct mpam_msc *msc) +{ + u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR); + + if (!esr_low) + return; + + /* + * Clearing the high/low bits of MPAMF_ESR can not be atomic. + * Clear the top half first, so that the pending error bits in the + * lower half prevent hardware from updating either half of the + * register. 
+ */ + if (msc->has_extd_esr) + __mpam_write_reg(msc, MPAMF_ESR + 4, 0); + __mpam_write_reg(msc, MPAMF_ESR, 0); +} + +static u64 mpam_msc_read_esr(struct mpam_msc *msc) +{ + u64 esr_high = 0, esr_low; + + esr_low = __mpam_read_reg(msc, MPAMF_ESR); + if (msc->has_extd_esr) + esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4); + + return (esr_high << 32) | esr_low; +} + static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc) { lockdep_assert_held(&msc->part_sel_lock); @@ -729,6 +761,7 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); msc->partid_max = min(msc->partid_max, partid_max); msc->pmg_max = min(msc->pmg_max, pmg_max); + msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr); mutex_lock(&mpam_list_lock); ris = mpam_get_or_create_ris(msc, ris_idx); @@ -743,6 +776,9 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) mutex_unlock(&msc->part_sel_lock); } + /* Clear any stale errors */ + mpam_msc_clear_esr(msc); + spin_lock(&partid_max_lock); mpam_partid_max = min(mpam_partid_max, msc->partid_max); mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); @@ -866,6 +902,13 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) } } +static void _enable_percpu_irq(void *_irq) +{ + int *irq = _irq; + + enable_percpu_irq(*irq, IRQ_TYPE_NONE); +} + static int mpam_cpu_online(unsigned int cpu) { struct mpam_msc *msc; @@ -876,6 +919,9 @@ static int mpam_cpu_online(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; + if (msc->reenable_error_ppi) + _enable_percpu_irq(&msc->reenable_error_ppi); + if (atomic_fetch_inc(&msc->online_refs) == 0) mpam_reset_msc(msc, true); } @@ -926,6 +972,9 @@ static int mpam_cpu_offline(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; + if (msc->reenable_error_ppi) + disable_percpu_irq(msc->reenable_error_ppi); + if (atomic_dec_and_test(&msc->online_refs)) mpam_reset_msc(msc, false); } @@ -952,6 +1001,42 @@ static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online), mutex_unlock(&mpam_cpuhp_state_lock); } +static int __setup_ppi(struct mpam_msc *msc) +{ + int cpu; + + msc->error_dev_id = alloc_percpu(struct mpam_msc *); + if (!msc->error_dev_id) + return -ENOMEM; + + for_each_cpu(cpu, &msc->accessibility) + *per_cpu_ptr(msc->error_dev_id, cpu) = msc; + + return 0; +} + +static int mpam_msc_setup_error_irq(struct mpam_msc *msc) +{ + int irq; + + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + return 0; + + /* Allocate and initialise the percpu device pointer for PPI */ + if (irq_is_percpu(irq)) + return __setup_ppi(msc); + + /* sanity check: shared interrupts can be routed anywhere? */ + if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) { + pr_err_once("msc:%u is a private resource with a shared error interrupt", + msc->id); + return -EINVAL; + } + + return 0; +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. 
The common reason for this is power @@ -1028,6 +1113,9 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) if (err) return ERR_PTR(err); + err = devm_mutex_init(dev, &msc->error_irq_lock); + if (err) + return ERR_PTR(err); mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; @@ -1040,6 +1128,10 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) return ERR_PTR(-EINVAL); } + err = mpam_msc_setup_error_irq(msc); + if (err) + return ERR_PTR(err); + if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp)) msc->iface = MPAM_IFACE_MMIO; else @@ -1313,8 +1405,177 @@ static void mpam_enable_merge_features(struct list_head *all_classes_list) } } +static char *mpam_errcode_names[16] = { + [MPAM_ERRCODE_NONE] = "No error", + [MPAM_ERRCODE_PARTID_SEL_RANGE] = "PARTID_SEL_Range", + [MPAM_ERRCODE_REQ_PARTID_RANGE] = "Req_PARTID_Range", + [MPAM_ERRCODE_MSMONCFG_ID_RANGE] = "MSMONCFG_ID_RANGE", + [MPAM_ERRCODE_REQ_PMG_RANGE] = "Req_PMG_Range", + [MPAM_ERRCODE_MONITOR_RANGE] = "Monitor_Range", + [MPAM_ERRCODE_INTPARTID_RANGE] = "intPARTID_Range", + [MPAM_ERRCODE_UNEXPECTED_INTERNAL] = "Unexpected_INTERNAL", + [MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL] = "Undefined_RIS_PART_SEL", + [MPAM_ERRCODE_RIS_NO_CONTROL] = "RIS_No_Control", + [MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL] = "Undefined_RIS_MON_SEL", + [MPAM_ERRCODE_RIS_NO_MONITOR] = "RIS_No_Monitor", + [12 ... 15] = "Reserved" +}; + +static int mpam_enable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN); + + return 0; +} + +/* This can run in mpam_disable(), and the interrupt handler on the same CPU */ +static int mpam_disable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, 0); + + return 0; +} + +static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) +{ + u64 reg; + u16 partid; + u8 errcode, pmg, ris; + + if (WARN_ON_ONCE(!msc) || + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), + &msc->accessibility))) + return IRQ_NONE; + + reg = mpam_msc_read_esr(msc); + + errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg); + if (!errcode) + return IRQ_NONE; + + /* Clear level triggered irq */ + mpam_msc_clear_esr(msc); + + partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg); + pmg = FIELD_GET(MPAMF_ESR_PMG, reg); + ris = FIELD_GET(MPAMF_ESR_RIS, reg); + + pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", + msc->id, mpam_errcode_names[errcode], partid, pmg, + ris); + + /* Disable this interrupt. */ + mpam_disable_msc_ecr(msc); + + /* + * Schedule the teardown work. Don't use a threaded IRQ as we can't + * unregister the interrupt from the threaded part of the handler. 
+ */ + mpam_disable_reason = "hardware error interrupt"; + schedule_work(&mpam_broken_work); + + return IRQ_HANDLED; +} + +static irqreturn_t mpam_ppi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = *(struct mpam_msc **)dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static irqreturn_t mpam_spi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static int mpam_register_irqs(void) +{ + int err, irq; + struct mpam_msc *msc; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + /* The MPAM spec says the interrupt can be SPI, PPI or LPI */ + /* We anticipate sharing the interrupt with other MSCs */ + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, &mpam_ppi_handler, + "mpam:msc:error", + msc->error_dev_id); + if (err) + return err; + + msc->reenable_error_ppi = irq; + smp_call_function_many(&msc->accessibility, + &_enable_percpu_irq, &irq, + true); + } else { + err = devm_request_irq(&msc->pdev->dev, irq, + &mpam_spi_handler, IRQF_SHARED, + "mpam:msc:error", msc); + if (err) + return err; + } + + mutex_lock(&msc->error_irq_lock); + msc->error_irq_req = true; + mpam_touch_msc(msc, mpam_enable_msc_ecr, msc); + msc->error_irq_hw_enabled = true; + mutex_unlock(&msc->error_irq_lock); + } + + return 0; +} + +static void mpam_unregister_irqs(void) +{ + int irq; + struct mpam_msc *msc; + + guard(cpus_read_lock)(); + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + mutex_lock(&msc->error_irq_lock); + if (msc->error_irq_hw_enabled) { + mpam_touch_msc(msc, mpam_disable_msc_ecr, msc); + msc->error_irq_hw_enabled = false; + } + + if (msc->error_irq_req) { + if (irq_is_percpu(irq)) { + msc->reenable_error_ppi = 0; + free_percpu_irq(irq, msc->error_dev_id); + } else { + devm_free_irq(&msc->pdev->dev, irq, msc); + } + msc->error_irq_req = false; + } + mutex_unlock(&msc->error_irq_lock); + } +} + static void mpam_enable_once(void) { + int err; + /* * Once the cpuhp callbacks have been changed, mpam_partid_max can no * longer change. @@ -1323,9 +1584,26 @@ static void mpam_enable_once(void) partid_max_published = true; spin_unlock(&partid_max_lock); + /* + * If all the MSC have been probed, enabling the IRQs happens next. 
+ * That involves cross-calling to a CPU that can reach the MSC, and + * the locks must be taken in this order: + */ + cpus_read_lock(); mutex_lock(&mpam_list_lock); mpam_enable_merge_features(&mpam_classes); + + err = mpam_register_irqs(); + mutex_unlock(&mpam_list_lock); + cpus_read_unlock(); + + if (err) { + pr_warn("Failed to register irqs: %d\n", err); + mpam_disable_reason = "Failed to enable."; + schedule_work(&mpam_broken_work); + return; + } mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); @@ -1393,6 +1671,8 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + mpam_unregister_irqs(); + idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_srcu(class, &mpam_classes, classes_list, srcu_read_lock_held(&mpam_srcu)) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index dec485cd8a91..fa9d9a176a54 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -46,6 +46,11 @@ struct mpam_msc { enum mpam_msc_iface iface; u32 nrdy_usec; cpumask_t accessibility; + bool has_extd_esr; + + int reenable_error_ppi; + struct mpam_msc * __percpu *error_dev_id; + atomic_t online_refs; /* @@ -59,6 +64,14 @@ struct mpam_msc { unsigned long ris_idxs; u32 ris_max; + /* + * error_irq_lock is taken when registering/unregistering the error + * interrupt and manipulating the below flags. + */ + struct mutex error_irq_lock; + bool error_irq_req; + bool error_irq_hw_enabled; + /* mpam_msc_ris of this component */ struct list_head ris; From 3796f75aa7958d26b93a2508de5fc1e0b2f8a853 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:52 +0000 Subject: [PATCH 66/94] arm_mpam: Use a static key to indicate when mpam is enabled Once all the MSC have been probed, the system-wide usable number of PARTID is known and the configuration arrays can be allocated. After this point, checking all the MSC have been probed is pointless, and the cpuhp callbacks should restore the configuration, instead of just resetting the MSC. Add a static key to enable this behaviour. This will also allow MPAM to be disabled in response to an error, and the architecture code to enable/disable the context switch of the MPAM system registers. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 12 ++++++++++++ drivers/resctrl/mpam_internal.h | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 21fccc3ff002..c126a95490f1 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -29,6 +29,8 @@ #include "mpam_internal.h" +DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */ + /* * mpam_list_lock protects the SRCU lists when writing.
Once the * mpam_enabled key is enabled these lists are read-only, @@ -936,6 +938,9 @@ static int mpam_discovery_cpu_online(unsigned int cpu) struct mpam_msc *msc; bool new_device_probed = false; + if (mpam_is_enabled()) + return 0; + guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, srcu_read_lock_held(&mpam_srcu)) { @@ -1471,6 +1476,10 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) /* Disable this interrupt. */ mpam_disable_msc_ecr(msc); + /* Are we racing with the thread disabling MPAM? */ + if (!mpam_is_enabled()) + return IRQ_HANDLED; + /* * Schedule the teardown work. Don't use a threaded IRQ as we can't * unregister the interrupt from the threaded part of the handler. @@ -1605,6 +1614,7 @@ static void mpam_enable_once(void) return; } + static_branch_enable(&mpam_enabled); mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); @@ -1671,6 +1681,8 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + static_branch_disable(&mpam_enabled); + mpam_unregister_irqs(); idx = srcu_read_lock(&mpam_srcu); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index fa9d9a176a54..93a629f6e15a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,13 @@ struct platform_device; +DECLARE_STATIC_KEY_FALSE(mpam_enabled); + +static inline bool mpam_is_enabled(void) +{ + return static_branch_likely(&mpam_enabled); +} + /* * Structures protected by SRCU may not be freed for a surprising amount of * time (especially if perf is running). To ensure the MPAM error interrupt can From 09b89d2a72f37b078198cbb09d5b9e13ba9d68b9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:53 +0000 Subject: [PATCH 67/94] arm_mpam: Allow configuration to be applied and restored during cpu online When CPUs come online the MSC's original configuration should be restored. Add struct mpam_config to hold the configuration. For each component, this has a bitmap of features that have been changed from the reset values. The mpam_config is also used on RIS reset where all bits are set to ensure all features are reset. Once the maximum partid is known, allocate a configuration array for each component, and reprogram each RIS configuration from this. CC: Dave Martin Signed-off-by: James Morse Cc: Fujitsu Fujitsu Cc: Peter Newman peternewman@google.com Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 290 +++++++++++++++++++++++++++++--- drivers/resctrl/mpam_internal.h | 27 +++ 2 files changed, 291 insertions(+), 26 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index c126a95490f1..6dbd378acdcf 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -145,6 +145,16 @@ static void mpam_free_garbage(void) } } +/* + * Once mpam is enabled, new requestors cannot further reduce the available + * partid. Assert that the size is fixed, and new requestors will be turned + * away. 
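[ As a plain-C stand-in for the mpam_enabled static key used above - the
  kernel version patches the branch in the instruction stream when the key is
  flipped, so the fast-path check becomes free, but the observable behaviour
  is that of a global boolean read:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool my_enabled;  /* analogue of DEFINE_STATIC_KEY_FALSE() */

    static inline bool my_is_enabled(void)
    {
        return atomic_load_explicit(&my_enabled, memory_order_relaxed);
    }

  Slow paths flip the flag once (static_branch_enable/disable in the patch);
  hot paths only ever read it. ]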
+ */ +static void mpam_assert_partid_sizes_fixed(void) +{ + WARN_ON_ONCE(!partid_max_published); +} + static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) { WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); @@ -338,12 +348,16 @@ mpam_component_alloc(struct mpam_class *class, int id) return comp; } +static void __destroy_component_cfg(struct mpam_component *comp); + static void mpam_component_destroy(struct mpam_component *comp) { struct mpam_class *class = comp->class; lockdep_assert_held(&mpam_list_lock); + __destroy_component_cfg(comp); + list_del_rcu(&comp->class_list); add_to_garbage(comp); @@ -819,31 +833,57 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) __mpam_write_reg(msc, reg, bm); } -static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) +/* Called via IPI. Call while holding an SRCU reference */ +static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, + struct mpam_config *cfg) { struct mpam_msc *msc = ris->vmsc->msc; struct mpam_props *rprops = &ris->props; - WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); - mutex_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); - if (mpam_has_feature(mpam_feat_cpor_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + if (mpam_has_feature(mpam_feat_cpor_part, rprops) && + mpam_has_feature(mpam_feat_cpor_part, cfg)) { + if (cfg->reset_cpbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + else + mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); + } - if (mpam_has_feature(mpam_feat_mbw_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + if (mpam_has_feature(mpam_feat_mbw_part, rprops) && + mpam_has_feature(mpam_feat_mbw_part, cfg)) { + if (cfg->reset_mbw_pbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + else + mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); + } - if (mpam_has_feature(mpam_feat_mbw_min, rprops)) + if (mpam_has_feature(mpam_feat_mbw_min, rprops) && + mpam_has_feature(mpam_feat_mbw_min, cfg)) mpam_write_partsel_reg(msc, MBW_MIN, 0); - if (mpam_has_feature(mpam_feat_mbw_max, rprops)) - mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + if (mpam_has_feature(mpam_feat_mbw_max, rprops) && + mpam_has_feature(mpam_feat_mbw_max, cfg)) { + if (cfg->reset_mbw_max) + mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + else + mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); + } mutex_unlock(&msc->part_sel_lock); } +static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) +{ + *reset_cfg = (struct mpam_config) { + .reset_cpbm = true, + .reset_mbw_pbm = true, + .reset_mbw_max = true, + }; + bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST); +} + /* * Called via smp_call_on_cpu() to prevent migration, while still being * pre-emptible. Caller must hold mpam_srcu. 
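[ A sketch of the reset-as-configuration idea used by mpam_init_reset_cfg()
  and mpam_reprogram_ris_partid() above: a reset request is just a config
  with every feature bit set and every reset_* flag true, so one code path
  serves both resetting and restoring. Names and the single feature bit here
  are invented for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    #define FEAT_CPOR (1ull << 0)

    struct cfg {
        uint64_t features;  /* which members below carry a valid value */
        bool reset_cpbm;    /* true: program the reset pattern instead */
        uint32_t cpbm;
    };

    /* Value that would be programmed into a (toy) portion-bitmap register. */
    static uint32_t cpbm_to_program(const struct cfg *c, uint32_t reset_pattern)
    {
        if (!(c->features & FEAT_CPOR))
            return reset_pattern;  /* nothing configured for this partid */
        return c->reset_cpbm ? c->cpbm = reset_pattern : c->cpbm;
    } ]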
@@ -851,16 +891,19 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) static int mpam_reset_ris(void *arg) { u16 partid, partid_max; + struct mpam_config reset_cfg; struct mpam_msc_ris *ris = arg; if (ris->in_reset_state) return 0; + mpam_init_reset_cfg(&reset_cfg); + spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); for (partid = 0; partid <= partid_max; partid++) - mpam_reset_ris_partid(ris, partid); + mpam_reprogram_ris_partid(ris, partid, &reset_cfg); return 0; } @@ -889,19 +932,58 @@ static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); } -static void mpam_reset_msc(struct mpam_msc *msc, bool online) -{ +struct mpam_write_config_arg { struct mpam_msc_ris *ris; + struct mpam_component *comp; + u16 partid; +}; - list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { - mpam_touch_msc(msc, &mpam_reset_ris, ris); +static int __write_config(void *arg) +{ + struct mpam_write_config_arg *c = arg; - /* - * Set in_reset_state when coming online. The reset state - * for non-zero partid may be lost while the CPUs are offline. - */ - ris->in_reset_state = online; + mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]); + + return 0; +} + +static void mpam_reprogram_msc(struct mpam_msc *msc) +{ + u16 partid; + bool reset; + struct mpam_config *cfg; + struct mpam_msc_ris *ris; + struct mpam_write_config_arg arg; + + /* + * No lock for mpam_partid_max as partid_max_published has been + * set by mpam_enabled(), so the values can no longer change. + */ + mpam_assert_partid_sizes_fixed(); + + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &msc->ris, msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_is_enabled() && !ris->in_reset_state) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + ris->in_reset_state = true; + continue; + } + + arg.comp = ris->vmsc->comp; + arg.ris = ris; + reset = true; + for (partid = 0; partid <= mpam_partid_max; partid++) { + cfg = &ris->vmsc->comp->cfg[partid]; + if (!bitmap_empty(cfg->features, MPAM_FEATURE_LAST)) + reset = false; + + arg.partid = partid; + mpam_touch_msc(msc, __write_config, &arg); + } + ris->in_reset_state = reset; } + mutex_unlock(&msc->cfg_lock); } static void _enable_percpu_irq(void *_irq) @@ -925,7 +1007,7 @@ static int mpam_cpu_online(unsigned int cpu) _enable_percpu_irq(&msc->reenable_error_ppi); if (atomic_fetch_inc(&msc->online_refs) == 0) - mpam_reset_msc(msc, true); + mpam_reprogram_msc(msc); } return 0; @@ -980,8 +1062,22 @@ static int mpam_cpu_offline(unsigned int cpu) if (msc->reenable_error_ppi) disable_percpu_irq(msc->reenable_error_ppi); - if (atomic_dec_and_test(&msc->online_refs)) - mpam_reset_msc(msc, false); + if (atomic_dec_and_test(&msc->online_refs)) { + struct mpam_msc_ris *ris; + + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &msc->ris, msc_list, + srcu_read_lock_held(&mpam_srcu)) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + + /* + * The reset state for non-zero partid may be + * lost while the CPUs are offline. 
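[ The online/offline hooks above hinge on a reference count: the first CPU
  that can reach an MSC reprograms it, and the last one out saves and resets
  it because the hardware state may be lost while all its CPUs are offline.
  A minimal C11 sketch of that pattern (callback names are illustrative):

    #include <stdatomic.h>

    static atomic_int online_refs;  /* analogue of msc->online_refs */

    static void cpu_online_cb(void)
    {
        if (atomic_fetch_add(&online_refs, 1) == 0) {
            /* first CPU in: reprogram the device from the saved config */
        }
    }

    static void cpu_offline_cb(void)
    {
        if (atomic_fetch_sub(&online_refs, 1) == 1) {
            /* last CPU out: save counters, reset the device */
        }
    } ]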
+ */ + ris->in_reset_state = false; + } + mutex_unlock(&msc->cfg_lock); + } } return 0; @@ -1121,6 +1217,11 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) err = devm_mutex_init(dev, &msc->error_irq_lock); if (err) return ERR_PTR(err); + + err = devm_mutex_init(dev, &msc->cfg_lock); + if (err) + return ERR_PTR(err); + mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; @@ -1581,6 +1682,72 @@ static void mpam_unregister_irqs(void) } } +static void __destroy_component_cfg(struct mpam_component *comp) +{ + add_to_garbage(comp->cfg); +} + +static void mpam_reset_component_cfg(struct mpam_component *comp) +{ + int i; + struct mpam_props *cprops = &comp->class->props; + + mpam_assert_partid_sizes_fixed(); + + if (!comp->cfg) + return; + + for (i = 0; i <= mpam_partid_max; i++) { + comp->cfg[i] = (struct mpam_config) {}; + if (cprops->cpbm_wd) + comp->cfg[i].cpbm = GENMASK(cprops->cpbm_wd - 1, 0); + if (cprops->mbw_pbm_bits) + comp->cfg[i].mbw_pbm = GENMASK(cprops->mbw_pbm_bits - 1, 0); + if (cprops->bwa_wd) + comp->cfg[i].mbw_max = GENMASK(15, 16 - cprops->bwa_wd); + } +} + +static int __allocate_component_cfg(struct mpam_component *comp) +{ + mpam_assert_partid_sizes_fixed(); + + if (comp->cfg) + return 0; + + comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL); + if (!comp->cfg) + return -ENOMEM; + + /* + * The array is free()d in one go, so only cfg[0]'s structure needs + * to be initialised. + */ + init_garbage(&comp->cfg[0].garbage); + + mpam_reset_component_cfg(comp); + + return 0; +} + +static int mpam_allocate_config(void) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + list_for_each_entry(comp, &class->components, class_list) { + int err = __allocate_component_cfg(comp); + if (err) + return err; + } + } + + return 0; +} + static void mpam_enable_once(void) { int err; @@ -1600,15 +1767,25 @@ static void mpam_enable_once(void) */ cpus_read_lock(); mutex_lock(&mpam_list_lock); - mpam_enable_merge_features(&mpam_classes); + do { + mpam_enable_merge_features(&mpam_classes); - err = mpam_register_irqs(); + err = mpam_register_irqs(); + if (err) { + pr_warn("Failed to register irqs: %d\n", err); + break; + } + err = mpam_allocate_config(); + if (err) { + pr_err("Failed to allocate configuration arrays.\n"); + break; + } + } while (0); mutex_unlock(&mpam_list_lock); cpus_read_unlock(); if (err) { - pr_warn("Failed to register irqs: %d\n", err); mpam_disable_reason = "Failed to enable."; schedule_work(&mpam_broken_work); return; @@ -1628,6 +1805,9 @@ static void mpam_reset_component_locked(struct mpam_component *comp) struct mpam_vmsc *vmsc; lockdep_assert_cpus_held(); + mpam_assert_partid_sizes_fixed(); + + mpam_reset_component_cfg(comp); guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, @@ -1728,6 +1908,64 @@ void mpam_enable(struct work_struct *work) mpam_enable_once(); } +#define maybe_update_config(cfg, feature, newcfg, member, changes) do { \ + if (mpam_has_feature(feature, newcfg) && \ + (newcfg)->member != (cfg)->member) { \ + (cfg)->member = (newcfg)->member; \ + mpam_set_feature(feature, cfg); \ + \ + (changes) = true; \ + } \ +} while (0) + +static bool mpam_update_config(struct mpam_config *cfg, + const struct mpam_config *newcfg) +{ + bool has_changes = false; + + maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes); + maybe_update_config(cfg, 
mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes); + + return has_changes; +} + +int mpam_apply_config(struct mpam_component *comp, u16 partid, + struct mpam_config *cfg) +{ + struct mpam_write_config_arg arg; + struct mpam_msc_ris *ris; + struct mpam_vmsc *vmsc; + struct mpam_msc *msc; + + lockdep_assert_cpus_held(); + + /* Don't pass in the current config! */ + WARN_ON_ONCE(&comp->cfg[partid] == cfg); + + if (!mpam_update_config(&comp->cfg[partid], cfg)) + return 0; + + arg.comp = comp; + arg.partid = partid; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + msc = vmsc->msc; + + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + arg.ris = ris; + mpam_touch_msc(msc, __write_config, &arg); + } + mutex_unlock(&msc->cfg_lock); + } + + return 0; +} + static int __init mpam_msc_driver_init(void) { if (!system_supports_mpam()) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 93a629f6e15a..b8fdbd7ab7a5 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -91,6 +91,9 @@ struct mpam_msc { */ struct mutex part_sel_lock; + /* cfg_lock protects the msc configuration. */ + struct mutex cfg_lock; + /* * mon_sel_lock protects access to the MSC hardware registers that are * affected by MPAMCFG_MON_SEL, and the mbwu_state. @@ -182,6 +185,21 @@ struct mpam_class { struct mpam_garbage garbage; }; +struct mpam_config { + /* Which configuration values are valid. */ + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u32 cpbm; + u32 mbw_pbm; + u16 mbw_max; + + bool reset_cpbm; + bool reset_mbw_pbm; + bool reset_mbw_max; + + struct mpam_garbage garbage; +}; + struct mpam_component { u32 comp_id; @@ -190,6 +208,12 @@ struct mpam_component { cpumask_t affinity; + /* + * Array of configuration values, indexed by partid. + * Read from cpuhp callbacks, hold the cpuhp lock when writing. + */ + struct mpam_config *cfg; + /* member of mpam_class:components */ struct list_head class_list; @@ -249,6 +273,9 @@ extern u8 mpam_pmg_max; void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); +int mpam_apply_config(struct mpam_component *comp, u16 partid, + struct mpam_config *cfg); + int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From 880df85d8673f8e2395f139d3618661366e5d4d8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:54 +0000 Subject: [PATCH 68/94] arm_mpam: Probe and reset the rest of the features MPAM supports more features than are going to be exposed to resctrl. For partid other than 0, the reset values of these controls isn't known. Discover the rest of the features so they can be reset to avoid any side effects when resctrl is in use. PARTID narrowing allows MSC/RIS to support less configuration space than is usable. If this feature is found on a class of device we are likely to use, then reduce the partid_max to make it usable. This allows us to map a PARTID to itself. 
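[ The "map a PARTID to itself" trick mentioned above is just a clamp: with
  narrowing, only intpartid_max internal IDs have real configuration storage,
  so shrinking the advertised PARTID space to that size lets every PARTID use
  the internal ID of the same value and no translation table is needed. A
  sketch, with invented names:

    #include <stdint.h>

    static uint16_t clamp_partid_max(uint16_t partid_max,
                                     uint16_t intpartid_max)
    {
        /* identity mapping PARTID -> intPARTID is now always in range */
        return partid_max < intpartid_max ? partid_max : intpartid_max;
    } ]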
CC: Rohit Mathew CC: Zeng Heng CC: Dave Martin Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 188 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 18 +++ 2 files changed, 206 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 6dbd378acdcf..67eb0c79ca49 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -259,6 +259,15 @@ static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) __mpam_part_sel_raw(partsel, msc); } +static void __mpam_intpart_sel(u8 ris_idx, u16 intpartid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, intpartid) | + MPAMCFG_PART_SEL_INTERNAL; + + __mpam_part_sel_raw(partsel, msc); +} + int mpam_register_requestor(u16 partid_max, u8 pmg_max) { guard(spinlock)(&partid_max_lock); @@ -656,10 +665,34 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) struct mpam_msc *msc = ris->vmsc->msc; struct device *dev = &msc->pdev->dev; struct mpam_props *props = &ris->props; + struct mpam_class *class = ris->vmsc->comp->class; lockdep_assert_held(&msc->probe_lock); lockdep_assert_held(&msc->part_sel_lock); + /* Cache Capacity Partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) { + u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR); + + props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features); + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ccap_features)) + mpam_set_feature(mpam_feat_cmax_softlim, props); + + if (props->cmax_wd && + !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cmax, props); + + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cmin, props); + + props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ccap_features); + if (props->cassoc_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cassoc, props); + } + /* Cache Portion partitioning */ if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); @@ -682,6 +715,31 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) mpam_set_feature(mpam_feat_mbw_max, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features)) + mpam_set_feature(mpam_feat_mbw_min, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features)) + mpam_set_feature(mpam_feat_mbw_prop, props); + } + + /* Priority partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) { + u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR); + + props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features); + if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) { + mpam_set_feature(mpam_feat_intpri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_intpri_part_0_low, props); + } + + props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, 
pri_features); + if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) { + mpam_set_feature(mpam_feat_dspri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_dspri_part_0_low, props); + } } /* Performance Monitoring */ @@ -706,6 +764,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_msmon_csu, props); + if (FIELD_GET(MPAMF_CSUMON_IDR_HAS_XCL, csumonidr)) + mpam_set_feature(mpam_feat_msmon_csu_xcl, props); + /* Is NRDY hardware managed? */ hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); if (hw_managed) @@ -727,6 +788,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) if (props->num_mbwu_mon) mpam_set_feature(mpam_feat_msmon_mbwu, props); + if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); + /* Is NRDY hardware managed? */ hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); if (hw_managed) @@ -738,6 +802,21 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) */ } } + + /* + * RIS with PARTID narrowing don't have enough storage for one + * configuration per PARTID. If these are in a class we could use, + * reduce the supported partid_max to match the number of intpartid. + * If the class is unknown, just ignore it. + */ + if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) && + class->type != MPAM_CLASS_UNKNOWN) { + u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR); + u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr); + + mpam_set_feature(mpam_feat_partid_nrw, props); + msc->partid_max = min(msc->partid_max, partid_max); + } } static int mpam_msc_hw_probe(struct mpam_msc *msc) @@ -837,12 +916,28 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) { + u32 pri_val = 0; + u16 cmax = MPAMCFG_CMAX_CMAX; struct mpam_msc *msc = ris->vmsc->msc; struct mpam_props *rprops = &ris->props; + u16 dspri = GENMASK(rprops->dspri_wd, 0); + u16 intpri = GENMASK(rprops->intpri_wd, 0); mutex_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); + if (mpam_has_feature(mpam_feat_partid_nrw, rprops)) { + /* Update the intpartid mapping */ + mpam_write_partsel_reg(msc, INTPARTID, + MPAMCFG_INTPARTID_INTERNAL | partid); + + /* + * Then switch to the 'internal' partid to update the + * configuration. + */ + __mpam_intpart_sel(ris->ris_idx, partid, msc); + } + if (mpam_has_feature(mpam_feat_cpor_part, rprops) && mpam_has_feature(mpam_feat_cpor_part, cfg)) { if (cfg->reset_cpbm) @@ -871,6 +966,35 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); } + if (mpam_has_feature(mpam_feat_mbw_prop, rprops) && + mpam_has_feature(mpam_feat_mbw_prop, cfg)) + mpam_write_partsel_reg(msc, MBW_PROP, 0); + + if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) + mpam_write_partsel_reg(msc, CMAX, cmax); + + if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) + mpam_write_partsel_reg(msc, CMIN, 0); + + if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops)) + mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC); + + if (mpam_has_feature(mpam_feat_intpri_part, rprops) || + mpam_has_feature(mpam_feat_dspri_part, rprops)) { + /* aces high? 
*/ + if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops)) + intpri = 0; + if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops)) + dspri = 0; + + if (mpam_has_feature(mpam_feat_intpri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri); + if (mpam_has_feature(mpam_feat_dspri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri); + + mpam_write_partsel_reg(msc, PRI, pri_val); + } + mutex_unlock(&msc->part_sel_lock); } @@ -1308,6 +1432,18 @@ static bool mpam_has_bwa_wd_feature(struct mpam_props *props) return true; if (mpam_has_feature(mpam_feat_mbw_max, props)) return true; + if (mpam_has_feature(mpam_feat_mbw_prop, props)) + return true; + return false; +} + +/* Any of these features mean the CMAX_WD field is valid. */ +static bool mpam_has_cmax_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_cmax_cmax, props)) + return true; + if (mpam_has_feature(mpam_feat_cmax_cmin, props)) + return true; return false; } @@ -1366,6 +1502,23 @@ static void __props_mismatch(struct mpam_props *parent, parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); } + if (alias && !mpam_has_cmax_wd_feature(parent) && mpam_has_cmax_wd_feature(child)) { + parent->cmax_wd = child->cmax_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature, + cmax_wd, alias)) { + pr_debug("%s took the min cmax_wd\n", __func__); + parent->cmax_wd = min(parent->cmax_wd, child->cmax_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cmax_cassoc, alias)) { + parent->cassoc_wd = child->cassoc_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cmax_cassoc, + cassoc_wd, alias)) { + pr_debug("%s cleared cassoc_wd\n", __func__); + mpam_clear_feature(mpam_feat_cmax_cassoc, parent); + parent->cassoc_wd = 0; + } + /* For num properties, take the minimum */ if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) { parent->num_csu_mon = child->num_csu_mon; @@ -1385,6 +1538,41 @@ static void __props_mismatch(struct mpam_props *parent, child->num_mbwu_mon); } + if (CAN_MERGE_FEAT(parent, child, mpam_feat_intpri_part, alias)) { + parent->intpri_wd = child->intpri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_intpri_part, + intpri_wd, alias)) { + pr_debug("%s took the min intpri_wd\n", __func__); + parent->intpri_wd = min(parent->intpri_wd, child->intpri_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_dspri_part, alias)) { + parent->dspri_wd = child->dspri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_dspri_part, + dspri_wd, alias)) { + pr_debug("%s took the min dspri_wd\n", __func__); + parent->dspri_wd = min(parent->dspri_wd, child->dspri_wd); + } + + /* TODO: alias support for these two */ + /* {int,ds}pri may not have differing 0-low behaviour */ + if (mpam_has_feature(mpam_feat_intpri_part, parent) && + (!mpam_has_feature(mpam_feat_intpri_part, child) || + mpam_has_feature(mpam_feat_intpri_part_0_low, parent) != + mpam_has_feature(mpam_feat_intpri_part_0_low, child))) { + pr_debug("%s cleared intpri_part\n", __func__); + mpam_clear_feature(mpam_feat_intpri_part, parent); + mpam_clear_feature(mpam_feat_intpri_part_0_low, parent); + } + if (mpam_has_feature(mpam_feat_dspri_part, parent) && + (!mpam_has_feature(mpam_feat_dspri_part, child) || + mpam_has_feature(mpam_feat_dspri_part_0_low, parent) != + mpam_has_feature(mpam_feat_dspri_part_0_low, child))) { + pr_debug("%s cleared dspri_part\n", __func__); + mpam_clear_feature(mpam_feat_dspri_part, parent); + mpam_clear_feature(mpam_feat_dspri_part_0_low, parent); + } + 
if (alias) { /* Merge features for aliased resources */ bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index b8fdbd7ab7a5..618e5355a95e 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -143,14 +143,28 @@ static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) /* Bits for mpam features bitmaps */ enum mpam_device_features { mpam_feat_cpor_part, + mpam_feat_cmax_softlim, + mpam_feat_cmax_cmax, + mpam_feat_cmax_cmin, + mpam_feat_cmax_cassoc, mpam_feat_mbw_part, mpam_feat_mbw_min, mpam_feat_mbw_max, + mpam_feat_mbw_prop, + mpam_feat_intpri_part, + mpam_feat_intpri_part_0_low, + mpam_feat_dspri_part, + mpam_feat_dspri_part_0_low, mpam_feat_msmon, mpam_feat_msmon_csu, + mpam_feat_msmon_csu_capture, + mpam_feat_msmon_csu_xcl, mpam_feat_msmon_csu_hw_nrdy, mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_capture, + mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_mbwu_hw_nrdy, + mpam_feat_partid_nrw, MPAM_FEATURE_LAST }; @@ -160,6 +174,10 @@ struct mpam_props { u16 cpbm_wd; u16 mbw_pbm_bits; u16 bwa_wd; + u16 cmax_wd; + u16 cassoc_wd; + u16 intpri_wd; + u16 dspri_wd; u16 num_csu_mon; u16 num_mbwu_mon; }; From c891bae66423bc69a680ca1de34940132e2c8ace Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:55 +0000 Subject: [PATCH 69/94] arm_mpam: Add helpers to allocate monitors MPAM's MSC support a number of monitors, each of which supports bandwidth counters, or cache-storage-utilisation counters. To use a counter, a monitor needs to be configured. Add helpers to allocate and free CSU or MBWU monitors. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 2 ++ drivers/resctrl/mpam_internal.h | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 67eb0c79ca49..a7ba07ac5a2f 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -305,6 +305,8 @@ mpam_class_alloc(u8 level_idx, enum mpam_class_types type) class->level = level_idx; class->type = type; INIT_LIST_HEAD_RCU(&class->classes_list); + ida_init(&class->ida_csu_mon); + ida_init(&class->ida_mbwu_mon); list_add_rcu(&class->classes_list, &mpam_classes); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 618e5355a95e..8bbc67df6d97 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -200,6 +200,9 @@ struct mpam_class { /* member of mpam_classes */ struct list_head classes_list; + struct ida ida_csu_mon; + struct ida ida_mbwu_mon; + struct mpam_garbage garbage; }; @@ -279,6 +282,38 @@ struct mpam_msc_ris { struct mpam_garbage garbage; }; +static inline int mpam_alloc_csu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_csu_mon, cprops->num_csu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_csu_mon(struct mpam_class *class, int csu_mon) +{ + ida_free(&class->ida_csu_mon, csu_mon); +} + +static 
inline int mpam_alloc_mbwu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_mbwu_mon, cprops->num_mbwu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_mbwu_mon(struct mpam_class *class, int mbwu_mon) +{ + ida_free(&class->ida_mbwu_mon, mbwu_mon); +} + /* List of all classes - protected by srcu*/ extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; From 823e7c3712c584641b4ef890a8af34884c677197 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:56 +0000 Subject: [PATCH 70/94] arm_mpam: Add mpam_msmon_read() to read monitor value Reading a monitor involves configuring what you want to monitor, and reading the value. Components made up of multiple MSC may need values from each MSC. MSCs may take time to configure, returning 'not ready'. The maximum 'not ready' time should have been provided by firmware. Add mpam_msmon_read() to hide all this. If (one of) the MSC returns not ready, then wait the full timeout value before trying again. CC: Shanker Donthineni Cc: Shaopeng Tan (Fujitsu) Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 235 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 19 +++ 2 files changed, 254 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index a7ba07ac5a2f..4859c8b096c3 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -886,6 +886,241 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } +struct mon_read { + struct mpam_msc_ris *ris; + struct mon_cfg *ctx; + enum mpam_device_features type; + u64 *val; + int err; +}; + +static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mon_cfg *ctx = m->ctx; + + /* + * For CSU counters its implementation-defined what happens when not + * filtering by partid. 
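[ The monitor allocators above lean on the kernel's IDA. For illustration, a
  toy bitmap-based analogue of ida_alloc_max()/ida_free() for up to 32 IDs -
  unlike the real IDA this sketch is not internally locked:

    #include <stdint.h>

    static uint32_t mon_map;  /* bit set => monitor ID in use */

    static int mon_alloc_max(int max)  /* -1 on exhaustion, like -ENOSPC */
    {
        for (int i = 0; i <= max && i < 32; i++) {
            if (!(mon_map & (1u << i))) {
                mon_map |= 1u << i;
                return i;
            }
        }
        return -1;
    }

    static void mon_free(int id)
    {
        mon_map &= ~(1u << id);
    } ]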
+ */ + *ctl_val = MSMON_CFG_x_CTL_MATCH_PARTID; + + *flt_val = FIELD_PREP(MSMON_CFG_x_FLT_PARTID, ctx->partid); + + if (m->ctx->match_pmg) { + *ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; + *flt_val |= FIELD_PREP(MSMON_CFG_x_FLT_PMG, ctx->pmg); + } + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val |= MSMON_CFG_CSU_CTL_TYPE_CSU; + + if (mpam_has_feature(mpam_feat_msmon_csu_xcl, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_CSU_FLT_XCL, ctx->csu_exclude_clean); + + break; + case mpam_feat_msmon_mbwu: + *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts); + + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mpam_msc *msc = m->ris->vmsc->msc; + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); + break; + case mpam_feat_msmon_mbwu: + *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +/* Remove values set by the hardware to prevent apparent mismatches. */ +static inline void clean_msmon_ctl_val(u32 *cur_ctl) +{ + *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS; +} + +static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, + u32 flt_val) +{ + struct mpam_msc *msc = m->ris->vmsc->msc; + + /* + * Write the ctl_val with the enable bit cleared, reset the counter, + * then enable counter. + */ + switch (m->type) { + case mpam_feat_msmon_csu: + mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val); + mpam_write_monsel_reg(msc, CSU, 0); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + break; + case mpam_feat_msmon_mbwu: + mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + /* Counting monitors require NRDY to be reset by software */ + mpam_write_monsel_reg(msc, MBWU, 0); + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +static void __ris_msmon_read(void *arg) +{ + u64 now; + bool nrdy = false; + bool config_mismatch; + struct mon_read *m = arg; + struct mon_cfg *ctx = m->ctx; + struct mpam_msc_ris *ris = m->ris; + struct mpam_props *rprops = &ris->props; + struct mpam_msc *msc = m->ris->vmsc->msc; + u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; + + if (!mpam_mon_sel_lock(msc)) { + m->err = -EIO; + return; + } + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + /* + * Read the existing configuration to avoid re-writing the same values. + * This saves waiting for 'nrdy' on subsequent reads. 
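[ The compare-before-write step being described works because hardware-owned
  status bits are masked out before the comparison, and the hardware copy of
  the control word has the enable bit set while the generated value does not.
  A sketch of that check - the bit positions here are invented, not MPAM's:

    #include <stdbool.h>
    #include <stdint.h>

    #define CTL_EN           (1u << 31)
    #define CTL_OFLOW_STATUS (1u << 26)  /* hw-owned: ignore when comparing */

    static bool needs_rewrite(uint32_t cur_ctl, uint32_t cur_flt,
                              uint32_t want_ctl, uint32_t want_flt)
    {
        cur_ctl &= ~CTL_OFLOW_STATUS;          /* clean_msmon_ctl_val() */
        return cur_flt != want_flt ||
               cur_ctl != (want_ctl | CTL_EN); /* live copy has EN set */
    }

  Skipping the rewrite on a match is what avoids re-arming 'nrdy' and paying
  the not-ready timeout on every read. ]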
+ */ + read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); + clean_msmon_ctl_val(&cur_ctl); + gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); + config_mismatch = cur_flt != flt_val || + cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); + + if (config_mismatch) + write_msmon_ctl_flt_vals(m, ctl_val, flt_val); + + switch (m->type) { + case mpam_feat_msmon_csu: + now = mpam_read_monsel_reg(msc, CSU); + if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + break; + case mpam_feat_msmon_mbwu: + now = mpam_read_monsel_reg(msc, MBWU); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + break; + default: + m->err = -EINVAL; + } + mpam_mon_sel_unlock(msc); + + if (nrdy) { + m->err = -EBUSY; + return; + } + + now = FIELD_GET(MSMON___VALUE, now); + *m->val += now; +} + +static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) +{ + int err, any_err = 0; + struct mpam_vmsc *vmsc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + arg->ris = ris; + + err = smp_call_function_any(&msc->accessibility, + __ris_msmon_read, arg, + true); + if (!err && arg->err) + err = arg->err; + + /* + * Save one error to be returned to the caller, but + * keep reading counters so that get reprogrammed. On + * platforms with NRDY this lets us wait once. + */ + if (err) + any_err = err; + } + } + + return any_err; +} + +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features type, u64 *val) +{ + int err; + struct mon_read arg; + u64 wait_jiffies = 0; + struct mpam_props *cprops = &comp->class->props; + + might_sleep(); + + if (!mpam_is_enabled()) + return -EIO; + + if (!mpam_has_feature(type, cprops)) + return -EOPNOTSUPP; + + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + }; + *val = 0; + + err = _msmon_read(comp, &arg); + if (err == -EBUSY && comp->class->nrdy_usec) + wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec); + + while (wait_jiffies) + wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); + + if (err == -EBUSY) { + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + }; + *val = 0; + + err = _msmon_read(comp, &arg); + } + + return err; +} + static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 8bbc67df6d97..12f0a5b7f39e 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -186,6 +186,22 @@ struct mpam_props { #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) #define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) +/* The values for MSMON_CFG_MBWU_FLT.RWBW */ +enum mon_filter_options { + COUNT_BOTH = 0, + COUNT_WRITE = 1, + COUNT_READ = 2, +}; + +struct mon_cfg { + u16 mon; + u8 pmg; + bool match_pmg; + bool csu_exclude_clean; + u32 partid; + enum mon_filter_options opts; +}; + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -329,6 +345,9 @@ void mpam_disable(struct work_struct *work); int mpam_apply_config(struct mpam_component *comp, u16 partid, struct mpam_config *cfg); +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features, u64 *val); + 
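[ The retry policy of mpam_msmon_read() - on 'not ready', sleep the full
  firmware-provided worst case once, then try again - can be sketched in
  portable C like so (the helper and its callback are hypothetical):

    #include <errno.h>
    #include <stdint.h>
    #include <time.h>

    static int read_with_nrdy_retry(int (*try_read)(uint64_t *val),
                                    uint64_t *val, long nrdy_usec)
    {
        int err = try_read(val);

        if (err == -EBUSY && nrdy_usec) {
            /* wait the whole worst-case 'not ready' time, retry once */
            struct timespec ts = {
                .tv_sec  = nrdy_usec / 1000000,
                .tv_nsec = (nrdy_usec % 1000000) * 1000,
            };
            nanosleep(&ts, NULL);
            *val = 0;
            err = try_read(val);
        }
        return err;
    } ]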
int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From 41e8a14950e1732af51cfec8fa09f8ded02a5ca9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:57 +0000 Subject: [PATCH 71/94] arm_mpam: Track bandwidth counter state for power management Bandwidth counters need to run continuously to correctly reflect the bandwidth. Save the counter state when the hardware is reset due to CPU hotplug. Add struct mbwu_state to track the bandwidth counter. Support for tracking overflow with the same structure will be added in a subsequent commit. Cc: Zeng Heng Reviewed-by: Gavin Shan Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 126 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 21 +++++- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 4859c8b096c3..c8ea37558f69 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -993,6 +993,7 @@ static void __ris_msmon_read(void *arg) struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; struct mpam_msc_ris *ris = m->ris; + struct msmon_mbwu_state *mbwu_state; struct mpam_props *rprops = &ris->props; struct mpam_msc *msc = m->ris->vmsc->msc; u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; @@ -1023,11 +1024,21 @@ static void __ris_msmon_read(void *arg) now = mpam_read_monsel_reg(msc, CSU); if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); break; case mpam_feat_msmon_mbwu: now = mpam_read_monsel_reg(msc, MBWU); if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + + if (nrdy) + break; + + mbwu_state = &ris->mbwu_state[ctx->mon]; + + /* Include bandwidth consumed before the last hardware reset */ + now += mbwu_state->correction; break; default: m->err = -EINVAL; @@ -1039,7 +1050,6 @@ static void __ris_msmon_read(void *arg) return; } - now = FIELD_GET(MSMON___VALUE, now); *m->val += now; } @@ -1235,6 +1245,67 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mutex_unlock(&msc->part_sel_lock); } +/* Call with msc cfg_lock held */ +static int mpam_restore_mbwu_state(void *_ris) +{ + int i; + struct mon_read mwbu_arg; + struct mpam_msc_ris *ris = _ris; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + if (ris->mbwu_state[i].enabled) { + mwbu_arg.ris = ris; + mwbu_arg.ctx = &ris->mbwu_state[i].cfg; + mwbu_arg.type = mpam_feat_msmon_mbwu; + + __ris_msmon_read(&mwbu_arg); + } + } + + return 0; +} + +/* Call with MSC cfg_lock held */ +static int mpam_save_mbwu_state(void *arg) +{ + int i; + u64 val; + struct mon_cfg *cfg; + u32 cur_flt, cur_ctl, mon_sel; + struct mpam_msc_ris *ris = arg; + struct msmon_mbwu_state *mbwu_state; + struct mpam_msc *msc = ris->vmsc->msc; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + mbwu_state = &ris->mbwu_state[i]; + cfg = &mbwu_state->cfg; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + return -EIO; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + cur_flt = 
mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); + + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); + + cfg->mon = i; + cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt); + cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl); + cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt); + mbwu_state->correction += val; + mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl); + mpam_mon_sel_unlock(msc); + } + + return 0; +} + static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) { *reset_cfg = (struct mpam_config) { @@ -1343,6 +1414,9 @@ static void mpam_reprogram_msc(struct mpam_msc *msc) mpam_touch_msc(msc, __write_config, &arg); } ris->in_reset_state = reset; + + if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris); } mutex_unlock(&msc->cfg_lock); } @@ -1436,6 +1510,9 @@ static int mpam_cpu_offline(unsigned int cpu) * lost while the CPUs are offline. */ ris->in_reset_state = false; + + if (mpam_is_enabled()) + mpam_touch_msc(msc, &mpam_save_mbwu_state, ris); } mutex_unlock(&msc->cfg_lock); } @@ -2109,7 +2186,22 @@ static void mpam_unregister_irqs(void) static void __destroy_component_cfg(struct mpam_component *comp) { + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + add_to_garbage(comp->cfg); + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + msc = vmsc->msc; + + if (mpam_mon_sel_lock(msc)) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) + add_to_garbage(ris->mbwu_state); + mpam_mon_sel_unlock(msc); + } + } } static void mpam_reset_component_cfg(struct mpam_component *comp) @@ -2135,6 +2227,8 @@ static void mpam_reset_component_cfg(struct mpam_component *comp) static int __allocate_component_cfg(struct mpam_component *comp) { + struct mpam_vmsc *vmsc; + mpam_assert_partid_sizes_fixed(); if (comp->cfg) @@ -2152,6 +2246,36 @@ static int __allocate_component_cfg(struct mpam_component *comp) mpam_reset_component_cfg(comp); + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + struct mpam_msc *msc; + struct mpam_msc_ris *ris; + struct msmon_mbwu_state *mbwu_state; + + if (!vmsc->props.num_mbwu_mon) + continue; + + msc = vmsc->msc; + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + if (!ris->props.num_mbwu_mon) + continue; + + mbwu_state = kcalloc(ris->props.num_mbwu_mon, + sizeof(*ris->mbwu_state), + GFP_KERNEL); + if (!mbwu_state) { + __destroy_component_cfg(comp); + return -ENOMEM; + } + + init_garbage(&mbwu_state[0].garbage); + + if (mpam_mon_sel_lock(msc)) { + ris->mbwu_state = mbwu_state; + mpam_mon_sel_unlock(msc); + } + } + } + return 0; } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 12f0a5b7f39e..12ce80bc7ff7 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -91,7 +91,10 @@ struct mpam_msc { */ struct mutex part_sel_lock; - /* cfg_lock protects the msc configuration. */ + /* + * cfg_lock protects the msc configuration and guards against mbwu_state + * save and restore racing. + */ struct mutex cfg_lock; /* @@ -202,6 +205,19 @@ struct mon_cfg { enum mon_filter_options opts; }; +/* Changes to msmon_mbwu_state are protected by the msc's mon_sel_lock. */ +struct msmon_mbwu_state { + bool enabled; + struct mon_cfg cfg; + + /* + * The value to add to the new reading to account for power management. 
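[ The correction field being documented here implements a simple banking
  scheme: whenever the hardware counter is about to be reset, its live value
  is added to the software accumulator, and readers always report the sum.
  A sketch with illustrative names:

    #include <stdint.h>

    struct counter_state {
        uint64_t correction; /* consumption banked before the last reset */
    };

    /* On save/power-down: bank the live count, then zero the hardware. */
    static void save(struct counter_state *s, uint64_t *hw_counter)
    {
        s->correction += *hw_counter;
        *hw_counter = 0;
    }

    /* On read: the caller sees a monotonic total despite the resets. */
    static uint64_t read_total(const struct counter_state *s,
                               uint64_t hw_counter)
    {
        return s->correction + hw_counter;
    } ]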
+ */ + u64 correction; + + struct mpam_garbage garbage; +}; + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -295,6 +311,9 @@ struct mpam_msc_ris { /* parent: */ struct mpam_vmsc *vmsc; + /* msmon mbwu configuration is preserved over reset */ + struct msmon_mbwu_state *mbwu_state; + struct mpam_garbage garbage; }; From b35363793291e36c91d4a5b62d7ae7079c70d826 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:58 +0000 Subject: [PATCH 72/94] arm_mpam: Consider overflow in bandwidth counter state Use the overflow status bit to track overflow on each bandwidth counter read and add the counter size to the correction when overflow is detected. This assumes that only a single overflow has occurred since the last read of the counter. Overflow interrupts, on hardware that supports them could be used to remove this limitation. Cc: Zeng Heng Reviewed-by: Gavin Shan Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 24 ++++++++++++++++++++++-- drivers/resctrl/mpam_internal.h | 3 ++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index c8ea37558f69..ecb5ecad50f8 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -985,11 +985,18 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, } } +static u64 mpam_msmon_overflow_val(enum mpam_device_features type) +{ + /* TODO: scaling, and long counters */ + return BIT_ULL(hweight_long(MSMON___VALUE)); +} + static void __ris_msmon_read(void *arg) { u64 now; bool nrdy = false; bool config_mismatch; + bool overflow; struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; struct mpam_msc_ris *ris = m->ris; @@ -1011,13 +1018,20 @@ static void __ris_msmon_read(void *arg) * This saves waiting for 'nrdy' on subsequent reads. */ read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); + overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + clean_msmon_ctl_val(&cur_ctl); gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); config_mismatch = cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); - if (config_mismatch) + if (config_mismatch) { write_msmon_ctl_flt_vals(m, ctl_val, flt_val); + overflow = false; + } else if (overflow) { + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, + cur_ctl & ~MSMON_CFG_x_CTL_OFLOW_STATUS); + } switch (m->type) { case mpam_feat_msmon_csu: @@ -1037,7 +1051,13 @@ static void __ris_msmon_read(void *arg) mbwu_state = &ris->mbwu_state[ctx->mon]; - /* Include bandwidth consumed before the last hardware reset */ + if (overflow) + mbwu_state->correction += mpam_msmon_overflow_val(m->type); + + /* + * Include bandwidth consumed before the last hardware reset and + * a counter size increment for each overflow. + */ now += mbwu_state->correction; break; default: diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 12ce80bc7ff7..218e2f48c7bf 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -211,7 +211,8 @@ struct msmon_mbwu_state { struct mon_cfg cfg; /* - * The value to add to the new reading to account for power management. + * The value to add to the new reading to account for power management, + * and overflow. 
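[ The overflow contribution added to the correction is the counter's span:
  one wrap adds 2^N, where N is the number of value bits, i.e. the population
  count of the value mask - which is what BIT_ULL(hweight_long(mask))
  computes in the patch. A portable sketch using a GCC/Clang builtin:

    #include <stdint.h>

    /* Assumes at most one wrap between reads, as the commit message notes. */
    static uint64_t overflow_span(uint64_t value_mask)
    {
        return 1ull << __builtin_popcountll(value_mask);
    }

    /* e.g. a 31-bit counter: overflow_span(0x7fffffff) == 1ull << 31 */ ]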
*/ u64 correction; From fdc29a141d6364645509cb20129cba1f84e4c10f Mon Sep 17 00:00:00 2001 From: Rohit Mathew Date: Wed, 19 Nov 2025 12:22:59 +0000 Subject: [PATCH 73/94] arm_mpam: Probe for long/lwd mbwu counters mpam v0.1 and versions above v1.0 support optional long counter for memory bandwidth monitoring. The MPAMF_MBWUMON_IDR register has fields indicating support for long counters. Probe these feature bits. The mpam_feat_msmon_mbwu feature is used to indicate that bandwidth monitors are supported, instead of muddling this with which size of bandwidth monitors, add an explicit 31 bit counter feature. Signed-off-by: Rohit Mathew [ morse: Added 31bit counter feature to simplify later logic ] Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 35 ++++++++++++++++++++++----------- drivers/resctrl/mpam_internal.h | 3 +++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index ecb5ecad50f8..380386cceb74 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -783,25 +783,36 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); } if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { - bool hw_managed; + bool has_long, hw_managed; u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); - if (props->num_mbwu_mon) + if (props->num_mbwu_mon) { mpam_set_feature(mpam_feat_msmon_mbwu, props); - if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) - mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); + if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); - /* Is NRDY hardware managed? */ - hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); - if (hw_managed) - mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumon_idr); + if (has_long) { + if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props); + else + mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props); + } else { + mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props); + } - /* - * Don't warn about any missing firmware property for - * MBWU NRDY - it doesn't make any sense! - */ + /* Is NRDY hardware managed? */ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + + /* + * Don't warn about any missing firmware property for + * MBWU NRDY - it doesn't make any sense! 
+ */ + } } } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 218e2f48c7bf..693a315c4710 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -164,6 +164,9 @@ enum mpam_device_features { mpam_feat_msmon_csu_xcl, mpam_feat_msmon_csu_hw_nrdy, mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_31counter, + mpam_feat_msmon_mbwu_44counter, + mpam_feat_msmon_mbwu_63counter, mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_mbwu_hw_nrdy, From 9e5afb7c32830bcd123976a7729ef4e2dff0cd77 Mon Sep 17 00:00:00 2001 From: Rohit Mathew Date: Wed, 19 Nov 2025 12:23:00 +0000 Subject: [PATCH 74/94] arm_mpam: Use long MBWU counters if supported Now that the larger counter sizes are probed, make use of them. Callers of mpam_msmon_read() may not know (or care!) about the different counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the driver pick the counter to use. Only 32bit accesses to the MSC are required to be supported by the spec, but these registers are 64bits. The lower half may overflow into the higher half between two 32bit reads. To avoid this, use a helper that reads the top half multiple times to check for overflow. Signed-off-by: Rohit Mathew [morse: merged multiple patches from Rohit, added explicit counter selection ] Signed-off-by: James Morse Cc: Peter Newman Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 145 ++++++++++++++++++++++++++++----- 1 file changed, 126 insertions(+), 19 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 380386cceb74..0fb08222b91d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -905,6 +905,50 @@ struct mon_read { int err; }; +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris) +{ + return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) || + mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props)); +} + +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc) +{ + int retry = 3; + u32 mbwu_l_low; + u64 mbwu_l_high1, mbwu_l_high2; + + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + do { + mbwu_l_high1 = mbwu_l_high2; + mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L); + mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + + retry--; + } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0); + + if (mbwu_l_high1 == mbwu_l_high2) + return (mbwu_l_high1 << 32) | mbwu_l_low; + + pr_warn("Failed to read a stable value\n"); + return MSMON___L_NRDY; +} + +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc) +{ + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + __mpam_write_reg(msc, MSMON_MBWU_L, 0); + __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0); +} + static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, u32 *flt_val) { @@ -931,7 +975,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, *flt_val |= 
FIELD_PREP(MSMON_CFG_CSU_FLT_XCL, ctx->csu_exclude_clean); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU; if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) @@ -953,7 +999,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); break; @@ -966,6 +1014,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, static inline void clean_msmon_ctl_val(u32 *cur_ctl) { *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS; + + if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU) + *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L; } static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, @@ -984,12 +1035,17 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, mpam_write_monsel_reg(msc, CSU, 0); mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); /* Counting monitors require NRDY to be reset by software */ - mpam_write_monsel_reg(msc, MBWU, 0); + if (m->type == mpam_feat_msmon_mbwu_31counter) + mpam_write_monsel_reg(msc, MBWU, 0); + else + mpam_msc_zero_mbwu_l(m->ris->vmsc->msc); break; default: pr_warn("Unexpected monitor type %d\n", m->type); @@ -998,8 +1054,17 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, static u64 mpam_msmon_overflow_val(enum mpam_device_features type) { - /* TODO: scaling, and long counters */ - return BIT_ULL(hweight_long(MSMON___VALUE)); + /* TODO: implement scaling counters */ + switch (type) { + case mpam_feat_msmon_mbwu_63counter: + return BIT_ULL(hweight_long(MSMON___LWD_VALUE)); + case mpam_feat_msmon_mbwu_44counter: + return BIT_ULL(hweight_long(MSMON___L_VALUE)); + case mpam_feat_msmon_mbwu_31counter: + return BIT_ULL(hweight_long(MSMON___VALUE)); + default: + return 0; + } } static void __ris_msmon_read(void *arg) @@ -1029,7 +1094,12 @@ static void __ris_msmon_read(void *arg) * This saves waiting for 'nrdy' on subsequent reads. 
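[ The high/low/high sequence in mpam_msc_read_mbwu_l() above defends against
  a carry propagating between the two 32-bit accesses. A portable analogue:

    #include <stdint.h>

    static uint64_t read_u64_split(volatile uint32_t *lo, volatile uint32_t *hi)
    {
        uint32_t h1, h2, l;
        int retry = 3;

        h2 = *hi;
        do {
            h1 = h2;
            l  = *lo;
            h2 = *hi;
        } while (h1 != h2 && --retry > 0);

        /*
         * If the two high-half reads agree, the low half was sampled
         * between them and the combined value is consistent. On retry
         * exhaustion a real driver signals failure, as the patch does
         * by returning the NRDY sentinel.
         */
        return ((uint64_t)h1 << 32) | l;
    } ]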
*/ read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); - overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + + if (mpam_feat_msmon_mbwu_31counter == m->type) + overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + else if (mpam_feat_msmon_mbwu_44counter == m->type || + mpam_feat_msmon_mbwu_63counter == m->type) + overflow = cur_ctl & MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L; clean_msmon_ctl_val(&cur_ctl); gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); @@ -1041,7 +1111,9 @@ static void __ris_msmon_read(void *arg) overflow = false; } else if (overflow) { mpam_write_monsel_reg(msc, CFG_MBWU_CTL, - cur_ctl & ~MSMON_CFG_x_CTL_OFLOW_STATUS); + cur_ctl & + ~(MSMON_CFG_x_CTL_OFLOW_STATUS | + MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L)); } switch (m->type) { @@ -1051,11 +1123,24 @@ static void __ris_msmon_read(void *arg) nrdy = now & MSMON___NRDY; now = FIELD_GET(MSMON___VALUE, now); break; - case mpam_feat_msmon_mbwu: - now = mpam_read_monsel_reg(msc, MBWU); - if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) - nrdy = now & MSMON___NRDY; - now = FIELD_GET(MSMON___VALUE, now); + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + if (m->type != mpam_feat_msmon_mbwu_31counter) { + now = mpam_msc_read_mbwu_l(msc); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___L_NRDY; + + if (m->type == mpam_feat_msmon_mbwu_63counter) + now = FIELD_GET(MSMON___LWD_VALUE, now); + else + now = FIELD_GET(MSMON___L_VALUE, now); + } else { + now = mpam_read_monsel_reg(msc, MBWU); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + } if (nrdy) break; @@ -1118,13 +1203,26 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) return any_err; } +static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops)) + return mpam_feat_msmon_mbwu_63counter; + if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops)) + return mpam_feat_msmon_mbwu_44counter; + + return mpam_feat_msmon_mbwu_31counter; +} + int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features type, u64 *val) { int err; struct mon_read arg; u64 wait_jiffies = 0; - struct mpam_props *cprops = &comp->class->props; + struct mpam_class *class = comp->class; + struct mpam_props *cprops = &class->props; might_sleep(); @@ -1134,6 +1232,9 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, if (!mpam_has_feature(type, cprops)) return -EOPNOTSUPP; + if (type == mpam_feat_msmon_mbwu) + type = mpam_msmon_choose_counter(class); + arg = (struct mon_read) { .ctx = ctx, .type = type, @@ -1142,8 +1243,8 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, *val = 0; err = _msmon_read(comp, &arg); - if (err == -EBUSY && comp->class->nrdy_usec) - wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec); + if (err == -EBUSY && class->nrdy_usec) + wait_jiffies = usecs_to_jiffies(class->nrdy_usec); while (wait_jiffies) wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); @@ -1282,12 +1383,13 @@ static int mpam_restore_mbwu_state(void *_ris) int i; struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; + struct mpam_class *class = ris->vmsc->comp->class; for (i = 0; i < ris->props.num_mbwu_mon; i++) { if (ris->mbwu_state[i].enabled) { mwbu_arg.ris = ris; mwbu_arg.ctx = 
&ris->mbwu_state[i].cfg; - mwbu_arg.type = mpam_feat_msmon_mbwu; + mwbu_arg.type = mpam_msmon_choose_counter(class); __ris_msmon_read(&mwbu_arg); } @@ -1322,8 +1424,13 @@ static int mpam_save_mbwu_state(void *arg) cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); - val = mpam_read_monsel_reg(msc, MBWU); - mpam_write_monsel_reg(msc, MBWU, 0); + if (mpam_ris_has_mbwu_long_counter(ris)) { + val = mpam_msc_read_mbwu_l(msc); + mpam_msc_zero_mbwu_l(msc); + } else { + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); + } cfg->mon = i; cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt); From 201d96ca4c867695880450930258cd5c97f099d4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:01 +0000 Subject: [PATCH 75/94] arm_mpam: Add helper to reset saved mbwu state resctrl expects to reset the bandwidth counters when the filesystem is mounted. To allow this, add a helper that clears the saved mbwu state. Instead of cross calling to each CPU that can access the component MSC to write to the counter, set a flag that causes it to be zero'd on the next read. This is easily done by forcing a configuration update. Signed-off-by: James Morse Cc: Peter Newman Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 48 ++++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 2 ++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 0fb08222b91d..b4aa81799429 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -1075,6 +1075,7 @@ static void __ris_msmon_read(void *arg) bool overflow; struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; + bool reset_on_next_read = false; struct mpam_msc_ris *ris = m->ris; struct msmon_mbwu_state *mbwu_state; struct mpam_props *rprops = &ris->props; @@ -1089,6 +1090,20 @@ static void __ris_msmon_read(void *arg) FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + switch (m->type) { + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + mbwu_state = &ris->mbwu_state[ctx->mon]; + if (mbwu_state) { + reset_on_next_read = mbwu_state->reset_on_next_read; + mbwu_state->reset_on_next_read = false; + } + break; + default: + break; + } + /* * Read the existing configuration to avoid re-writing the same values. * This saves waiting for 'nrdy' on subsequent reads.
@@ -1106,7 +1121,7 @@ static void __ris_msmon_read(void *arg) config_mismatch = cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); - if (config_mismatch) { + if (config_mismatch || reset_on_next_read) { write_msmon_ctl_flt_vals(m, ctl_val, flt_val); overflow = false; } else if (overflow) { @@ -1263,6 +1278,37 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, return err; } +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) +{ + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + if (!mpam_is_enabled()) + return; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props)) + continue; + + msc = vmsc->msc; + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + continue; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + continue; + + ris->mbwu_state[ctx->mon].correction = 0; + ris->mbwu_state[ctx->mon].reset_on_next_read = true; + mpam_mon_sel_unlock(msc); + } + } +} + static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 693a315c4710..18d53c07b3d7 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -211,6 +211,7 @@ struct mon_cfg { /* Changes to msmon_mbwu_state are protected by the msc's mon_sel_lock. */ struct msmon_mbwu_state { bool enabled; + bool reset_on_next_read; struct mon_cfg cfg; /* @@ -370,6 +371,7 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From e3565d1fd4dcf2c7ee6912094066e47c7500eaf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:02 +0000 Subject: [PATCH 76/94] arm_mpam: Add kunit test for bitmap reset The bitmap reset code has been a source of bugs. Add a unit test. This currently has to be built in, as the rest of the driver is builtin. Suggested-by: Jonathan Cameron Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/Kconfig | 9 ++++ drivers/resctrl/mpam_devices.c | 4 ++ drivers/resctrl/test_mpam_devices.c | 69 +++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 drivers/resctrl/test_mpam_devices.c diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig index 5f7f748e611e..c808e0470394 100644 --- a/drivers/resctrl/Kconfig +++ b/drivers/resctrl/Kconfig @@ -12,4 +12,13 @@ config ARM64_MPAM_DRIVER_DEBUG help Say yes here to enable debug messages from the MPAM driver. +config MPAM_KUNIT_TEST + bool "KUnit tests for MPAM driver " if !KUNIT_ALL_TESTS + depends on KUNIT=y + default KUNIT_ALL_TESTS + help + Enable this option to run tests in the MPAM driver. + + If unsure, say N. 
+ endif diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index b4aa81799429..0b5b158e1aaf 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -2717,3 +2717,7 @@ static int __init mpam_msc_driver_init(void) /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); + +#ifdef CONFIG_MPAM_KUNIT_TEST +#include "test_mpam_devices.c" +#endif diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c new file mode 100644 index 000000000000..0cfb41b665c4 --- /dev/null +++ b/drivers/resctrl/test_mpam_devices.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. +/* This file is intended to be included into mpam_devices.c */ + +#include + +static void test_mpam_reset_msc_bitmap(struct kunit *test) +{ + char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); + struct mpam_msc fake_msc = {}; + u32 *test_result; + + if (!buf) + return; + + fake_msc.mapped_hwpage = buf; + fake_msc.mapped_hwpage_sz = SZ_16K; + cpumask_copy(&fake_msc.accessibility, cpu_possible_mask); + + /* Satisfy lockdep checks */ + mutex_init(&fake_msc.part_sel_lock); + mutex_lock(&fake_msc.part_sel_lock); + + test_result = (u32 *)(buf + MPAMCFG_CPBM); + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); + KUNIT_EXPECT_EQ(test, test_result[0], 0); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); + KUNIT_EXPECT_EQ(test, test_result[0], 1); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 1); + test_result[0] = 0; + test_result[1] = 0; + + mutex_unlock(&fake_msc.part_sel_lock); +} + +static struct kunit_case mpam_devices_test_cases[] = { + KUNIT_CASE(test_mpam_reset_msc_bitmap), + {} +}; + +static struct kunit_suite mpam_devices_test_suite = { + .name = "mpam_devices_test_suite", + .test_cases = mpam_devices_test_cases, +}; + +kunit_test_suites(&mpam_devices_test_suite); From 2557e0eafec1547aa9e0e768d2376e66252dada4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:03 +0000 Subject: [PATCH 77/94] arm_mpam: Add kunit tests for props_mismatch() When features are mismatched between MSC the way features are combined to the class determines whether resctrl can support this SoC. Add some tests to illustrate the sort of thing that is expected to work, and those that must be removed. 
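As a taster, one of the cases exercised below, with hypothetical widths: two RIS in different MSC both advertise cpor, but with incompatible widths, so the merge has to remove the feature from the class entirely:

	fake_ris1.props.cpbm_wd = 5;
	fake_ris2.props.cpbm_wd = 3;
	mpam_enable_merge_features(&fake_classes_list);
	/* cpor can't be offered class-wide with mismatched portion
	 * widths, so the feature is cleared in fake_class.props and
	 * cpbm_wd is sanitised back to 0. */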
Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_internal.h | 14 +- drivers/resctrl/test_mpam_devices.c | 320 ++++++++++++++++++++++++++++ 2 files changed, 333 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 18d53c07b3d7..e79c3c47259c 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -23,6 +23,12 @@ struct platform_device; DECLARE_STATIC_KEY_FALSE(mpam_enabled); +#ifdef CONFIG_MPAM_KUNIT_TEST +#define PACKED_FOR_KUNIT __packed +#else +#define PACKED_FOR_KUNIT +#endif + static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); @@ -186,7 +192,13 @@ struct mpam_props { u16 dspri_wd; u16 num_csu_mon; u16 num_mbwu_mon; -}; + +/* + * Kunit tests use memset() to set up feature combinations that should be + * removed, and will false-positive if the compiler introduces padding that + * isn't cleared during sanitisation. + */ +} PACKED_FOR_KUNIT; #define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c index 0cfb41b665c4..3e8d564a0c64 100644 --- a/drivers/resctrl/test_mpam_devices.c +++ b/drivers/resctrl/test_mpam_devices.c @@ -4,6 +4,324 @@ #include +/* + * This test catches fields that aren't being sanitised - but can't tell you + * which one... + */ +static void test__props_mismatch(struct kunit *test) +{ + struct mpam_props parent = { 0 }; + struct mpam_props child; + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, false); + + memset(&child, 0, sizeof(child)); + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, sizeof(child)), 0); + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, true); + + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, sizeof(child)), 0); +} + +static struct list_head fake_classes_list; +static struct mpam_class fake_class = { 0 }; +static struct mpam_component fake_comp1 = { 0 }; +static struct mpam_component fake_comp2 = { 0 }; +static struct mpam_vmsc fake_vmsc1 = { 0 }; +static struct mpam_vmsc fake_vmsc2 = { 0 }; +static struct mpam_msc fake_msc1 = { 0 }; +static struct mpam_msc fake_msc2 = { 0 }; +static struct mpam_msc_ris fake_ris1 = { 0 }; +static struct mpam_msc_ris fake_ris2 = { 0 }; +static struct platform_device fake_pdev = { 0 }; + +static inline void reset_fake_hierarchy(void) +{ + INIT_LIST_HEAD(&fake_classes_list); + + memset(&fake_class, 0, sizeof(fake_class)); + fake_class.level = 3; + fake_class.type = MPAM_CLASS_CACHE; + INIT_LIST_HEAD_RCU(&fake_class.components); + INIT_LIST_HEAD(&fake_class.classes_list); + + memset(&fake_comp1, 0, sizeof(fake_comp1)); + memset(&fake_comp2, 0, sizeof(fake_comp2)); + fake_comp1.comp_id = 1; + fake_comp2.comp_id = 2; + INIT_LIST_HEAD(&fake_comp1.vmsc); + INIT_LIST_HEAD(&fake_comp1.class_list); + INIT_LIST_HEAD(&fake_comp2.vmsc); + INIT_LIST_HEAD(&fake_comp2.class_list); + + memset(&fake_vmsc1, 0, sizeof(fake_vmsc1)); + memset(&fake_vmsc2, 0, sizeof(fake_vmsc2)); + INIT_LIST_HEAD(&fake_vmsc1.ris); + INIT_LIST_HEAD(&fake_vmsc1.comp_list); + fake_vmsc1.msc = &fake_msc1; + INIT_LIST_HEAD(&fake_vmsc2.ris); + 
INIT_LIST_HEAD(&fake_vmsc2.comp_list); + fake_vmsc2.msc = &fake_msc2; + + memset(&fake_ris1, 0, sizeof(fake_ris1)); + memset(&fake_ris2, 0, sizeof(fake_ris2)); + fake_ris1.ris_idx = 1; + INIT_LIST_HEAD(&fake_ris1.msc_list); + fake_ris2.ris_idx = 2; + INIT_LIST_HEAD(&fake_ris2.msc_list); + + fake_msc1.pdev = &fake_pdev; + fake_msc2.pdev = &fake_pdev; + + list_add(&fake_class.classes_list, &fake_classes_list); +} + +static void test_mpam_enable_merge_features(struct kunit *test) +{ + reset_fake_hierarchy(); + + mutex_lock(&mpam_list_lock); + + /* One Class+Comp, two RIS in one vMSC with common features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two RIS in one vMSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* Multiple RIS within one MSC controlling the same resource can be mismatched */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_vmsc1.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + KUNIT_EXPECT_EQ(test, fake_vmsc1.props.cmax_wd, 4); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + 
reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with incompatible overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 5; + fake_ris2.props.cpbm_wd = 3; + fake_ris1.props.mbw_pbm_bits = 5; + fake_ris2.props.mbw_pbm_bits = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. 
+ */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_mbw_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.mbw_pbm_bits, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features that need tweaking */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_mbw_min, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_min, &fake_ris2.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris2.props); + fake_ris1.props.bwa_wd = 5; + fake_ris2.props.bwa_wd = 3; + fake_ris1.props.cmax_wd = 5; + fake_ris2.props.cmax_wd = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * RIS with different control properties need to be sanitised so the + * class has the common set of properties. + */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmax, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.bwa_wd, 3); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 3); + + reset_fake_hierarchy(); + + /* One Class Two Comp with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class Two Comp with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple components can't control the same 
resource, mismatched features can + * not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + mutex_unlock(&mpam_list_lock); +} + static void test_mpam_reset_msc_bitmap(struct kunit *test) { char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); @@ -58,6 +376,8 @@ static void test_mpam_reset_msc_bitmap(struct kunit *test) static struct kunit_case mpam_devices_test_cases[] = { KUNIT_CASE(test_mpam_reset_msc_bitmap), + KUNIT_CASE(test_mpam_enable_merge_features), + KUNIT_CASE(test__props_mismatch), {} }; From ce1e1421f8d8cdb5e05e13dbb516caedd67e5ee8 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:23:04 +0000 Subject: [PATCH 78/94] MAINTAINERS: new entry for MPAM Driver Create a maintainer entry for the new MPAM Driver. Add myself and James Morse as maintainers. James created the driver and I have taken up the later versions of his series. Cc: James Morse Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Catalin Marinas Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..06368172e665 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17448,6 +17448,16 @@ S: Maintained F: Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml F: drivers/video/backlight/mp3309c.c +MPAM DRIVER +M: James Morse +M: Ben Horgan +R: Reinette Chatre +R: Fenghua Yu +S: Maintained +F: drivers/resctrl/mpam_* +F: drivers/resctrl/test_mpam_* +F: include/linux/arm_mpam.h + MPS MP2869 DRIVER M: Wensheng Wang L: linux-hwmon@vger.kernel.org From a06494adb7efba2dda3866ac2e354aeacb3992f1 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 18 Nov 2025 20:19:45 -0800 Subject: [PATCH 79/94] arm64: mm: use untagged address to calculate page index Nathan Chancellor reported the below bug: [ 0.149929] BUG: KASAN: invalid-access in change_memory_common+0x258/0x2d0 [ 0.151006] Read of size 8 at addr f96680000268a000 by task swapper/0/1 [ 0.152031] [ 0.152274] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.0-rc1-00012-g37cb0aab9068 #1 PREEMPT [ 0.152288] Hardware name: linux,dummy-virt (DT) [ 0.152292] Call trace: [ 0.152295] show_stack+0x18/0x30 (C) [ 0.152309] dump_stack_lvl+0x60/0x80 [ 0.152320] print_report+0x480/0x498 [ 0.152331] kasan_report+0xac/0xf0 [ 0.152343] kasan_check_range+0x90/0xb0 [ 0.152353] __hwasan_load8_noabort+0x20/0x34 [ 0.152364] change_memory_common+0x258/0x2d0 [ 0.152375] set_memory_ro+0x18/0x24 [ 0.152386] bpf_prog_pack_alloc+0x200/0x2e8 [ 0.152397] bpf_jit_binary_pack_alloc+0x78/0x188 [ 0.152409] bpf_int_jit_compile+0xa4c/0xc74 [ 0.152420] bpf_prog_select_runtime+0x1c0/0x2bc [ 0.152430] bpf_prepare_filter+0x5a4/0x7c0 [ 0.152443] bpf_prog_create+0xa4/0x100 [ 0.152454] ptp_classifier_init+0x80/0xd0 [ 0.152465] sock_init+0x12c/0x178 [ 0.152474] do_one_initcall+0xa0/0x260 [ 0.152484] kernel_init_freeable+0x2d8/0x358 [ 0.152495] kernel_init+0x20/0x140 [ 0.152510] ret_from_fork+0x10/0x20 It is because the KASAN tagged address was used when calculating the page index. The untagged address should be used. 
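To illustrate with made-up addresses: with KASAN_HW_TAGS, area->addr keeps a tag in bits [63:56] while start has already been untagged, so the subtraction no longer produces a small byte offset:

	unsigned long start = 0xff66800002680000UL;	/* untagged */
	void *addr = (void *)0xf966800002680000UL;	/* tagged area->addr */

	idx = (start - (unsigned long)addr) >> PAGE_SHIFT;	/* huge, bogus */
	idx = (start - (unsigned long)kasan_reset_tag(addr)) >> PAGE_SHIFT; /* 0 */

kasan_reset_tag() restores the default 0xff tag, making both sides of the subtraction comparable again.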
Fixes: 37cb0aab9068 ("arm64: mm: make linear mapping permission update more robust for patial range") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Yang Shi Signed-off-by: Catalin Marinas --- arch/arm64/mm/pageattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 08ac96b9f846..fe6fdc6249e3 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -183,7 +183,8 @@ static int change_memory_common(unsigned long addr, int numpages, */ if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { - unsigned long idx = (start - (unsigned long)area->addr) >> PAGE_SHIFT; + unsigned long idx = (start - (unsigned long)kasan_reset_tag(area->addr)) + >> PAGE_SHIFT; for (; numpages; idx++, numpages--) { __change_memory_common((u64)page_address(area->pages[idx]), PAGE_SIZE, set_mask, clear_mask); From f20810157f6e971e0faca77b5c53c59f188b6ed8 Mon Sep 17 00:00:00 2001 From: Cai Xinchen Date: Thu, 20 Nov 2025 01:30:25 +0000 Subject: [PATCH 80/94] arm64: remove duplicate ARCH_HAS_MEM_ENCRYPT The commit e7bafbf717775 ("arm64: mm: Add top-level dispatcher for internal mem_encrypt API") adds ARCH_HAS_MEM_ENCRYPT. And then the commit 42be24a4178fe ("arm64: Enable memory encrypt for Realms") adds duplicate config. Just remove it. Fixes: 42be24a4178f ("arm64: Enable memory encrypt for Realms") Signed-off-by: Cai Xinchen Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6663ffd23f25..c3f9557e94fd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -47,7 +47,6 @@ config ARM64 select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY - select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX From de8209e55408d8dbb1e14cc90da3f63b85ea4d36 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 3 Nov 2025 16:48:28 -0500 Subject: [PATCH 81/94] dt-bindings: perf: fsl-imx-ddr: Add compatible string for i.MX8QM, i.MX8QXP and i.MX8DXL Add compatible string fsl,imx8qm-ddr-pmu, fsl,imx8qxp-ddr-pmu, which fallback to fsl,imx8-ddr-pmu and fsl,imx8dxl-db-pmu (for data bus fabric). Add clocks, clock-names for fsl,imx8dxl-db-pmu and keep the same restriction for existing compatible strings. 
Reviewed-by: Rob Herring (Arm) Signed-off-by: Frank Li Signed-off-by: Will Deacon --- .../devicetree/bindings/perf/fsl-imx-ddr.yaml | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml index d2e578d6b83b..103e4aec2439 100644 --- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml +++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml @@ -14,6 +14,7 @@ properties: oneOf: - enum: - fsl,imx8-ddr-pmu + - fsl,imx8dxl-db-pmu - fsl,imx8m-ddr-pmu - fsl,imx8mq-ddr-pmu - fsl,imx8mm-ddr-pmu @@ -28,7 +29,10 @@ properties: - fsl,imx8mp-ddr-pmu - const: fsl,imx8m-ddr-pmu - items: - - const: fsl,imx8dxl-ddr-pmu + - enum: + - fsl,imx8dxl-ddr-pmu + - fsl,imx8qm-ddr-pmu + - fsl,imx8qxp-ddr-pmu - const: fsl,imx8-ddr-pmu - items: - enum: @@ -43,6 +47,14 @@ properties: interrupts: maxItems: 1 + clocks: + maxItems: 2 + + clock-names: + items: + - const: ipg + - const: cnt + required: - compatible - reg @@ -50,6 +62,21 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + const: fsl,imx8dxl-db-pmu + then: + required: + - clocks + - clock-names + else: + properties: + clocks: false + clock-names: false + examples: - | #include From 66db99ffdfcb034d6fae212f2f473a82a842795f Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 3 Nov 2025 16:48:29 -0500 Subject: [PATCH 82/94] perf/imx_ddr: Move ida_alloc() from ddr_perf_init() to ddr_perf_probe() Move ida_alloc() from helper ddr_perf_init() into ddr_perf_probe() to clarify why ida_free() must be called at the error path. Add return value check for ida_alloc(). Rename label 'cpuhp_state_err' to 'idr_free' to make the code clearer, since two error paths now jump to this label. 
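Abridged from the diff below (the steps between the two elided), the probe now reads:

	num = ida_alloc(&ddr_ida, GFP_KERNEL);
	if (num < 0)
		return num;	/* nothing held yet: plain return */
	...
	if (!name) {
		ret = -ENOMEM;
		goto idr_free;	/* the ID is held and must be released */
	}
	...
idr_free:
	ida_free(&ddr_ida, pmu->id);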
Signed-off-by: Frank Li Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index b989ffa95d69..5ba34c606213 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -645,8 +645,8 @@ static void ddr_perf_pmu_disable(struct pmu *pmu) { } -static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, - struct device *dev) +static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, + struct device *dev) { *pmu = (struct ddr_pmu) { .pmu = (struct pmu) { @@ -667,9 +667,6 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, .base = base, .dev = dev, }; - - pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL); - return pmu->id; } static irqreturn_t ddr_perf_irq_handler(int irq, void *p) @@ -753,15 +750,21 @@ static int ddr_perf_probe(struct platform_device *pdev) if (!pmu) return -ENOMEM; - num = ddr_perf_init(pmu, base, &pdev->dev); + ddr_perf_init(pmu, base, &pdev->dev); platform_set_drvdata(pdev, pmu); + num = ida_alloc(&ddr_ida, GFP_KERNEL); + if (num < 0) + return num; + + pmu->id = num; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", num); if (!name) { ret = -ENOMEM; - goto cpuhp_state_err; + goto idr_free; } pmu->devtype_data = of_device_get_match_data(&pdev->dev); @@ -774,7 +777,7 @@ static int ddr_perf_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n"); - goto cpuhp_state_err; + goto idr_free; } pmu->cpuhp_state = ret; @@ -821,7 +824,7 @@ static int ddr_perf_probe(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); cpuhp_instance_err: cpuhp_remove_multi_state(pmu->cpuhp_state); -cpuhp_state_err: +idr_free: ida_free(&ddr_ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; From 037e8cf671780426254fbacdca80d1d01c806844 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 3 Nov 2025 16:48:30 -0500 Subject: [PATCH 83/94] perf/imx_ddr: Get and enable optional clks Get and enable optional clks because fsl,imx8dxl-db-pmu has two clocks.
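This leans on the devm_clk_bulk_get_all_enabled() contract (as I understand it): it returns the number of clocks found, already enabled and devm-managed, so a device with no clocks property gets 0 and the clocks remain effectively optional:

	nclks = devm_clk_bulk_get_all_enabled(&pdev->dev, &clks);
	if (nclks < 0)	/* a real failure, e.g. -EPROBE_DEFER */
		return dev_err_probe(&pdev->dev, nclks, "Failure get clks\n");
	/* nclks == 0 is fine: only fsl,imx8dxl-db-pmu requires clocks */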
Signed-off-by: Frank Li Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 5ba34c606213..2a8426a74af9 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -732,10 +733,12 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) static int ddr_perf_probe(struct platform_device *pdev) { + struct clk_bulk_data *clks; struct ddr_pmu *pmu; struct device_node *np; void __iomem *base; char *name; + int nclks; int num; int ret; int irq; @@ -754,6 +757,10 @@ static int ddr_perf_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pmu); + nclks = devm_clk_bulk_get_all_enabled(&pdev->dev, &clks); + if (nclks < 0) + return dev_err_probe(&pdev->dev, nclks, "Failure get clks\n"); + num = ida_alloc(&ddr_ida, GFP_KERNEL); if (num < 0) return num; From 11abb4e87b0e6afd4e4c0876d1008ddd9256a33c Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 3 Nov 2025 16:48:31 -0500 Subject: [PATCH 84/94] perf/imx_ddr: Add support for PMU in DB (system interconnects) There is a PMU in DB, which has the same function with PMU in DDR subsystem, the difference is PMU in DB only supports cycles, axid-read, axid-write events. e.g. perf stat -a -e imx8_db0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD,axi_port=0xPP,axi_channel=0xH/ cmd perf stat -a -e imx8_db0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD,axi_port=0xPP,axi_channel=0xH/ cmd Signed-off-by: Joakim Zhang Signed-off-by: Frank Li Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 67 ++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 2a8426a74af9..bcdf5575d71c 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -53,18 +53,27 @@ #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) #define DDR_PERF_DEV_NAME "imx8_ddr" +#define DB_PERF_DEV_NAME "imx8_db" #define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu" static DEFINE_IDA(ddr_ida); +static DEFINE_IDA(db_ida); /* DDR Perf hardware feature */ #define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ #define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */ #define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER 0x4 /* support AXI ID PORT CHANNEL filter */ +/* Perf type */ +enum fsl_ddr_type { + DDR_PERF_TYPE = 0, /* ddr Perf (default) */ + DB_PERF_TYPE, /* db Perf */ +}; + struct fsl_ddr_devtype_data { unsigned int quirks; /* quirks needed for different DDR Perf core */ const char *identifier; /* system PMU identifier for userspace */ + enum fsl_ddr_type type; /* types of Perf, ddr or db */ }; static const struct fsl_ddr_devtype_data imx8_devtype_data; @@ -98,6 +107,12 @@ static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = { .identifier = "i.MX8DXL", }; +static const struct fsl_ddr_devtype_data imx8dxl_db_devtype_data = { + .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER, + .identifier = "i.MX8DXL", + .type = DB_PERF_TYPE, +}; + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, @@ -106,6 +121,7 @@ static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data}, { 
.compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data}, { .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data}, + { .compatible = "fsl,imx8dxl-db-pmu", .data = &imx8dxl_db_devtype_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); @@ -285,9 +301,37 @@ static struct attribute *ddr_perf_events_attrs[] = { NULL, }; +static const int ddr_perf_db_visible_event_list[] = { + EVENT_CYCLES_ID, + 0x41, + 0x42, +}; + +static umode_t ddr_perf_events_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct ddr_pmu *pmu = dev_get_drvdata(dev); + struct perf_pmu_events_attr *pmu_attr; + unsigned int i; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (pmu->devtype_data->type == DDR_PERF_TYPE) + return attr->mode; + + /* DB Type */ + for (i = 0; i < ARRAY_SIZE(ddr_perf_db_visible_event_list); i++) + if (pmu_attr->id == ddr_perf_db_visible_event_list[i]) + return attr->mode; + + return 0; +} + static const struct attribute_group ddr_perf_events_attr_group = { .name = "events", .attrs = ddr_perf_events_attrs, + .is_visible = ddr_perf_events_attrs_is_visible, }; PMU_FORMAT_ATTR(event, "config:0-7"); @@ -737,6 +781,7 @@ static int ddr_perf_probe(struct platform_device *pdev) struct ddr_pmu *pmu; struct device_node *np; void __iomem *base; + struct ida *ida; char *name; int nclks; int num; @@ -761,21 +806,25 @@ static int ddr_perf_probe(struct platform_device *pdev) if (nclks < 0) return dev_err_probe(&pdev->dev, nclks, "Failure get clks\n"); - num = ida_alloc(&ddr_ida, GFP_KERNEL); + pmu->devtype_data = of_device_get_match_data(&pdev->dev); + + ida = pmu->devtype_data->type == DDR_PERF_TYPE ? &ddr_ida : &db_ida; + num = ida_alloc(ida, GFP_KERNEL); if (num < 0) return num; pmu->id = num; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", - num); + if (pmu->devtype_data->type == DDR_PERF_TYPE) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", num); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DB_PERF_DEV_NAME "%d", num); + if (!name) { ret = -ENOMEM; goto idr_free; } - pmu->devtype_data = of_device_get_match_data(&pdev->dev); - pmu->cpu = raw_smp_processor_id(); ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME, @@ -832,7 +881,7 @@ static int ddr_perf_probe(struct platform_device *pdev) cpuhp_instance_err: cpuhp_remove_multi_state(pmu->cpuhp_state); idr_free: - ida_free(&ddr_ida, pmu->id); + ida_free(ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; } @@ -846,7 +895,11 @@ static void ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); - ida_free(&ddr_ida, pmu->id); + if (pmu->devtype_data->type == DDR_PERF_TYPE) + ida_free(&ddr_ida, pmu->id); + else + ida_free(&db_ida, pmu->id); + } static struct platform_driver imx_ddr_pmu_driver = { From cbbfba4847b8a5299d36e002bf864b21bb83295d Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 11 Nov 2025 11:37:55 +0000 Subject: [PATCH 85/94] perf: Add perf_event_attr::config4 Arm FEAT_SPE_FDS adds the ability to filter on the data source of a packet using another 64-bits of event filtering control. As the existing perf_event_attr::configN fields are all used up for SPE PMU, an additional field is needed. Add a new 'config4' field. 
Reviewed-by: Leo Yan Tested-by: Leo Yan Reviewed-by: Ian Rogers Acked-by: Peter Zijlstra (Intel) Signed-off-by: James Clark Signed-off-by: Will Deacon --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 78a362b80027..0d0ed85ad8cb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -382,6 +382,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ #define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ #define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ +#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */ /* * 'struct perf_event_attr' contains various attributes that define @@ -543,6 +544,7 @@ struct perf_event_attr { __u64 sig_data; __u64 config3; /* extension of config2 */ + __u64 config4; /* extension of config3 */ }; /* From e6a27290d8001538fec94e91ff8c7f956ee7e3e5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 11 Nov 2025 11:37:56 +0000 Subject: [PATCH 86/94] perf: arm_spe: Add support for filtering on data source SPE_FEAT_FDS adds the ability to filter on the data source of packets. Like the other existing filters, enable filtering with PMSFCR_EL1.FDS when any of the filter bits are set. Each bit position of the 64 bit filter maps to numerical data sources 0-63 described by bits[0:5] in the data source packet (although the full range of data source is 16 bits so higher value data sources can't be filtered on). The filter is an OR of all the filter bits, so for example clearing filter bits 0 and 3 only includes packets from data sources 0 OR 3. Invert the filter given by userspace so that the default value of 0 is equivalent to including all values (no filtering). This allows us to skip adding a new format bit to enable filtering and still support excluding all data sources which would have been a filter value of 0 if not for the inversion. 
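A worked example of the inversion (command shape hypothetical, field name as in the format attribute above): to include only data sources 0 and 3, userspace clears those two bits and sets all the others, i.e. ~(BIT(0) | BIT(3)) = 0xfffffffffffffff6:

	perf record -e arm_spe_0/inv_data_src_filter=0xfffffffffffffff6/ -- <workload>

The driver programs PMSDSFR_EL1 with the inverse, 0x9, enabling exactly sources 0 and 3. The default attribute value of 0 leaves FDS disabled (and PMSDSFR would be all-ones anyway), i.e. no filtering.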
Tested-by: Leo Yan Reviewed-by: Leo Yan Signed-off-by: James Clark Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index fa50645fedda..617f8a98dd63 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -87,6 +87,7 @@ struct arm_spe_pmu { #define SPE_PMU_FEAT_INV_FILT_EVT (1UL << 6) #define SPE_PMU_FEAT_DISCARD (1UL << 7) #define SPE_PMU_FEAT_EFT (1UL << 8) +#define SPE_PMU_FEAT_FDS (1UL << 9) #define SPE_PMU_FEAT_DEV_PROBED (1UL << 63) u64 features; @@ -252,6 +253,10 @@ static const struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_inv_event_filter_LO 0 #define ATTR_CFG_FLD_inv_event_filter_HI 63 +#define ATTR_CFG_FLD_inv_data_src_filter_CFG config4 /* inverse of PMSDSFR_EL1 */ +#define ATTR_CFG_FLD_inv_data_src_filter_LO 0 +#define ATTR_CFG_FLD_inv_data_src_filter_HI 63 + GEN_PMU_FORMAT_ATTR(ts_enable); GEN_PMU_FORMAT_ATTR(pa_enable); GEN_PMU_FORMAT_ATTR(pct_enable); @@ -268,6 +273,7 @@ GEN_PMU_FORMAT_ATTR(float_filter); GEN_PMU_FORMAT_ATTR(float_filter_mask); GEN_PMU_FORMAT_ATTR(event_filter); GEN_PMU_FORMAT_ATTR(inv_event_filter); +GEN_PMU_FORMAT_ATTR(inv_data_src_filter); GEN_PMU_FORMAT_ATTR(min_latency); GEN_PMU_FORMAT_ATTR(discard); @@ -288,6 +294,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { &format_attr_float_filter_mask.attr, &format_attr_event_filter.attr, &format_attr_inv_event_filter.attr, + &format_attr_inv_data_src_filter.attr, &format_attr_min_latency.attr, &format_attr_discard.attr, NULL, @@ -306,6 +313,10 @@ static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj, if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT)) return 0; + if (attr == &format_attr_inv_data_src_filter.attr && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) + return 0; + if ((attr == &format_attr_branch_filter_mask.attr || attr == &format_attr_load_filter_mask.attr || attr == &format_attr_store_filter_mask.attr || @@ -430,6 +441,9 @@ static u64 arm_spe_event_to_pmsfcr(struct perf_event *event) if (ATTR_CFG_GET_FLD(attr, inv_event_filter)) reg |= PMSFCR_EL1_FnE; + if (ATTR_CFG_GET_FLD(attr, inv_data_src_filter)) + reg |= PMSFCR_EL1_FDS; + if (ATTR_CFG_GET_FLD(attr, min_latency)) reg |= PMSFCR_EL1_FL; @@ -454,6 +468,17 @@ static u64 arm_spe_event_to_pmslatfr(struct perf_event *event) return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency)); } +static u64 arm_spe_event_to_pmsdsfr(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + /* + * Data src filter is inverted so that the default value of 0 is + * equivalent to no filtering. 
+ */ + return ~ATTR_CFG_GET_FLD(attr, inv_data_src_filter); +} + static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len) { struct arm_spe_pmu_buf *buf = perf_get_aux(handle); @@ -791,6 +816,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event) if (arm_spe_event_to_pmsnevfr(event) & spe_pmu->pmsevfr_res0) return -EOPNOTSUPP; + if (arm_spe_event_to_pmsdsfr(event) != U64_MAX && + !(spe_pmu->features & SPE_PMU_FEAT_FDS)) + return -EOPNOTSUPP; + if (attr->exclude_idle) return -EOPNOTSUPP; @@ -866,6 +895,11 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags) write_sysreg_s(reg, SYS_PMSNEVFR_EL1); } + if (spe_pmu->features & SPE_PMU_FEAT_FDS) { + reg = arm_spe_event_to_pmsdsfr(event); + write_sysreg_s(reg, SYS_PMSDSFR_EL1); + } + reg = arm_spe_event_to_pmslatfr(event); write_sysreg_s(reg, SYS_PMSLATFR_EL1); @@ -1125,6 +1159,9 @@ static void __arm_spe_pmu_dev_probe(void *info) if (FIELD_GET(PMSIDR_EL1_EFT, reg)) spe_pmu->features |= SPE_PMU_FEAT_EFT; + if (FIELD_GET(PMSIDR_EL1_FDS, reg)) + spe_pmu->features |= SPE_PMU_FEAT_FDS; + /* This field has a spaced out encoding, so just use a look-up */ fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg); switch (fld) { From 155f8d4ef0b78afbf25b1449bbd654fd1327cc7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Oct 2025 11:01:37 +0000 Subject: [PATCH 87/94] ACPI: GTDT: Get rid of acpi_arch_timer_mem_init() Since 0f67b56d84b4c ("clocksource/drivers/arm_arch_timer_mmio: Switch over to standalone driver"), acpi_arch_timer_mem_init() is unused. Remove it. Signed-off-by: Marc Zyngier Cc: Hanjun Guo Cc: Sudeep Holla Cc: Rafael J. Wysocki Cc: Daniel Lezcano Cc: Thomas Gleixner Cc: Mark Rutland Acked-by: Hanjun Guo Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/gtdt.c | 34 ---------------------------------- include/linux/acpi.h | 1 - 2 files changed, 35 deletions(-) diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index fd995a1d3d24..2f100c1fa2d4 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -303,40 +303,6 @@ static int __init gtdt_parse_timer_block(struct acpi_gtdt_timer_block *block, return -EINVAL; } -/** - * acpi_arch_timer_mem_init() - Get the info of all GT blocks in GTDT table. - * @timer_mem: The pointer to the array of struct arch_timer_mem for returning - * the result of parsing. The element number of this array should - * be platform_timer_count(the total number of platform timers). - * @timer_count: It points to a integer variable which is used for storing the - * number of GT blocks we have parsed. - * - * Return: 0 if success, -EINVAL/-ENODEV if error. 
- */ -int __init acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, - int *timer_count) -{ - int ret; - void *platform_timer; - - *timer_count = 0; - for_each_platform_timer(platform_timer) { - if (is_timer_block(platform_timer)) { - ret = gtdt_parse_timer_block(platform_timer, timer_mem); - if (ret) - return ret; - timer_mem++; - (*timer_count)++; - } - } - - if (*timer_count) - pr_info("found %d memory-mapped timer block(s).\n", - *timer_count); - - return 0; -} - /* * Initialize a SBSA generic Watchdog platform device info from GTDT */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5ff5d99f6ead..22b377c3a319 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -755,7 +755,6 @@ int acpi_reconfig_notifier_unregister(struct notifier_block *nb); int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); -int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER From 4b7a59fa700f422217d83a7212ccc6074ebe9cbc Mon Sep 17 00:00:00 2001 From: Zenon Xiu Date: Tue, 11 Nov 2025 17:35:39 +0800 Subject: [PATCH 88/94] Documentation/arm64: Fix the typo of register names The register name 'HWFGWTR_EL2' and 'HWFGRTR_EL2' is wrong, should be 'HFGWTR_EL2' and 'HFGRTR_EL2'. Find the register description on arm website here, https://developer.arm.com/documentation/ddi0601/2025-09/AArch64-Registers/HFGWTR-EL2--Hypervisor-Fine-Grained-Write-Trap-Register https://developer.arm.com/documentation/ddi0601/2025-09/AArch64-Registers/HFGRTR-EL2--Hypervisor-Fine-Grained-Read-Trap-Register?lang=en Signed-off-by: Zenon Xiu Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/booting.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index e4f953839f71..26efca09aef3 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -391,13 +391,13 @@ Before jumping into the kernel, the following conditions must be met: - SMCR_EL2.LEN must be initialised to the same value for all CPUs the kernel will execute on. - - HWFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. + - HFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. - - HWFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. + - HFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. - - HWFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + - HFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. - - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + - HFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64): From c86d9f8764ba2ffa4e19cca40918c12ccc3ad909 Mon Sep 17 00:00:00 2001 From: Seongsu Park Date: Wed, 26 Nov 2025 11:10:25 +0900 Subject: [PATCH 89/94] arm64: atomics: lse: Remove unused parameters from ATOMIC_FETCH_OP_AND macros The ATOMIC_FETCH_OP_AND and ATOMIC64_FETCH_OP_AND macros accept 'mb' and 'cl' parameters but never use them in their implementation. These macros simply delegate to the corresponding andnot functions, which handle the actual atomic operations and memory barriers. 
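The delegation works because v &= i is v &= ~(~i), i.e. plain De Morgan, with the barrier and clobber handling living entirely in the andnot variant being called. A quick sketch:

	atomic_t v = ATOMIC_INIT(0xff);

	/* Equivalent; the macro expands to the second form: */
	atomic_fetch_and(0x0f, &v);		/* v becomes 0x0f */
	atomic_fetch_andnot(~0x0f, &v);		/* v &= ~(~0x0f): also 0x0f */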
Signed-off-by: Seongsu Park Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic_lse.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 87f568a94e55..afad1849c4cf 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -103,17 +103,17 @@ static __always_inline void __lse_atomic_and(int i, atomic_t *v) return __lse_atomic_andnot(~i, v); } -#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +#define ATOMIC_FETCH_OP_AND(name) \ static __always_inline int \ __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ return __lse_atomic_fetch_andnot##name(~i, v); \ } -ATOMIC_FETCH_OP_AND(_relaxed, ) -ATOMIC_FETCH_OP_AND(_acquire, a, "memory") -ATOMIC_FETCH_OP_AND(_release, l, "memory") -ATOMIC_FETCH_OP_AND( , al, "memory") +ATOMIC_FETCH_OP_AND(_relaxed) +ATOMIC_FETCH_OP_AND(_acquire) +ATOMIC_FETCH_OP_AND(_release) +ATOMIC_FETCH_OP_AND( ) #undef ATOMIC_FETCH_OP_AND @@ -210,17 +210,17 @@ static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v) return __lse_atomic64_andnot(~i, v); } -#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +#define ATOMIC64_FETCH_OP_AND(name) \ static __always_inline long \ __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ return __lse_atomic64_fetch_andnot##name(~i, v); \ } -ATOMIC64_FETCH_OP_AND(_relaxed, ) -ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") -ATOMIC64_FETCH_OP_AND(_release, l, "memory") -ATOMIC64_FETCH_OP_AND( , al, "memory") +ATOMIC64_FETCH_OP_AND(_relaxed) +ATOMIC64_FETCH_OP_AND(_acquire) +ATOMIC64_FETCH_OP_AND(_release) +ATOMIC64_FETCH_OP_AND( ) #undef ATOMIC64_FETCH_OP_AND From bf09ee918053edec7efeb2f9977b53b02e029553 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Tue, 25 Nov 2025 10:12:06 +0000 Subject: [PATCH 90/94] KVM: arm64: selftests: Remove ARM64_FEATURE_FIELD_BITS and its last user ARM64_FEATURE_FIELD_BITS is set to 4 but not all ID register fields are 4 bits. See for instance ID_AA64SMFR0_EL1. The last user of this define, ARM64_FEATURE_FIELD_BITS, is the set_id_regs selftest. Its logic assumes the fields aren't a single bits; assert that's the case and stop using the define. As there are no more users, ARM64_FEATURE_FIELD_BITS is removed from the arm64 tools sysreg.h header. A separate commit removes this from the kernel version of the header. 
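The replacement derives the maximum from the register description itself. For a hypothetical 4-bit field at bits [15:12]:

	/* ftr_bits->mask = 0xf000, ftr_bits->shift = 12 */
	uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;	/* 0xf */

A single-bit field (mask 0x1000) would give ftr_max == 1, which the new TEST_ASSERT() rejects as unsupported.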
Signed-off-by: Ben Horgan Acked-by: Marc Zyngier Acked-by: Oliver Upton Signed-off-by: Catalin Marinas --- tools/arch/arm64/include/asm/sysreg.h | 2 -- tools/testing/selftests/kvm/arm64/set_id_regs.c | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 65f2759ea27a..0f60b68eac1b 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -1078,8 +1078,6 @@ #define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ GCS_CAP_VALID_TOKEN) -#define ARM64_FEATURE_FIELD_BITS 4 - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8ff1e853f7f8..465d2331196a 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -265,7 +265,9 @@ static void guest_code(void) /* Return a safe value to a given ftr_bits an ftr value */ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { @@ -317,7 +319,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) /* Return an invalid value to a given ftr_bits an ftr value */ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { From 4138cc63d3efdd77de799aafb0bd183e1d655a0f Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Tue, 25 Nov 2025 10:12:07 +0000 Subject: [PATCH 91/94] KVM: arm64: selftests: Consider all 7 possible levels of cache In test_clidr() if an empty cache level is not found then the TEST_ASSERT will not fire. Fix this by considering all 7 possible levels when iterating through the hierarchy. Found by inspection. Signed-off-by: Ben Horgan Acked-by: Marc Zyngier Acked-by: Oliver Upton Signed-off-by: Catalin Marinas --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 465d2331196a..01f9e417cea1 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -673,7 +673,7 @@ static void test_clidr(struct kvm_vcpu *vcpu) clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ - for (level = 1; level < 7; level++) { + for (level = 1; level <= 7; level++) { if (!CLIDR_CTYPE(clidr, level)) break; } From 27abb1ee5a4e02a5314423371dafaf41499314a5 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Tue, 25 Nov 2025 10:27:32 +0000 Subject: [PATCH 92/94] arm64/sysreg: Remove unused define ARM64_FEATURE_FIELD_BITS The define ARM64_FEATURE_FIELD_BITS is now unused and feature id fields don't always have 4 bits. Remove it. 
From 4138cc63d3efdd77de799aafb0bd183e1d655a0f Mon Sep 17 00:00:00 2001
From: Ben Horgan
Date: Tue, 25 Nov 2025 10:12:07 +0000
Subject: [PATCH 91/94] KVM: arm64: selftests: Consider all 7 possible levels
 of cache

In test_clidr(), if an empty cache level is not found, the TEST_ASSERT
will not fire. Fix this by considering all 7 possible levels when
iterating through the hierarchy.

Found by inspection.

Signed-off-by: Ben Horgan
Acked-by: Marc Zyngier
Acked-by: Oliver Upton
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/kvm/arm64/set_id_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 465d2331196a..01f9e417cea1 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -673,7 +673,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
 	clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
 
 	/* find the first empty level in the cache hierarchy */
-	for (level = 1; level < 7; level++) {
+	for (level = 1; level <= 7; level++) {
 		if (!CLIDR_CTYPE(clidr, level))
 			break;
 	}

From 27abb1ee5a4e02a5314423371dafaf41499314a5 Mon Sep 17 00:00:00 2001
From: Ben Horgan
Date: Tue, 25 Nov 2025 10:27:32 +0000
Subject: [PATCH 92/94] arm64/sysreg: Remove unused define
 ARM64_FEATURE_FIELD_BITS

The define ARM64_FEATURE_FIELD_BITS is now unused, and feature ID fields
don't always have 4 bits. Remove it.

Signed-off-by: Ben Horgan
Acked-by: Marc Zyngier
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/sysreg.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6455db1b54fd..d9aa76d08e13 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1129,8 +1129,6 @@
 #define gicr_insn(insn)		read_sysreg_s(GICV5_OP_GICR_##insn)
 #define gic_insn(v, insn)	write_sysreg_s(v, GICV5_OP_GIC_##insn)
 
-#define ARM64_FEATURE_FIELD_BITS	4
-
 #ifdef __ASSEMBLY__
 
 .macro	mrs_s, rt, sreg
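To see the off-by-one fixed in the selftest above: CLIDR_EL1 describes cache
levels 1 through 7, so a scan using level < 7 never even looks at level 7 and
cannot report a fully populated hierarchy. A small stand-alone model follows;
clidr_ctype() is a simplified 3-bit field extraction and the register values
are invented for illustration.

#include <assert.h>
#include <stdint.h>

/* Ctype fields are 3 bits each, Ctype<n> at bits [3n-1 : 3n-3] */
static unsigned int clidr_ctype(uint64_t clidr, int level)
{
	return (clidr >> (3 * (level - 1))) & 0x7;
}

static int first_empty_level(uint64_t clidr)
{
	int level;

	for (level = 1; level <= 7; level++) {	/* inclusive: levels 1..7 */
		if (!clidr_ctype(clidr, level))
			break;
	}
	return level;	/* 8 means no empty level at all */
}

int main(void)
{
	/* levels 1..7 all populated (Ctype == 0b100, unified cache) */
	uint64_t full = 0;
	int level;

	for (level = 1; level <= 7; level++)
		full |= 4ULL << (3 * (level - 1));

	assert(first_empty_level(full) == 8);		/* now detectable */
	assert(first_empty_level(full & ~0x7ULL) == 1);	/* level 1 empty */
	return 0;
}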
From e5efd56fa157d2e7d789949d1d64eccbac18a897 Mon Sep 17 00:00:00 2001
From: Dev Jain
Date: Wed, 12 Nov 2025 11:57:15 +0530
Subject: [PATCH 93/94] arm64/pageattr: Propagate return value from
 __change_memory_common

The rodata=on security measure requires that any code path which does
vmalloc -> set_memory_ro/set_memory_rox must protect the linear map
alias too. Therefore, if such a call fails, set_memory_*() must abort
and the caller must take appropriate action. Currently we suppress the
error, and there is a real chance of such an error arising after commit
a166563e7ec3 ("arm64: mm: support large block mapping when
rodata=full"). Propagate any error to the caller instead.

Fixes: a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full")
Signed-off-by: Dev Jain
Reviewed-by: Ryan Roberts
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/pageattr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 5135f2d66958..b4ea86cd3a71 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -148,6 +148,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	unsigned long size = PAGE_SIZE * numpages;
 	unsigned long end = start + size;
 	struct vm_struct *area;
+	int ret;
 	int i;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -185,8 +186,10 @@ static int change_memory_common(unsigned long addr, int numpages,
 	 */
 	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
 			    pgprot_val(clear_mask) == PTE_RDONLY)) {
 		for (i = 0; i < area->nr_pages; i++) {
-			__change_memory_common((u64)page_address(area->pages[i]),
+			ret = __change_memory_common((u64)page_address(area->pages[i]),
 					       PAGE_SIZE, set_mask, clear_mask);
+			if (ret)
+				return ret;
 		}
 	}

From 0c2988aaa4d3eda94b738d5a7acae7838d52fe4d Mon Sep 17 00:00:00 2001
From: Dev Jain
Date: Wed, 12 Nov 2025 11:57:16 +0530
Subject: [PATCH 94/94] arm64/mm: Document why linear map split failure upon
 vm_reset_perms is not problematic

Consider the following code path:

  (1) vmalloc -> (2) set_vm_flush_reset_perms ->
  (3) set_memory_ro/set_memory_rox -> ... (4) use the mapping ... ->
  (5) vfree -> (6) vm_reset_perms -> (7) set_area_direct_map

It may also happen that we encounter a failure at (3) and jump directly
to (5). In both cases, (7) may fail due to a linear map split failure.
But we care about its success *only* for the region which was
successfully changed by (3), and such a region is guaranteed to be
pte-mapped. The TLDR is that (7) will surely succeed for the regions we
care about.

Signed-off-by: Dev Jain
Reviewed-by: Ryan Roberts
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/pageattr.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index b4ea86cd3a71..dc05f06a47f2 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -185,6 +185,15 @@ static int change_memory_common(unsigned long addr, int numpages,
 	 */
 	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
 			    pgprot_val(clear_mask) == PTE_RDONLY)) {
+		/*
+		 * Note: one may wonder what happens if the calls to
+		 * set_area_direct_map() in vm_reset_perms() fail due to
+		 * -ENOMEM on linear map split failure. We care about those
+		 * calls succeeding *only* for the region whose permissions
+		 * are not default. Such a region is guaranteed to be
+		 * pte-mapped, because the call below can change those
+		 * permissions to non-default only after splitting that region.
+		 */
 		for (i = 0; i < area->nr_pages; i++) {
 			ret = __change_memory_common((u64)page_address(area->pages[i]),
 					       PAGE_SIZE, set_mask, clear_mask);
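The two patches above boil down to one pattern: apply an operation page by
page, stop at the first failure, and hand the error back instead of
swallowing it. A stand-alone sketch of that pattern follows; change_one_page()
and its forced -ENOMEM are stand-ins invented for illustration, not the
kernel's __change_memory_common().

#include <errno.h>
#include <stdio.h>

static int change_one_page(int page)
{
	/* pretend the 3rd page needs a linear-map split that fails */
	return page == 2 ? -ENOMEM : 0;
}

static int change_all_pages(int nr_pages)
{
	int ret, i;

	for (i = 0; i < nr_pages; i++) {
		ret = change_one_page(i);
		if (ret)
			return ret;	/* propagate, don't suppress */
	}
	return 0;
}

int main(void)
{
	printf("change_all_pages(2) = %d\n", change_all_pages(2)); /* 0 */
	printf("change_all_pages(4) = %d\n", change_all_pages(4)); /* -12 */
	return 0;
}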