From c91ddab5799109124ccdfa88fd381c613a766a89 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Jan 2025 18:51:33 +0800 Subject: [PATCH 01/12] LoongArch: Migrate to the generic rule for built-in DTB Commit 654102df2ac2 ("kbuild: add generic support for built-in boot DTBs") introduced generic support for built-in DTBs. Select GENERIC_BUILTIN_DTB when built-in DTB support is enabled. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Kbuild | 1 - arch/loongarch/Kconfig | 1 + arch/loongarch/boot/dts/Makefile | 2 -- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index bfa21465d83a..beb8499dd8ed 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -4,7 +4,6 @@ obj-y += net/ obj-y += vdso/ obj-$(CONFIG_KVM) += kvm/ -obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ # for cleaning subdir- += boot diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index dae3a9104ca6..98e099be912d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -396,6 +396,7 @@ endchoice config BUILTIN_DTB bool "Enable built-in dtb in kernel" depends on OF + select GENERIC_BUILTIN_DTB help Some existing systems do not provide a canonical device tree to the kernel at boot time. Let's provide a device tree table in the diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile index 747d0c3f6389..15d5e14fe418 100644 --- a/arch/loongarch/boot/dts/Makefile +++ b/arch/loongarch/boot/dts/Makefile @@ -1,5 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only dtb-y = loongson-2k0500-ref.dtb loongson-2k1000-ref.dtb loongson-2k2000-ref.dtb - -obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME)) From 341cf992d331a34d764bf943e4e177daff94fc92 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 25 Jan 2025 18:51:33 +0800 Subject: [PATCH 02/12] LoongArch: Disable FIX_EARLYCON_MEM when ARCH_IOREMAP is enabled When ARCH_IOREMAP is enabled, we are using always accessible DMW for ioremap(). It makes no sense to create a dedicated mapping for earlycon given that we can access the region via DMW. Disable FIX_EARLYCON_MEM when ARCH_IOREMAP is selected. This can ease debugging for early mapping issues. Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 98e099be912d..677cd45de70a 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -245,7 +245,7 @@ config MACH_LOONGSON64 def_bool 64BIT config FIX_EARLYCON_MEM - def_bool y + def_bool !ARCH_IOREMAP config PGTABLE_2LEVEL bool From 98e720f77dba6bb702a57f3d8479e6445e34f38c Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 25 Jan 2025 18:51:33 +0800 Subject: [PATCH 03/12] LoongArch: Derive timer max_delta from PRCFG1's timer_bits As per arch spec, maximum timer bits is configurable and should not be hardcoded in any way. Probe timer bits from PRCFG1 and use that to determine the clockevent's max_delta to be conformance. Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-info.h | 1 + arch/loongarch/include/asm/loongarch.h | 1 - arch/loongarch/kernel/cpu-probe.c | 1 + arch/loongarch/kernel/time.c | 2 +- 4 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index 900589cb159d..35e0a230a484 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -57,6 +57,7 @@ struct cpuinfo_loongarch { int global_id; /* physical global thread number */ int vabits; /* Virtual Address size in bits */ int pabits; /* Physical Address size in bits */ + int timerbits; /* Width of arch timer in bits */ unsigned int ksave_mask; /* Usable KSave mask. */ unsigned int watch_dreg_count; /* Number data breakpoints */ unsigned int watch_ireg_count; /* Number instruction breakpoints */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 64ad277e096e..a3cc4f8d4c4a 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -466,7 +466,6 @@ #define LOONGARCH_CSR_TCFG 0x41 /* Timer config */ #define CSR_TCFG_VAL_SHIFT 2 -#define CSR_TCFG_VAL_WIDTH 48 #define CSR_TCFG_VAL (_ULCAST_(0x3fffffffffff) << CSR_TCFG_VAL_SHIFT) #define CSR_TCFG_PERIOD_SHIFT 1 #define CSR_TCFG_PERIOD (_ULCAST_(0x1) << CSR_TCFG_PERIOD_SHIFT) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cbce099037b2..fedaa67cde41 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -190,6 +190,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) set_cpu_asid_mask(c, asid_mask); config = read_csr_prcfg1(); + c->timerbits = (config & CSR_CONF1_TMRBITS) >> CSR_CONF1_TMRBITS_SHIFT; c->ksave_mask = GENMASK((config & CSR_CONF1_KSNUM) - 1, 0); c->ksave_mask &= ~(EXC_KSAVE_MASK | PERCPU_KSAVE_MASK | KVM_KSAVE_MASK); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index a07d7eff4dc5..e2d3bfeb6366 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -132,7 +132,7 @@ int constant_clockevent_init(void) #else unsigned long min_delta = 1000; #endif - unsigned long max_delta = (1UL << 48) - 1; + unsigned long max_delta = GENMASK_ULL(boot_cpu_data.timerbits, 0); struct clock_event_device *cd; static int irq = 0, timer_irq_installed = 0; From b62a03049f141061d0397bfa86b74f65985d9338 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jan 2025 18:51:33 +0800 Subject: [PATCH 04/12] LoongArch: Correct the cacheinfo sharing information SMT cores and their sibling cores share the same L1 and L2 private caches (of course last level cache is also shared), so correct the cacheinfo sharing information to let shared_cpu_map correctly reflect this relationship. Below is the output of "lscpu" on Loongson-3A6000 (4 cores, 8 threads). 1. Before patch: L1d: 512 KiB (8 instances) L1i: 512 KiB (8 instances) L2: 2 MiB (8 instances) L3: 16 MiB (1 instance) 2. After patch: L1d: 256 KiB (4 instances) L1i: 256 KiB (4 instances) L2: 1 MiB (4 instances) L3: 16 MiB (1 instance) Reported-by: Chao Li Signed-off-by: Juxin Gao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cacheinfo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index c7988f757281..8e231b0d2cd6 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -51,6 +51,12 @@ static void cache_cpumap_setup(unsigned int cpu) continue; sib_leaf = sib_cpu_ci->info_list + index; + /* SMT cores share all caches */ + if (cpus_are_siblings(i, cpu)) { + cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); + cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + } + /* Node's cores share shared caches */ if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_set_cpu(i, &this_leaf->shared_cpu_map); From 613d4164f5a5032716f79106f48a59286c7b1c24 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jan 2025 18:51:42 +0800 Subject: [PATCH 05/12] LoongArch: Correct the __switch_to() prototype in comments Correct the __switch_to() prototype in comments, keep it be the same as the declaration in switch_to.h. Signed-off-by: Huacai Chen --- arch/loongarch/kernel/switch.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 31dd8199b245..9c23cb7e432f 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -12,7 +12,7 @@ /* * task_struct *__switch_to(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, void *sched_ra, void *sched_cfa) */ .align 5 SYM_FUNC_START(__switch_to) From 0816b2ea18284e2598faec024a4df6a93591bd17 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jan 2025 18:51:42 +0800 Subject: [PATCH 06/12] LoongArch: Add pgprot_nx() implementation Commit cca98e9f8b5ebcd964 ("mm: enforce that vmap can't map pages executable") enforces the W^X protection by not allowing remapping existing pages as executable. Add LoongArch bits so that LoongArch can benefit the same protection. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable-bits.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 82cd3a9f094b..45bfc65a0c9f 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -96,6 +96,13 @@ #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) +#define pgprot_nx pgprot_nx + +static inline pgprot_t pgprot_nx(pgprot_t _prot) +{ + return __pgprot(pgprot_val(_prot) | _PAGE_NO_EXEC); +} + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) From 5d0cc7e585432b64a18aefab8876a8c433394560 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jan 2025 18:51:42 +0800 Subject: [PATCH 07/12] LoongArch: Refactor bug_handler() implementation 1. Early return for user mode triggered exception with all types. 2. Give a chance to call fixup_exception() for default types (like S390). Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index c57b4134f3e8..2ec3106c0da3 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -597,17 +597,24 @@ int is_valid_bugaddr(unsigned long addr) static void bug_handler(struct pt_regs *regs) { + if (user_mode(regs)) { + force_sig(SIGTRAP); + return; + } + switch (report_bug(regs->csr_era, regs)) { case BUG_TRAP_TYPE_BUG: - case BUG_TRAP_TYPE_NONE: - die_if_kernel("Oops - BUG", regs); - force_sig(SIGTRAP); + die("Oops - BUG", regs); break; case BUG_TRAP_TYPE_WARN: /* Skip the BUG instruction and continue */ regs->csr_era += LOONGARCH_INSN_SIZE; break; + + default: + if (!fixup_exception(regs)) + die("Oops - BUG", regs); } } From 307094c9e26ef704d7061733f8d6fab9ca2ad09a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Jan 2025 18:51:43 +0800 Subject: [PATCH 08/12] LoongArch: Adjust SETUP_SLEEP and SETUP_WAKEUP SETUP_SLEEP should only save the GPR context, which is symmetric to SETUP_WAKEUP, so move the acpi_saved_sp handling out of SETUP_SLEEP. Move "addi.d sp, sp, PT_SIZE" into SETUP_WAKEUP for the same reason. No functional changes. Signed-off-by: Huacai Chen --- arch/loongarch/power/suspend_asm.S | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S index 9fe28d5a0270..df0865df26fa 100644 --- a/arch/loongarch/power/suspend_asm.S +++ b/arch/loongarch/power/suspend_asm.S @@ -30,9 +30,6 @@ st.d $r29, sp, PT_R29 st.d $r30, sp, PT_R30 st.d $r31, sp, PT_R31 - - la.pcrel t0, acpi_saved_sp - st.d sp, t0, 0 .endm .macro SETUP_WAKEUP @@ -51,6 +48,7 @@ ld.d $r29, sp, PT_R29 ld.d $r30, sp, PT_R30 ld.d $r31, sp, PT_R31 + addi.d sp, sp, PT_SIZE .endm .text @@ -59,6 +57,10 @@ /* Sleep/wakeup code for Loongson-3 */ SYM_FUNC_START(loongarch_suspend_enter) SETUP_SLEEP + + la.pcrel t0, acpi_saved_sp + st.d sp, t0, 0 + bl __flush_cache_all /* Pass RA and SP to BIOS */ @@ -82,7 +84,7 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) la.pcrel t0, acpi_saved_sp ld.d sp, t0, 0 + SETUP_WAKEUP - addi.d sp, sp, PT_SIZE jr ra SYM_FUNC_END(loongarch_suspend_enter) From 26c0a2d93af55d30a46d5f45d3e9c42cde730168 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 26 Jan 2025 21:49:59 +0800 Subject: [PATCH 09/12] LoongArch: Fix warnings during S3 suspend The enable_gpe_wakeup() function calls acpi_enable_all_wakeup_gpes(), and the later one may call the preempt_schedule_common() function, resulting in a thread switch and causing the CPU to be in an interrupt enabled state after the enable_gpe_wakeup() function returns, leading to the warnings as follow. [ C0] WARNING: ... at kernel/time/timekeeping.c:845 ktime_get+0xbc/0xc8 [ C0] ... [ C0] Call Trace: [ C0] [<90000000002243b4>] show_stack+0x64/0x188 [ C0] [<900000000164673c>] dump_stack_lvl+0x60/0x88 [ C0] [<90000000002687e4>] __warn+0x8c/0x148 [ C0] [<90000000015e9978>] report_bug+0x1c0/0x2b0 [ C0] [<90000000016478e4>] do_bp+0x204/0x3b8 [ C0] [<90000000025b1924>] exception_handlers+0x1924/0x10000 [ C0] [<9000000000343bbc>] ktime_get+0xbc/0xc8 [ C0] [<9000000000354c08>] tick_sched_timer+0x30/0xb0 [ C0] [<90000000003408e0>] __hrtimer_run_queues+0x160/0x378 [ C0] [<9000000000341f14>] hrtimer_interrupt+0x144/0x388 [ C0] [<9000000000228348>] constant_timer_interrupt+0x38/0x48 [ C0] [<90000000002feba4>] __handle_irq_event_percpu+0x64/0x1e8 [ C0] [<90000000002fed48>] handle_irq_event_percpu+0x20/0x80 [ C0] [<9000000000306b9c>] handle_percpu_irq+0x5c/0x98 [ C0] [<90000000002fd4a0>] generic_handle_domain_irq+0x30/0x48 [ C0] [<9000000000d0c7b0>] handle_cpu_irq+0x70/0xa8 [ C0] [<9000000001646b30>] handle_loongarch_irq+0x30/0x48 [ C0] [<9000000001646bc8>] do_vint+0x80/0xe0 [ C0] [<90000000002aea1c>] finish_task_switch.isra.0+0x8c/0x2a8 [ C0] [<900000000164e34c>] __schedule+0x314/0xa48 [ C0] [<900000000164ead8>] schedule+0x58/0xf0 [ C0] [<9000000000294a2c>] worker_thread+0x224/0x498 [ C0] [<900000000029d2f0>] kthread+0xf8/0x108 [ C0] [<9000000000221f28>] ret_from_kernel_thread+0xc/0xa4 [ C0] [ C0] ---[ end trace 0000000000000000 ]--- The root cause is acpi_enable_all_wakeup_gpes() uses a mutex to protect acpi_hw_enable_all_wakeup_gpes(), and acpi_ut_acquire_mutex() may cause a thread switch. Since there is no longer concurrent execution during loongarch_acpi_suspend(), we can call acpi_hw_enable_all_wakeup_gpes() directly in enable_gpe_wakeup(). The solution is similar to commit 22db06337f590d01 ("ACPI: sleep: Avoid breaking S3 wakeup due to might_sleep()"). Fixes: 366bb35a8e48 ("LoongArch: Add suspend (ACPI S3) support") Signed-off-by: Qunqin Zhao Signed-off-by: Huacai Chen --- arch/loongarch/power/platform.c | 2 +- drivers/acpi/acpica/achware.h | 2 -- include/acpi/acpixf.h | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c index 0909729dc2e1..5bbdb9fd76e5 100644 --- a/arch/loongarch/power/platform.c +++ b/arch/loongarch/power/platform.c @@ -17,7 +17,7 @@ void enable_gpe_wakeup(void) if (acpi_gbl_reduced_hardware) return; - acpi_enable_all_wakeup_gpes(); + acpi_hw_enable_all_wakeup_gpes(); } void enable_pci_wakeup(void) diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index 79bbfe00d241..b8543a34caea 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -103,8 +103,6 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, acpi_status acpi_hw_enable_all_runtime_gpes(void); -acpi_status acpi_hw_enable_all_wakeup_gpes(void); - u8 acpi_hw_check_all_gpes(acpi_handle gpe_skip_device, u32 gpe_skip_number); acpi_status diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d076ebd19a61..78b24b090488 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -763,6 +763,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status *event_status)) ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_enable_all_wakeup_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) From 04816c1507b46baccd17a4bc948440b3634d5d13 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 26 Jan 2025 21:49:59 +0800 Subject: [PATCH 10/12] LoongArch: Add debugfs entries to switch SFB/TSO state We need to switch SFB (Store Fill Buffer) and TSO (Total Store Order) state at runtime to debug memory management and KVM virtualization, so add two debugfs entries "sfb_state" and "tso_state" under the directory /sys/kernel/debug/loongarch. Query SFB: cat /sys/kernel/debug/loongarch/sfb_state Enable SFB: echo 1 > /sys/kernel/debug/loongarch/sfb_state Disable SFB: echo 0 > /sys/kernel/debug/loongarch/sfb_state Query TSO: cat /sys/kernel/debug/loongarch/tso_state Switch TSO: echo [TSO] > /sys/kernel/debug/loongarch/tso_state Available [TSO] states: 0 (No Load No Store) 1 (All Load No Store) 3 (Same Load No Store) 4 (No Load All Store) 5 (All Load All Store) 7 (Same Load All Store) Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 15 +++ arch/loongarch/kernel/Makefile | 2 +- arch/loongarch/kernel/kdebugfs.c | 168 +++++++++++++++++++++++++ arch/loongarch/kernel/unaligned.c | 8 +- 4 files changed, 186 insertions(+), 7 deletions(-) create mode 100644 arch/loongarch/kernel/kdebugfs.c diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index a3cc4f8d4c4a..15c245636176 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -108,6 +108,12 @@ #define CPUCFG3_SPW_HG_HF BIT(11) #define CPUCFG3_RVA BIT(12) #define CPUCFG3_RVAMAX GENMASK(16, 13) +#define CPUCFG3_ALDORDER_CAP BIT(18) /* All address load ordered, capability */ +#define CPUCFG3_ASTORDER_CAP BIT(19) /* All address store ordered, capability */ +#define CPUCFG3_ALDORDER_STA BIT(20) /* All address load ordered, status */ +#define CPUCFG3_ASTORDER_STA BIT(21) /* All address store ordered, status */ +#define CPUCFG3_SLDORDER_CAP BIT(22) /* Same address load ordered, capability */ +#define CPUCFG3_SLDORDER_STA BIT(23) /* Same address load ordered, status */ #define LOONGARCH_CPUCFG4 0x4 #define CPUCFG4_CCFREQ GENMASK(31, 0) @@ -565,6 +571,15 @@ /* Implement dependent */ #define LOONGARCH_CSR_IMPCTL1 0x80 /* Loongson config1 */ +#define CSR_LDSTORDER_SHIFT 28 +#define CSR_LDSTORDER_WIDTH 3 +#define CSR_LDSTORDER_MASK (_ULCAST_(0x7) << CSR_LDSTORDER_SHIFT) +#define CSR_LDSTORDER_NLD_NST (_ULCAST_(0x0) << CSR_LDSTORDER_SHIFT) /* 000 = No Load No Store */ +#define CSR_LDSTORDER_ALD_NST (_ULCAST_(0x1) << CSR_LDSTORDER_SHIFT) /* 001 = All Load No Store */ +#define CSR_LDSTORDER_SLD_NST (_ULCAST_(0x3) << CSR_LDSTORDER_SHIFT) /* 011 = Same Load No Store */ +#define CSR_LDSTORDER_NLD_AST (_ULCAST_(0x4) << CSR_LDSTORDER_SHIFT) /* 100 = No Load All Store */ +#define CSR_LDSTORDER_ALD_AST (_ULCAST_(0x5) << CSR_LDSTORDER_SHIFT) /* 101 = All Load All Store */ +#define CSR_LDSTORDER_SLD_AST (_ULCAST_(0x7) << CSR_LDSTORDER_SHIFT) /* 111 = Same Load All Store */ #define CSR_MISPEC_SHIFT 20 #define CSR_MISPEC_WIDTH 8 #define CSR_MISPEC (_ULCAST_(0xff) << CSR_MISPEC_SHIFT) diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 9497968ee158..4853e8b04c6f 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -10,7 +10,7 @@ extra-y := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o reset.o switch.o \ elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \ - alternative.o unwind.o + alternative.o kdebugfs.o unwind.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/kdebugfs.c b/arch/loongarch/kernel/kdebugfs.c new file mode 100644 index 000000000000..80cf64772399 --- /dev/null +++ b/arch/loongarch/kernel/kdebugfs.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int sfb_state, tso_state; + +static void set_sfb_state(void *info) +{ + int val = *(int *)info << CSR_STFILL_SHIFT; + + csr_xchg32(val, CSR_STFILL, LOONGARCH_CSR_IMPCTL1); +} + +static ssize_t sfb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + int s, state; + char str[32]; + + state = (csr_read32(LOONGARCH_CSR_IMPCTL1) & CSR_STFILL) >> CSR_STFILL_SHIFT; + + s = snprintf(str, sizeof(str), "Boot State: %x\nCurrent State: %x\n", sfb_state, state); + + if (*ppos >= s) + return 0; + + s -= *ppos; + s = min_t(u32, s, count); + + if (copy_to_user(buf, &str[*ppos], s)) + return -EFAULT; + + *ppos += s; + + return s; +} + +static ssize_t sfb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + int state; + + if (kstrtoint_from_user(buf, count, 10, &state)) + return -EFAULT; + + switch (state) { + case 0: case 1: + on_each_cpu(set_sfb_state, &state, 1); + break; + default: + return -EINVAL; + } + + return count; +} + +static const struct file_operations sfb_fops = { + .read = sfb_read, + .write = sfb_write, + .open = simple_open, + .llseek = default_llseek +}; + +#define LDSTORDER_NLD_NST 0x0 /* 000 = No Load No Store */ +#define LDSTORDER_ALD_NST 0x1 /* 001 = All Load No Store */ +#define LDSTORDER_SLD_NST 0x3 /* 011 = Same Load No Store */ +#define LDSTORDER_NLD_AST 0x4 /* 100 = No Load All Store */ +#define LDSTORDER_ALD_AST 0x5 /* 101 = All Load All Store */ +#define LDSTORDER_SLD_AST 0x7 /* 111 = Same Load All Store */ + +static char *tso_hints[] = { + "No Load No Store", + "All Load No Store", + "Invalid Config", + "Same Load No Store", + "No Load All Store", + "All Load All Store", + "Invalid Config", + "Same Load All Store" +}; + +static void set_tso_state(void *info) +{ + int val = *(int *)info << CSR_LDSTORDER_SHIFT; + + csr_xchg32(val, CSR_LDSTORDER_MASK, LOONGARCH_CSR_IMPCTL1); +} + +static ssize_t tso_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + int s, state; + char str[240]; + + state = (csr_read32(LOONGARCH_CSR_IMPCTL1) & CSR_LDSTORDER_MASK) >> CSR_LDSTORDER_SHIFT; + + s = snprintf(str, sizeof(str), "Boot State: %d (%s)\n" + "Current State: %d (%s)\n\n" + "Available States:\n" + "0 (%s)\t" "1 (%s)\t" "3 (%s)\n" + "4 (%s)\t" "5 (%s)\t" "7 (%s)\n", + tso_state, tso_hints[tso_state], state, tso_hints[state], + tso_hints[0], tso_hints[1], tso_hints[3], tso_hints[4], tso_hints[5], tso_hints[7]); + + if (*ppos >= s) + return 0; + + s -= *ppos; + s = min_t(u32, s, count); + + if (copy_to_user(buf, &str[*ppos], s)) + return -EFAULT; + + *ppos += s; + + return s; +} + +static ssize_t tso_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + int state; + + if (kstrtoint_from_user(buf, count, 10, &state)) + return -EFAULT; + + switch (state) { + case 0: case 1: case 3: + case 4: case 5: case 7: + on_each_cpu(set_tso_state, &state, 1); + break; + default: + return -EINVAL; + } + + return count; +} + +static const struct file_operations tso_fops = { + .read = tso_read, + .write = tso_write, + .open = simple_open, + .llseek = default_llseek +}; + +static int __init arch_kdebugfs_init(void) +{ + unsigned int config = read_cpucfg(LOONGARCH_CPUCFG3); + + arch_debugfs_dir = debugfs_create_dir("loongarch", NULL); + + if (config & CPUCFG3_SFB) { + debugfs_create_file("sfb_state", S_IRUGO | S_IWUSR, + arch_debugfs_dir, &sfb_state, &sfb_fops); + sfb_state = (csr_read32(LOONGARCH_CSR_IMPCTL1) & CSR_STFILL) >> CSR_STFILL_SHIFT; + } + + if (config & (CPUCFG3_ALDORDER_CAP | CPUCFG3_ASTORDER_CAP)) { + debugfs_create_file("tso_state", S_IRUGO | S_IWUSR, + arch_debugfs_dir, &tso_state, &tso_fops); + tso_state = (csr_read32(LOONGARCH_CSR_IMPCTL1) & CSR_LDSTORDER_MASK) >> CSR_LDSTORDER_SHIFT; + } + + return 0; +} +postcore_initcall(arch_kdebugfs_init); diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index 3abf163dda05..487be604b96a 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -482,14 +482,10 @@ void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned i #ifdef CONFIG_DEBUG_FS static int __init debugfs_unaligned(void) { - struct dentry *d; - - d = debugfs_create_dir("loongarch", NULL); - debugfs_create_u32("unaligned_instructions_user", - S_IRUGO, d, &unaligned_instructions_user); + S_IRUGO, arch_debugfs_dir, &unaligned_instructions_user); debugfs_create_u32("unaligned_instructions_kernel", - S_IRUGO, d, &unaligned_instructions_kernel); + S_IRUGO, arch_debugfs_dir, &unaligned_instructions_kernel); return 0; } From f502ea618bf16d615d7dc6138c8988d3118fe750 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sun, 26 Jan 2025 21:49:59 +0800 Subject: [PATCH 11/12] LoongArch: Change 8 to 14 for LOONGARCH_MAX_{BRP,WRP} The maximum number of load/store watchpoints and fetch instruction watchpoints is 14 each according to LoongArch Reference Manual, so change 8 to 14 for the related code. Link: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-watchpoints Cc: stable@vger.kernel.org Fixes: edffa33c7bb5 ("LoongArch: Add hardware breakpoints/watchpoints support") Reviewed-by: WANG Xuerui Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hw_breakpoint.h | 4 +- arch/loongarch/include/asm/loongarch.h | 60 ++++++++++++++++++++++ arch/loongarch/kernel/hw_breakpoint.c | 16 +++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index d78330916bd1..13b2462f3d8c 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -38,8 +38,8 @@ struct arch_hw_breakpoint { * Limits. * Changing these will require modifications to the register accessors. */ -#define LOONGARCH_MAX_BRP 8 -#define LOONGARCH_MAX_WRP 8 +#define LOONGARCH_MAX_BRP 14 +#define LOONGARCH_MAX_WRP 14 /* Virtual debug register bases. */ #define CSR_CFG_ADDR 0 diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 15c245636176..52651aa0e583 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -973,6 +973,36 @@ #define LOONGARCH_CSR_DB7CTRL 0x34a /* data breakpoint 7 control */ #define LOONGARCH_CSR_DB7ASID 0x34b /* data breakpoint 7 asid */ +#define LOONGARCH_CSR_DB8ADDR 0x350 /* data breakpoint 8 address */ +#define LOONGARCH_CSR_DB8MASK 0x351 /* data breakpoint 8 mask */ +#define LOONGARCH_CSR_DB8CTRL 0x352 /* data breakpoint 8 control */ +#define LOONGARCH_CSR_DB8ASID 0x353 /* data breakpoint 8 asid */ + +#define LOONGARCH_CSR_DB9ADDR 0x358 /* data breakpoint 9 address */ +#define LOONGARCH_CSR_DB9MASK 0x359 /* data breakpoint 9 mask */ +#define LOONGARCH_CSR_DB9CTRL 0x35a /* data breakpoint 9 control */ +#define LOONGARCH_CSR_DB9ASID 0x35b /* data breakpoint 9 asid */ + +#define LOONGARCH_CSR_DB10ADDR 0x360 /* data breakpoint 10 address */ +#define LOONGARCH_CSR_DB10MASK 0x361 /* data breakpoint 10 mask */ +#define LOONGARCH_CSR_DB10CTRL 0x362 /* data breakpoint 10 control */ +#define LOONGARCH_CSR_DB10ASID 0x363 /* data breakpoint 10 asid */ + +#define LOONGARCH_CSR_DB11ADDR 0x368 /* data breakpoint 11 address */ +#define LOONGARCH_CSR_DB11MASK 0x369 /* data breakpoint 11 mask */ +#define LOONGARCH_CSR_DB11CTRL 0x36a /* data breakpoint 11 control */ +#define LOONGARCH_CSR_DB11ASID 0x36b /* data breakpoint 11 asid */ + +#define LOONGARCH_CSR_DB12ADDR 0x370 /* data breakpoint 12 address */ +#define LOONGARCH_CSR_DB12MASK 0x371 /* data breakpoint 12 mask */ +#define LOONGARCH_CSR_DB12CTRL 0x372 /* data breakpoint 12 control */ +#define LOONGARCH_CSR_DB12ASID 0x373 /* data breakpoint 12 asid */ + +#define LOONGARCH_CSR_DB13ADDR 0x378 /* data breakpoint 13 address */ +#define LOONGARCH_CSR_DB13MASK 0x379 /* data breakpoint 13 mask */ +#define LOONGARCH_CSR_DB13CTRL 0x37a /* data breakpoint 13 control */ +#define LOONGARCH_CSR_DB13ASID 0x37b /* data breakpoint 13 asid */ + #define LOONGARCH_CSR_FWPC 0x380 /* instruction breakpoint config */ #define LOONGARCH_CSR_FWPS 0x381 /* instruction breakpoint status */ @@ -1016,6 +1046,36 @@ #define LOONGARCH_CSR_IB7CTRL 0x3ca /* inst breakpoint 7 control */ #define LOONGARCH_CSR_IB7ASID 0x3cb /* inst breakpoint 7 asid */ +#define LOONGARCH_CSR_IB8ADDR 0x3d0 /* inst breakpoint 8 address */ +#define LOONGARCH_CSR_IB8MASK 0x3d1 /* inst breakpoint 8 mask */ +#define LOONGARCH_CSR_IB8CTRL 0x3d2 /* inst breakpoint 8 control */ +#define LOONGARCH_CSR_IB8ASID 0x3d3 /* inst breakpoint 8 asid */ + +#define LOONGARCH_CSR_IB9ADDR 0x3d8 /* inst breakpoint 9 address */ +#define LOONGARCH_CSR_IB9MASK 0x3d9 /* inst breakpoint 9 mask */ +#define LOONGARCH_CSR_IB9CTRL 0x3da /* inst breakpoint 9 control */ +#define LOONGARCH_CSR_IB9ASID 0x3db /* inst breakpoint 9 asid */ + +#define LOONGARCH_CSR_IB10ADDR 0x3e0 /* inst breakpoint 10 address */ +#define LOONGARCH_CSR_IB10MASK 0x3e1 /* inst breakpoint 10 mask */ +#define LOONGARCH_CSR_IB10CTRL 0x3e2 /* inst breakpoint 10 control */ +#define LOONGARCH_CSR_IB10ASID 0x3e3 /* inst breakpoint 10 asid */ + +#define LOONGARCH_CSR_IB11ADDR 0x3e8 /* inst breakpoint 11 address */ +#define LOONGARCH_CSR_IB11MASK 0x3e9 /* inst breakpoint 11 mask */ +#define LOONGARCH_CSR_IB11CTRL 0x3ea /* inst breakpoint 11 control */ +#define LOONGARCH_CSR_IB11ASID 0x3eb /* inst breakpoint 11 asid */ + +#define LOONGARCH_CSR_IB12ADDR 0x3f0 /* inst breakpoint 12 address */ +#define LOONGARCH_CSR_IB12MASK 0x3f1 /* inst breakpoint 12 mask */ +#define LOONGARCH_CSR_IB12CTRL 0x3f2 /* inst breakpoint 12 control */ +#define LOONGARCH_CSR_IB12ASID 0x3f3 /* inst breakpoint 12 asid */ + +#define LOONGARCH_CSR_IB13ADDR 0x3f8 /* inst breakpoint 13 address */ +#define LOONGARCH_CSR_IB13MASK 0x3f9 /* inst breakpoint 13 mask */ +#define LOONGARCH_CSR_IB13CTRL 0x3fa /* inst breakpoint 13 control */ +#define LOONGARCH_CSR_IB13ASID 0x3fb /* inst breakpoint 13 asid */ + #define LOONGARCH_CSR_DEBUG 0x500 /* debug config */ #define LOONGARCH_CSR_DERA 0x501 /* debug era */ #define LOONGARCH_CSR_DESAVE 0x502 /* debug save */ diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index a6e4b605bfa8..c35f9bf38033 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -51,7 +51,13 @@ int hw_breakpoint_slots(int type) READ_WB_REG_CASE(OFF, 4, REG, T, VAL); \ READ_WB_REG_CASE(OFF, 5, REG, T, VAL); \ READ_WB_REG_CASE(OFF, 6, REG, T, VAL); \ - READ_WB_REG_CASE(OFF, 7, REG, T, VAL); + READ_WB_REG_CASE(OFF, 7, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 8, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 9, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 10, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 11, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 12, REG, T, VAL); \ + READ_WB_REG_CASE(OFF, 13, REG, T, VAL); #define GEN_WRITE_WB_REG_CASES(OFF, REG, T, VAL) \ WRITE_WB_REG_CASE(OFF, 0, REG, T, VAL); \ @@ -61,7 +67,13 @@ int hw_breakpoint_slots(int type) WRITE_WB_REG_CASE(OFF, 4, REG, T, VAL); \ WRITE_WB_REG_CASE(OFF, 5, REG, T, VAL); \ WRITE_WB_REG_CASE(OFF, 6, REG, T, VAL); \ - WRITE_WB_REG_CASE(OFF, 7, REG, T, VAL); + WRITE_WB_REG_CASE(OFF, 7, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, T, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, T, VAL); static u64 read_wb_reg(int reg, int n, int t) { From 531936dee53e471a3ec668de3c94ca357f54b7e8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sun, 26 Jan 2025 21:49:59 +0800 Subject: [PATCH 12/12] LoongArch: Extend the maximum number of watchpoints The maximum number of load/store watchpoints and fetch instruction watchpoints is 14 each according to LoongArch Reference Manual, so extend the maximum number of watchpoints from 8 to 14 for ptrace. By the way, just simply change 8 to 14 for the definition in struct user_watch_state at the beginning, but it may corrupt uapi, then add a new struct user_watch_state_v2 directly. As far as I can tell, the only users for this struct in the userspace are GDB and LLDB, there are no any problems of software compatibility between the application and kernel according to the analysis. The compatibility problem has been considered while developing and testing. When the applications in the userspace get watchpoint state, the length will be specified which is no bigger than the sizeof struct user_watch_state or user_watch_state_v2, the actual length is assigned as the minimal value of the application and kernel in the generic code of ptrace: kernel/ptrace.c: ptrace_regset(): kiov->iov_len = min(kiov->iov_len, (__kernel_size_t) (regset->n * regset->size)); if (req == PTRACE_GETREGSET) return copy_regset_to_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); else return copy_regset_from_user(task, view, regset_no, 0, kiov->iov_len, kiov->iov_base); For example, there are four kind of combinations, all of them work well. (1) "older kernel + older gdb", the actual length is 8+(8+8+4+4)*8=200; (2) "newer kernel + newer gdb", the actual length is 8+(8+8+4+4)*14=344; (3) "older kernel + newer gdb", the actual length is 8+(8+8+4+4)*8=200; (4) "newer kernel + older gdb", the actual length is 8+(8+8+4+4)*8=200. Link: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#control-and-status-registers-related-to-watchpoints Cc: stable@vger.kernel.org Fixes: 1a69f7a161a7 ("LoongArch: ptrace: Expose hardware breakpoints to debuggers") Reviewed-by: WANG Xuerui Reviewed-by: Xi Ruoyao Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/uapi/asm/ptrace.h | 10 ++++++++++ arch/loongarch/kernel/ptrace.c | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index ac915f841650..aafb3cd9e943 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -72,6 +72,16 @@ struct user_watch_state { } dbg_regs[8]; }; +struct user_watch_state_v2 { + uint64_t dbg_info; + struct { + uint64_t addr; + uint64_t mask; + uint32_t ctrl; + uint32_t pad; + } dbg_regs[14]; +}; + #define PTRACE_SYSEMU 0x1f #define PTRACE_SYSEMU_SINGLESTEP 0x20 diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 19dc6eff45cc..5e2402cfcab0 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -720,7 +720,7 @@ static int hw_break_set(struct task_struct *target, unsigned int note_type = regset->core_note_type; /* Resource info */ - offset = offsetof(struct user_watch_state, dbg_regs); + offset = offsetof(struct user_watch_state_v2, dbg_regs); user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset); /* (address, mask, ctrl) registers */ @@ -920,7 +920,7 @@ static const struct user_regset loongarch64_regsets[] = { #ifdef CONFIG_HAVE_HW_BREAKPOINT [REGSET_HW_BREAK] = { .core_note_type = NT_LOONGARCH_HW_BREAK, - .n = sizeof(struct user_watch_state) / sizeof(u32), + .n = sizeof(struct user_watch_state_v2) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .regset_get = hw_break_get, @@ -928,7 +928,7 @@ static const struct user_regset loongarch64_regsets[] = { }, [REGSET_HW_WATCH] = { .core_note_type = NT_LOONGARCH_HW_WATCH, - .n = sizeof(struct user_watch_state) / sizeof(u32), + .n = sizeof(struct user_watch_state_v2) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .regset_get = hw_break_get,