Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf after 6.18-rc5+

Cross-merge BPF and other fixes after downstream PR.

Minor conflict in kernel/bpf/helpers.c

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2025-11-14 17:43:41 -08:00
commit e47b68bda4
511 changed files with 6083 additions and 2343 deletions

View File

@ -605,7 +605,8 @@ Oleksij Rempel <o.rempel@pengutronix.de>
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oliver.upton@linux.dev>
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/ti,twl4030-gpio.yaml#
$id: http://devicetree.org/schemas/gpio/ti,twl4030-gpio.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: TI TWL4030 GPIO controller

View File

@ -37,8 +37,8 @@ which corresponds to the following ASL (in the scope of \_SB)::
Name (_HID, ...)
Name (_CRS, ResourceTemplate () {
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
AddressingMode7Bit, "\\_SB.SMB1.CH00", 0x00,
ResourceConsumer,,)
AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH00",
0x00, ResourceConsumer,,)
}
}
}
@ -52,8 +52,8 @@ which corresponds to the following ASL (in the scope of \_SB)::
Name (_HID, ...)
Name (_CRS, ResourceTemplate () {
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
AddressingMode7Bit, "\\_SB.SMB1.CH01", 0x00,
ResourceConsumer,,)
AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH01",
0x00, ResourceConsumer,,)
}
}
}

View File

@ -105,10 +105,10 @@ In this example the SSID is 10280c63.
The format of the firmware file names is:
SoundWire (except CS35L56 Rev B0):
SoundWire:
cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u?
SoundWire CS35L56 Rev B0:
SoundWire CS35L56 Rev B0 firmware released before kernel version 6.16:
cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN
Non-SoundWire (HDA and I2S):
@ -127,9 +127,8 @@ Where:
* spkidX is an optional part, used for laptops that have firmware
configurations for different makes and models of internal speakers.
The CS35L56 Rev B0 continues to use the old filename scheme because a
large number of firmware files have already been published with these
names.
Early firmware for CS35L56 Rev B0 used the ALSA prefix (ampN) as the
filename qualifier. Support for the l?u? qualifier was added in kernel 6.16.
Sound Open Firmware and ALSA topology files
-------------------------------------------

View File

@ -13,10 +13,10 @@ Simple CLI
Kernel comes with a simple CLI tool which should be useful when
developing Netlink related code. The tool is implemented in Python
and can use a YAML specification to issue Netlink requests
to the kernel. Only Generic Netlink is supported.
to the kernel.
The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts
a handul of arguments, the most important ones are:
a handful of arguments, the most important ones are:
- ``--spec`` - point to the spec file
- ``--do $name`` / ``--dump $name`` - issue request ``$name``

View File

@ -915,6 +915,7 @@ F: drivers/staging/media/sunxi/cedrus/
ALPHA PORT
M: Richard Henderson <richard.henderson@linaro.org>
M: Matt Turner <mattst88@gmail.com>
M: Magnus Lindholm <linmag7@gmail.com>
L: linux-alpha@vger.kernel.org
S: Odd Fixes
F: arch/alpha/
@ -4398,7 +4399,7 @@ BLOCK LAYER
M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git
F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
@ -4819,6 +4820,7 @@ F: drivers/net/dsa/b53/*
F: drivers/net/dsa/bcm_sf2*
F: include/linux/dsa/brcm.h
F: include/linux/platform_data/b53.h
F: net/dsa/tag_brcm.c
BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
M: Florian Fainelli <florian.fainelli@broadcom.com>
@ -9208,6 +9210,7 @@ R: Yue Hu <zbestahu@gmail.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
R: Sandeep Dhavale <dhavale@google.com>
R: Hongbo Li <lihongbo22@huawei.com>
R: Chunhai Guo <guochunhai@vivo.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
W: https://erofs.docs.kernel.org
@ -12522,6 +12525,7 @@ F: include/linux/avf/virtchnl.h
F: include/linux/net/intel/*/
INTEL ETHERNET PROTOCOL DRIVER FOR RDMA
M: Krzysztof Czurylo <krzysztof.czurylo@intel.com>
M: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
@ -12862,7 +12866,8 @@ F: tools/testing/selftests/sgx/*
K: \bSGX_
INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER
M: Daniel Scally <djrscally@gmail.com>
M: Daniel Scally <dan.scally@ideasonboard.com>
M: Sakari Ailus <sakari.ailus@linux.intel.com>
S: Maintained
F: drivers/platform/x86/intel/int3472/
F: include/linux/platform_data/x86/int3472.h
@ -13657,7 +13662,7 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
M: Oliver Upton <oliver.upton@linux.dev>
M: Oliver Upton <oupton@kernel.org>
R: Joey Gouly <joey.gouly@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Zenghui Yu <yuzenghui@huawei.com>
@ -16496,12 +16501,12 @@ F: mm/secretmem.c
MEMORY MANAGEMENT - SWAP
M: Andrew Morton <akpm@linux-foundation.org>
M: Chris Li <chrisl@kernel.org>
M: Kairui Song <kasong@tencent.com>
R: Kemeng Shi <shikemeng@huaweicloud.com>
R: Kairui Song <kasong@tencent.com>
R: Nhat Pham <nphamcs@gmail.com>
R: Baoquan He <bhe@redhat.com>
R: Barry Song <baohua@kernel.org>
R: Chris Li <chrisl@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/mm/swap-table.rst
@ -20161,6 +20166,7 @@ R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
R: Jiri Olsa <jolsa@kernel.org>
R: Ian Rogers <irogers@google.com>
R: Adrian Hunter <adrian.hunter@intel.com>
R: James Clark <james.clark@linaro.org>
L: linux-perf-users@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Supported

View File

@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 18
SUBLEVEL = 0
EXTRAVERSION = -rc4
EXTRAVERSION = -rc5
NAME = Baby Opossum Posse
# *DOCUMENTATION*

View File

@ -917,6 +917,13 @@ config ARCH_USES_CFI_TRAPS
An architecture should select this option if it requires the
.kcfi_traps section for KCFI trap handling.
config ARCH_USES_CFI_GENERIC_LLVM_PASS
bool
help
An architecture should select this option if it uses the generic
KCFIPass in LLVM to expand kCFI bundles instead of architecture-specific
lowering.
config CFI
bool "Use Kernel Control Flow Integrity (kCFI)"
default CFI_CLANG

View File

@ -44,6 +44,8 @@ config ARM
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
# https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de
select ARCH_USES_CFI_GENERIC_LLVM_PASS if CLANG_VERSION < 220000
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_IPC_PARSE_VERSION

View File

@ -26,9 +26,12 @@ void __init apply_alternatives_all(void);
bool alternative_is_applied(u16 cpucap);
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length);
int apply_alternatives_module(void *start, size_t length);
#else
static inline void apply_alternatives_module(void *start, size_t length) { }
static inline int apply_alternatives_module(void *start, size_t length)
{
return 0;
}
#endif
void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,

View File

@ -10,8 +10,6 @@
#include <asm/set_memory.h>
static inline bool arch_kfence_init_pool(void) { return true; }
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
set_memory_valid(addr, 1, !protect);
@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void)
{
return !kfence_early_init;
}
bool arch_kfence_init_pool(void);
#else /* CONFIG_KFENCE */
static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
#endif /* CONFIG_KFENCE */

View File

@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \
" stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \
" cbnz %w[loop], 1b", \
/* LSE atomics */ \
#op_lse "\t%" #w "[val], %[ptr]\n" \
#op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \
__nops(3)) \
: [loop] "=&r" (loop), [tmp] "=&r" (tmp), \
[ptr] "+Q"(*(u##sz *)ptr) \
@ -124,9 +124,16 @@ PERCPU_RW_OPS(8)
PERCPU_RW_OPS(16)
PERCPU_RW_OPS(32)
PERCPU_RW_OPS(64)
PERCPU_OP(add, add, stadd)
PERCPU_OP(andnot, bic, stclr)
PERCPU_OP(or, orr, stset)
/*
* Use value-returning atomics for CPU-local ops as they are more likely
* to execute "near" to the CPU (e.g. in L1$).
*
* https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop
*/
PERCPU_OP(add, add, ldadd)
PERCPU_OP(andnot, bic, ldclr)
PERCPU_OP(or, orr, ldset)
PERCPU_RET_OP(add, add, ldadd)
#undef PERCPU_RW_OPS

View File

@ -53,7 +53,7 @@ enum {
EDYNSCS_INVALID_CFA_OPCODE = 4,
};
int __pi_scs_patch(const u8 eh_frame[], int size);
int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
#endif /* __ASSEMBLY__ */

View File

@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void spectre_print_disabled_mitigations(void);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */

View File

@ -197,8 +197,6 @@ static int __init acpi_fadt_sanity_check(void)
*/
void __init acpi_boot_table_init(void)
{
int ret;
/*
* Enable ACPI instead of device tree unless
* - ACPI has been disabled explicitly (acpi=off), or
@ -252,12 +250,8 @@ void __init acpi_boot_table_init(void)
* behaviour, use acpi=nospcr to disable console in ACPI SPCR
* table as default serial console.
*/
ret = acpi_parse_spcr(earlycon_acpi_spcr_enable,
acpi_parse_spcr(earlycon_acpi_spcr_enable,
!param_acpi_nospcr);
if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE))
pr_info("Use ACPI SPCR as default console: No\n");
else
pr_info("Use ACPI SPCR as default console: Yes\n");
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);

View File

@ -139,7 +139,7 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
static void __apply_alternatives(const struct alt_region *region,
static int __apply_alternatives(const struct alt_region *region,
bool is_module,
unsigned long *cpucap_mask)
{
@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region,
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
if (ALT_HAS_CB(alt))
if (ALT_HAS_CB(alt)) {
alt_cb = ALT_REPL_PTR(alt);
else
if (is_module && !core_kernel_text((unsigned long)alt_cb))
return -ENOEXEC;
} else {
alt_cb = patch_alternative;
}
alt_cb(alt, origptr, updptr, nr_inst);
@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region,
bitmap_and(applied_alternatives, applied_alternatives,
system_cpucaps, ARM64_NCAPS);
}
return 0;
}
static void __init apply_alternatives_vdso(void)
@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void)
}
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length)
int apply_alternatives_module(void *start, size_t length)
{
struct alt_region region = {
.begin = start,
@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
__apply_alternatives(&region, true, &all_capabilities[0]);
return __apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif

View File

@ -95,6 +95,7 @@
#include <asm/vectors.h>
#include <asm/virt.h>
#include <asm/spectre.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void)
*/
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
/*
* Report Spectre mitigations status.
*/
spectre_print_disabled_mitigations();
}
void __init setup_system_features(void)

View File

@ -489,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr,
int ret;
s = find_section(hdr, sechdrs, ".altinstructions");
if (s)
apply_alternatives_module((void *)s->sh_addr, s->sh_size);
if (s) {
ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size);
if (ret < 0) {
pr_err("module %s: error occurred when applying alternatives\n", me->name);
return ret;
}
}
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
if (s) {
ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size);
if (ret)
/*
* Because we can reject modules that are malformed
* so SCS patching fails, skip dry run and try to patch
* it in place. If patching fails, the module would not
* be loaded anyway.
*/
ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true);
if (ret) {
pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
me->name, ret);
return -ENOEXEC;
}
}
}

View File

@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
folio = page_folio(page);
if (folio_test_hugetlb(folio))
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
!is_huge_zero_folio(folio));
else
WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));

View File

@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
if (enable_scs) {
scs_patch(__eh_frame_start + va_offset,
__eh_frame_end - __eh_frame_start);
__eh_frame_end - __eh_frame_start, false);
asm("ic ialluis");
dynamic_scs_is_enabled = true;

View File

@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
return 0;
}
int scs_patch(const u8 eh_frame[], int size)
int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run)
{
int code_alignment_factor = 1;
bool fde_use_sdata8 = false;
@ -277,9 +277,11 @@ int scs_patch(const u8 eh_frame[], int size)
}
} else {
ret = scs_handle_fde_frame(frame, code_alignment_factor,
fde_use_sdata8, true);
fde_use_sdata8, !skip_dry_run);
if (ret)
return ret;
if (!skip_dry_run)
scs_handle_fde_frame(frame, code_alignment_factor,
fde_use_sdata8, false);
}

View File

@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
int scs_patch(const u8 eh_frame[], int size);
int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,

View File

@ -49,7 +49,10 @@ void *alloc_insn_page(void)
addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!addr)
return NULL;
set_memory_rox((unsigned long)addr, 1);
if (set_memory_rox((unsigned long)addr, 1)) {
execmem_free(addr);
return NULL;
}
return addr;
}

View File

@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param);
static bool spectre_v2_mitigations_off(void)
{
bool ret = __nospectre_v2 || cpu_mitigations_off();
if (ret)
pr_info_once("spectre-v2 mitigation disabled by command line option\n");
return ret;
return __nospectre_v2 || cpu_mitigations_off();
}
static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param);
*/
static bool spectre_v4_mitigations_off(void)
{
bool ret = cpu_mitigations_off() ||
return cpu_mitigations_off() ||
__spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
if (ret)
pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
return ret;
}
/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
@ -1042,10 +1032,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
/* No point mitigating Spectre-BHB alone. */
} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
} else if (cpu_mitigations_off() || __nospectre_bhb) {
pr_info_once("spectre-bhb mitigation disabled by command line option\n");
} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
state = SPECTRE_MITIGATED;
set_bit(BHB_HW, &system_bhb_mitigations);
@ -1199,3 +1185,18 @@ void unpriv_ebpf_notify(int new_state)
pr_err("WARNING: %s", EBPF_WARN);
}
#endif
void spectre_print_disabled_mitigations(void)
{
/* Keep a single copy of the common message suffix to avoid duplication. */
const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n";
if (spectre_v2_mitigations_off())
pr_info("spectre-v2 %s", spectre_disabled_suffix);
if (spectre_v4_mitigations_off())
pr_info("spectre-v4 %s", spectre_disabled_suffix);
if (__nospectre_bhb || cpu_mitigations_off())
pr_info("spectre-bhb %s", spectre_disabled_suffix);
}

View File

@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id,
struct ffa_mem_region_attributes *ep_mem_access;
struct ffa_composite_mem_region *reg;
struct ffa_mem_region *buf;
u32 offset, nr_ranges;
u32 offset, nr_ranges, checked_offset;
int ret = 0;
if (addr_mbz || npages_mbz || fraglen > len ||
@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) {
if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
if (fraglen < checked_offset) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}

View File

@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void)
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}
/*
 * Check that the nr_pages pages starting at pfn all lie inside the PA-range,
 * i.e. below BIT(kvm_phys_shift() - PAGE_SHIFT) for host_mmu.
 *
 * The range length is compared against the room left below the limit instead
 * of computing pfn + nr_pages, so the test cannot be defeated by overflow.
 * It is therefore required before a pfn/nr_pages pair coming from an
 * untrusted source is used.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 pfn_limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	/* The first page must itself be below the limit... */
	if (pfn >= pfn_limit)
		return false;

	/* ...and the remaining room must hold the whole range. */
	return nr_pages <= pfn_limit - pfn;
}
struct kvm_mem_range {
u64 start;
u64 end;
@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
void *virt = __hyp_va(phys);
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
hyp_lock_component();
@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
u64 virt = (u64)__hyp_va(phys);
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
hyp_lock_component();
@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
u64 size = PAGE_SIZE * nr_pages;
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
if (!ret)
@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
u64 size = PAGE_SIZE * nr_pages;
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
if (!ret)
@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
if (ret)
return ret;

View File

@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = 0, \
}
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define AA32_ID_SANITISED(name) { \
ID_DESC(name), \
.visibility = aa32_id_visibility, \
.val = 0, \
}
/* sys_reg_desc initialiser for writable ID registers */
#define ID_WRITABLE(name, mask) { \
ID_DESC(name), \
.val = mask, \
}
/*
 * sys_reg_desc initialiser for the 32bit ID registers.
 *
 * 32bit ID regs are fully writable when the guest is 32bit
 * capable. Nothing in the KVM code should rely on 32bit features
 * anyway, only 64bit, so let the VMM do its worst.
 *
 * As with ID_WRITABLE, .val holds the mask of writable bits; here it
 * covers bits [31:0]. Visibility is decided by aa32_id_visibility.
 */
#define AA32_ID_WRITABLE(name) { \
ID_DESC(name), \
.visibility = aa32_id_visibility, \
.val = GENMASK(31, 0), \
}
/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
#define ID_FILTERED(sysreg, name, mask) { \
ID_DESC(sysreg), \
@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
AA32_ID_SANITISED(ID_PFR0_EL1),
AA32_ID_SANITISED(ID_PFR1_EL1),
AA32_ID_WRITABLE(ID_PFR0_EL1),
AA32_ID_WRITABLE(ID_PFR1_EL1),
{ SYS_DESC(SYS_ID_DFR0_EL1),
.access = access_id_reg,
.get_user = get_id_reg,
.set_user = set_id_dfr0_el1,
.visibility = aa32_id_visibility,
.reset = read_sanitised_id_dfr0_el1,
.val = ID_DFR0_EL1_PerfMon_MASK |
ID_DFR0_EL1_CopDbg_MASK, },
.val = GENMASK(31, 0) },
ID_HIDDEN(ID_AFR0_EL1),
AA32_ID_SANITISED(ID_MMFR0_EL1),
AA32_ID_SANITISED(ID_MMFR1_EL1),
AA32_ID_SANITISED(ID_MMFR2_EL1),
AA32_ID_SANITISED(ID_MMFR3_EL1),
AA32_ID_WRITABLE(ID_MMFR0_EL1),
AA32_ID_WRITABLE(ID_MMFR1_EL1),
AA32_ID_WRITABLE(ID_MMFR2_EL1),
AA32_ID_WRITABLE(ID_MMFR3_EL1),
/* CRm=2 */
AA32_ID_SANITISED(ID_ISAR0_EL1),
AA32_ID_SANITISED(ID_ISAR1_EL1),
AA32_ID_SANITISED(ID_ISAR2_EL1),
AA32_ID_SANITISED(ID_ISAR3_EL1),
AA32_ID_SANITISED(ID_ISAR4_EL1),
AA32_ID_SANITISED(ID_ISAR5_EL1),
AA32_ID_SANITISED(ID_MMFR4_EL1),
AA32_ID_SANITISED(ID_ISAR6_EL1),
AA32_ID_WRITABLE(ID_ISAR0_EL1),
AA32_ID_WRITABLE(ID_ISAR1_EL1),
AA32_ID_WRITABLE(ID_ISAR2_EL1),
AA32_ID_WRITABLE(ID_ISAR3_EL1),
AA32_ID_WRITABLE(ID_ISAR4_EL1),
AA32_ID_WRITABLE(ID_ISAR5_EL1),
AA32_ID_WRITABLE(ID_MMFR4_EL1),
AA32_ID_WRITABLE(ID_ISAR6_EL1),
/* CRm=3 */
AA32_ID_SANITISED(MVFR0_EL1),
AA32_ID_SANITISED(MVFR1_EL1),
AA32_ID_SANITISED(MVFR2_EL1),
AA32_ID_WRITABLE(MVFR0_EL1),
AA32_ID_WRITABLE(MVFR1_EL1),
AA32_ID_WRITABLE(MVFR2_EL1),
ID_UNALLOCATED(3,3),
AA32_ID_SANITISED(ID_PFR2_EL1),
AA32_ID_WRITABLE(ID_PFR2_EL1),
ID_HIDDEN(ID_DFR1_EL1),
AA32_ID_SANITISED(ID_MMFR5_EL1),
AA32_ID_WRITABLE(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
@ -5606,11 +5609,13 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
guard(mutex)(&kvm->arch.config_lock);
if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
irqchip_in_kernel(kvm) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
if (!irqchip_in_kernel(kvm)) {
u64 val;
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
}
if (vcpu_has_nv(vcpu)) {

View File

@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
static int iter_mark_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid, flags;
struct vgic_irq *irq;
unsigned long intid;
int nr_lpis = 0;
xa_lock_irqsave(&dist->lpi_xa, flags);
xa_for_each(&dist->lpi_xa, intid, irq) {
if (!vgic_try_get_irq_ref(irq))
continue;
xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
__xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
nr_lpis++;
}
xa_unlock_irqrestore(&dist->lpi_xa, flags);
return nr_lpis;
}
static void iter_unmark_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid, flags;
struct vgic_irq *irq;
unsigned long intid;
xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) {
xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
xa_lock_irqsave(&dist->lpi_xa, flags);
__xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
/* vgic_put_irq() expects to be called outside of the xa_lock */
vgic_put_irq(kvm, irq);
}
}

View File

@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
xa_init(&dist->lpi_xa);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
struct kvm_vcpu *vcpu;
u64 aa64pfr0, pfr1;
unsigned long i;
int ret;
@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
else
} else {
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
}
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

View File

@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq;
unsigned long flags;
int ret;
/* In this case there is no put, since we keep the reference. */
@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
if (ret) {
kfree(irq);
return ERR_PTR(ret);
@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->target_vcpu = vcpu;
irq->group = 1;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@ -114,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
goto out_unlock;
} else {
ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
}
ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
xa_unlock_irqrestore(&dist->lpi_xa, flags);
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
}
out_unlock:
xa_unlock(&dist->lpi_xa);
if (ret)
return ERR_PTR(ret);
}
/*
* We "cache" the configuration table entries in our struct vgic_irq's.

View File

@ -301,7 +301,8 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
return;
/* Hide GICv3 sysreg if necessary */
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
!irqchip_in_kernel(vcpu->kvm)) {
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC);
return;

View File

@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* kvm->arch.config_lock (mutex)
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_dist->lpi_xa.xa_lock
* vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
* vgic_irq->irq_lock must be taken with IRQs disabled
*
@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long flags;
if (irq->intid >= VGIC_MIN_LPI)
might_lock(&dist->lpi_xa.xa_lock);
/*
* Normally the lock is only taken when the refcount drops to 0.
* Acquire/release it early on lockdep kernels to make locking issues
* in rare release paths a bit more obvious.
*/
if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) {
guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
}
if (!__vgic_put_irq(kvm, irq))
return;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
vgic_release_lpi_locked(dist, irq);
xa_unlock(&dist->lpi_xa);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
}
static void vgic_release_deleted_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid;
unsigned long flags, intid;
struct vgic_irq *irq;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
xa_for_each(&dist->lpi_xa, intid, irq) {
if (irq->pending_release)
vgic_release_lpi_locked(dist, irq);
}
xa_unlock(&dist->lpi_xa);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
}
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)

View File

@ -969,6 +969,16 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
void tag_clear_highpage(struct page *page)
{
/*
* Check if MTE is supported and fall back to clear_highpage().
* get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and
* post_alloc_hook() will invoke tag_clear_highpage().
*/
if (!system_supports_mte()) {
clear_highpage(page);
return;
}
/* Newly allocated page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(page));
mte_zero_clear_page_tags(page_address(page));

View File

@ -708,6 +708,30 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
return ret;
}
/*
 * Report whether PTE-granular mappings have to be forced.
 *
 * Returns true when debug_pagealloc is enabled. Otherwise returns false if
 * BBML2 (no-abort) is available, and if it is not, true when any of
 * rodata_full, arm64_kfence_can_set_direct_map() or is_realm_world() holds.
 *
 * NOTE(review): presumably this governs the kernel linear map - confirm
 * against the callers.
 */
static inline bool force_pte_mapping(void)
{
	bool bbml2;

	/*
	 * Use the system-wide view once capabilities are finalized, the
	 * per-CPU view before that.
	 */
	if (system_capabilities_finalized())
		bbml2 = system_supports_bbml2_noabort();
	else
		bbml2 = cpu_supports_bbml2_noabort();

	if (debug_pagealloc_enabled())
		return true;

	if (!bbml2) {
		return rodata_full || arm64_kfence_can_set_direct_map() ||
		       is_realm_world();
	}

	return false;
}
/*
 * Report whether leaf mappings can be split: true only when BBML2 (no-abort)
 * is supported system-wide and PTE mappings are not being forced.
 */
static inline bool split_leaf_mapping_possible(void)
{
	/*
	 * Systems without BBML2_NOABORT should never get into a situation
	 * that needs a split. Answer "no" straight away (force_pte_mapping()
	 * is not consulted in that case) and leave it to the calling code to
	 * notice and raise a warning.
	 */
	return system_supports_bbml2_noabort() && !force_pte_mapping();
}
static DEFINE_MUTEX(pgtable_split_lock);
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@ -715,12 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
int ret;
/*
* !BBML2_NOABORT systems should not be trying to change permissions on
* anything that is not pte-mapped in the first place. Just return early
* and let the permission change code raise a warning if not already
* pte-mapped.
* Exit early if the region is within a pte-mapped area or if we can't
* split. For the latter case, the permission change code will raise a
* warning if not already pte-mapped.
*/
if (!system_supports_bbml2_noabort())
if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
return 0;
/*
@ -758,30 +781,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
return ret;
}
static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
gfp_t gfp = *(gfp_t *)walk->private;
pud_t pud = pudp_get(pudp);
int ret = 0;
if (pud_leaf(pud))
ret = split_pud(pudp, pud, GFP_ATOMIC, false);
ret = split_pud(pudp, pud, gfp, false);
return ret;
}
static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
gfp_t gfp = *(gfp_t *)walk->private;
pmd_t pmd = pmdp_get(pmdp);
int ret = 0;
if (pmd_leaf(pmd)) {
if (pmd_cont(pmd))
split_contpmd(pmdp);
ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
ret = split_pmd(pmdp, pmd, gfp, false);
/*
* We have split the pmd directly to ptes so there is no need to
@ -793,9 +816,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
return ret;
}
static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
pte_t pte = __ptep_get(ptep);
@ -805,12 +827,24 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
return 0;
}
static const struct mm_walk_ops split_to_ptes_ops __initconst = {
static const struct mm_walk_ops split_to_ptes_ops = {
.pud_entry = split_to_ptes_pud_entry,
.pmd_entry = split_to_ptes_pmd_entry,
.pte_entry = split_to_ptes_pte_entry,
};
/*
 * Walk the kernel page tables covering start..end with split_to_ptes_ops,
 * inside a lazy MMU section. The address of @gfp is handed to the walker as
 * its last argument; the pud/pmd callbacks read their allocation mask back
 * from walk->private.
 *
 * Returns the walker's result unchanged; callers treat non-zero as failure.
 */
static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp)
{
	int err;

	arch_enter_lazy_mmu_mode();
	err = walk_kernel_page_table_range_lockless(start, end, &split_to_ptes_ops,
						    NULL, &gfp);
	arch_leave_lazy_mmu_mode();

	return err;
}
static bool linear_map_requires_bbml2 __initdata;
u32 idmap_kpti_bbml2_flag;
@ -847,11 +881,9 @@ static int __init linear_map_split_to_ptes(void *__unused)
* PTE. The kernel alias remains static throughout runtime so
* can continue to be safely mapped with large mappings.
*/
ret = walk_kernel_page_table_range_lockless(lstart, kstart,
&split_to_ptes_ops, NULL, NULL);
ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC);
if (!ret)
ret = walk_kernel_page_table_range_lockless(kend, lend,
&split_to_ptes_ops, NULL, NULL);
ret = range_split_to_ptes(kend, lend, GFP_ATOMIC);
if (ret)
panic("Failed to split linear map\n");
flush_tlb_kernel_range(lstart, lend);
@ -1002,6 +1034,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
__kfence_pool = phys_to_virt(kfence_pool);
}
/*
 * Make sure the kfence pool is mapped at pte granularity.
 *
 * Returns true when no split is needed or the split succeeded, false when
 * range_split_to_ptes() reported an error.
 */
bool arch_kfence_init_pool(void)
{
	unsigned long pool_start = (unsigned long)__kfence_pool;
	unsigned long pool_end = pool_start + KFENCE_POOL_SIZE;
	int err;

	/*
	 * Nothing to do if we know the linear map is already pte-mapped, or
	 * if kfence was initialised early, in which case the pool is already
	 * pte-mapped.
	 */
	if (!split_leaf_mapping_possible() || kfence_early_init)
		return true;

	mutex_lock(&pgtable_split_lock);
	err = range_split_to_ptes(pool_start, pool_end, GFP_PGTABLE_KERNEL);
	mutex_unlock(&pgtable_split_lock);

	/*
	 * Since the system supports bbml2_noabort, tlb invalidation is not
	 * required here; the pgtable mappings have been split to pte but larger
	 * entries may safely linger in the TLB.
	 */
	return err == 0;
}
#else /* CONFIG_KFENCE */
static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
@ -1009,16 +1068,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) {
#endif /* CONFIG_KFENCE */
static inline bool force_pte_mapping(void)
{
bool bbml2 = system_capabilities_finalized() ?
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
is_realm_world())) ||
debug_pagealloc_enabled();
}
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);

View File

@ -109,7 +109,7 @@ endif
ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP
KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump
else
KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers
KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) # keep compatibility with older compilers
endif
ifdef CONFIG_LTO_CLANG
# The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled.

View File

@ -67,6 +67,8 @@
#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
#define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW)
#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT)
#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT)
#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT)
#endif /* __ASM_CPU_FEATURES_H */

View File

@ -101,7 +101,9 @@ enum cpu_type_enum {
#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */
#define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */
#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */
#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */
#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */
#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */
#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */
#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@ -132,6 +134,8 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
#define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW)
#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT)
#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT)
#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT)
#endif /* _ASM_CPU_H */

View File

@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next)
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
}
#endif /* __KERNEL__ */

View File

@ -14,7 +14,7 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
#define early_memremap early_ioremap
@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
pgprot_t prot)
{
if (offset > TO_PHYS_MASK)
return NULL;
switch (pgprot_val(prot) & _CACHE_MASK) {
case _CACHE_CC:
return (void __iomem *)(unsigned long)(CACHE_BASE + offset);

View File

@ -128,6 +128,7 @@
#define CPUCFG6_PMNUM GENMASK(7, 4)
#define CPUCFG6_PMNUM_SHIFT 4
#define CPUCFG6_PMBITS GENMASK(13, 8)
#define CPUCFG6_PMBITS_SHIFT 8
#define CPUCFG6_UPM BIT(14)
#define LOONGARCH_CPUCFG16 0x10
@ -1137,6 +1138,7 @@
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
#define IOCSRF_AVEC BIT_ULL(15)
#define IOCSRF_REDIRECT BIT_ULL(16)
#define LOONGARCH_IOCSR_VENDOR 0x10

View File

@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
pud_t *pud;
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;

View File

@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
/*
 * Build a pte that keeps the _PAGE_CHG_MASK bits of @pte and takes every
 * other bit from @newprot. When _PAGE_DIRTY is set in @pte, _PAGE_MODIFIED
 * is set as well before the old bits are filtered through the mask.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	if (pte_val(pte) & _PAGE_DIRTY)
		pte_val(pte) |= _PAGE_MODIFIED;

	/* Old entry contributes only the change-mask bits... */
	pte_val(pte) &= _PAGE_CHG_MASK;
	/* ...and the new protection supplies all the others. */
	pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK;

	return __pte(pte_val(pte));
}
@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd)
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) |
(pgprot_val(newprot) & ~_HPAGE_CHG_MASK);
return pmd;
if (pmd_val(pmd) & _PAGE_DIRTY)
pmd_val(pmd) |= _PAGE_MODIFIED;
return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) |
(pgprot_val(newprot) & ~_HPAGE_CHG_MASK));
}
static inline pmd_t pmd_mkinvalid(pmd_t pmd)

View File

@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_TLB;
if (config & CPUCFG1_IOCSR)
c->options |= LOONGARCH_CPU_IOCSR;
if (config & CPUCFG1_MSGINT)
c->options |= LOONGARCH_CPU_MSGINT;
if (config & CPUCFG1_UAL) {
c->options |= LOONGARCH_CPU_UAL;
elf_hwcap |= HWCAP_LOONGARCH_UAL;
@ -331,6 +333,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
c->options |= LOONGARCH_CPU_EIODECODE;
if (config & IOCSRF_AVEC)
c->options |= LOONGARCH_CPU_AVECINT;
if (config & IOCSRF_REDIRECT)
c->options |= LOONGARCH_CPU_REDIRECTINT;
if (config & IOCSRF_VM)
c->options |= LOONGARCH_CPU_HYPERVISOR;
}

View File

@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image,
{
int ret;
unsigned long text_offset, kernel_segment_number;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
struct kexec_segment *kernel_segment;
struct loongarch_image_header *h;

View File

@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image,
int ret;
unsigned long text_offset, kernel_segment_number;
struct elfhdr ehdr;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
struct kexec_elf_info elf_info;
struct kexec_segment *kernel_segment;

View File

@ -39,34 +39,12 @@ static unsigned long systable_ptr;
static unsigned long start_addr;
static unsigned long first_ind_entry;
/*
 * Emit a pr_debug() summary of @kimage: type, start, head, the segment count
 * and, for each segment, its address range, size in bytes and size in pages.
 */
static void kexec_image_info(const struct kimage *kimage)
{
	unsigned long idx;

	pr_debug("kexec kimage info:\n");
	pr_debug("\ttype: %d\n", kimage->type);
	pr_debug("\tstart: %lx\n", kimage->start);
	pr_debug("\thead: %lx\n", kimage->head);
	pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);

	for (idx = 0; idx < kimage->nr_segments; idx++) {
		unsigned long bytes = (unsigned long)kimage->segment[idx].memsz;

		pr_debug("\t segment[%lu]: %016lx - %016lx", idx,
			 kimage->segment[idx].mem,
			 kimage->segment[idx].mem + kimage->segment[idx].memsz);
		pr_debug("\t\t0x%lx bytes, %lu pages\n", bytes, bytes / PAGE_SIZE);
	}
}
int machine_kexec_prepare(struct kimage *kimage)
{
int i;
char *bootloader = "kexec";
void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR;
kexec_image_info(kimage);
kimage->arch.efi_boot = fw_arg0;
kimage->arch.systable_ptr = fw_arg2;

View File

@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image,
unsigned long initrd_load_addr = 0;
unsigned long orig_segments = image->nr_segments;
char *modified_cmdline = NULL;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
kbuf.image = image;
/* Don't allocate anything below the kernel */

View File

@ -13,7 +13,7 @@
void __init memblock_init(void)
{
u32 mem_type;
u64 mem_start, mem_end, mem_size;
u64 mem_start, mem_size;
efi_memory_desc_t *md;
/* Parse memory information */
@ -21,7 +21,6 @@ void __init memblock_init(void)
mem_type = md->type;
mem_start = md->phys_addr;
mem_size = md->num_pages << EFI_PAGE_SHIFT;
mem_end = mem_start + mem_size;
switch (mem_type) {
case EFI_LOADER_CODE:
@ -31,8 +30,6 @@ void __init memblock_init(void)
case EFI_PERSISTENT_MEMORY:
case EFI_CONVENTIONAL_MEMORY:
memblock_add(mem_start, mem_size);
if (max_low_pfn < (mem_end >> PAGE_SHIFT))
max_low_pfn = mem_end >> PAGE_SHIFT;
break;
case EFI_PAL_CODE:
case EFI_UNUSABLE_MEMORY:
@ -49,6 +46,8 @@ void __init memblock_init(void)
}
}
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
memblock_set_current_limit(PFN_PHYS(max_low_pfn));
/* Reserve the first 2MB */

View File

@ -272,7 +272,8 @@ int __init init_numa_memory(void)
node_mem_init(node);
node_set_online(node);
}
max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
setup_nr_node_ids();
loongson_sysconf.nr_nodes = nr_node_ids;
@ -283,26 +284,6 @@ int __init init_numa_memory(void)
#endif
/*
 * Raise max_low_pfn to the highest end pfn reported for any online node, then
 * hand the per-zone limits (ZONE_DMA32 when configured, and ZONE_NORMAL) to
 * free_area_init().
 */
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
	unsigned int nid;

	for_each_online_node(nid) {
		unsigned long first_pfn, last_pfn;

		get_pfn_range_for_nid(nid, &first_pfn, &last_pfn);

		if (last_pfn > max_low_pfn)
			max_low_pfn = last_pfn;
	}

#ifdef CONFIG_ZONE_DMA32
	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
	zones_size[ZONE_NORMAL] = max_low_pfn;

	free_area_init(zones_size);
}
int pcibus_to_node(struct pci_bus *bus)
{
return dev_to_node(&bus->dev);

View File

@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config
static int __init init_hw_perf_events(void)
{
int counters;
int bits, counters;
if (!cpu_has_pmp)
return -ENODEV;
pr_info("Performance counters: ");
counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;
bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1;
counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1;
loongarch_pmu.num_counters = counters;
loongarch_pmu.max_period = (1ULL << 63) - 1;
@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void)
on_each_cpu(reset_counters, NULL, 1);
pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
loongarch_pmu.name, counters, 64);
loongarch_pmu.name, counters, bits);
perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

View File

@ -294,8 +294,6 @@ static void __init fdt_setup(void)
early_init_dt_scan(fdt_pointer, __pa(fdt_pointer));
early_init_fdt_reserve_self();
max_low_pfn = PFN_PHYS(memblock_end_of_DRAM());
#endif
}
@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void)
static void __init arch_mem_init(char **cmdline_p)
{
/* Recalculate max_low_pfn for "mem=xxx" */
max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
if (usermem)
pr_info("User-defined physical RAM map overwrite\n");

View File

@ -1131,8 +1131,8 @@ static void configure_exception_vector(void)
tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE;
csr_write64(eentry, LOONGARCH_CSR_EENTRY);
csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY);
csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY);
}
void per_cpu_trap_init(int cpu)

View File

@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
spin_lock_irqsave(&s->lock, flags);
switch (type) {
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
if (val >= EIOINTC_ROUTE_MAX_VCPUS)
if (val > EIOINTC_ROUTE_MAX_VCPUS)
ret = -EINVAL;
else
s->num_cpu = val;

View File

@ -857,7 +857,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
if (writeable) {
prot_bits = kvm_pte_mkwriteable(prot_bits);
if (write)
if (write || !kvm_slot_dirty_track_enabled(memslot))
prot_bits = kvm_pte_mkdirty(prot_bits);
}

View File

@ -4,6 +4,7 @@
*/
#include <linux/kvm_host.h>
#include <asm/delay.h>
#include <asm/kvm_csr.h>
#include <asm/kvm_vcpu.h>
@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
* and set CSR TVAL with -1
*/
write_gcsr_timertick(0);
__delay(2); /* Wait cycles until timer interrupt injected */
/*
* Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear

View File

@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
* Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
* exiting the guest, so that the next time trap into the guest.
* We don't need to deal with PMU CSRs contexts.
*
* Otherwise set the request bit KVM_REQ_PMU to restore guest PMU
* before entering guest VM
*/
val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
@ -139,16 +142,12 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
if (!(val & KVM_PMU_EVENT_ENABLED))
vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
else
kvm_make_request(KVM_REQ_PMU, vcpu);
kvm_restore_host_pmu(vcpu);
}
/* Raise KVM_REQ_PMU for @vcpu, but only while KVM_LARCH_PMU is set in aux_inuse. */
static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU))
		return;

	kvm_make_request(KVM_REQ_PMU, vcpu);
}
static void kvm_check_pmu(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
@ -299,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) {
if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) {
kvm_lose_pmu(vcpu);
kvm_make_request(KVM_REQ_PMU, vcpu);
}
/* make sure the vcpu mode has been written */
smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE);
local_irq_enable();
@ -1604,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_restore_timer(vcpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
/* Restore hardware PMU CSRs */
kvm_restore_pmu(vcpu);
/* Don't bother restoring registers multiple times unless necessary */
if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
return 0;

View File

@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn)
return memblock_is_memory(addr) && !memblock_is_reserved(addr);
}
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
@ -72,7 +71,6 @@ void __init paging_init(void)
free_area_init(max_zone_pfns);
}
#endif /* !CONFIG_NUMA */
void __ref free_initmem(void)
{

View File

@ -6,7 +6,7 @@
#include <asm/io.h>
#include <asm-generic/early_ioremap.h>
void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size)
void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size)
{
return ((void __iomem *)TO_CACHE(phys_addr));
}

View File

@ -35,6 +35,8 @@
#define KERNEL_START (KERNEL_BINARY_TEXT_START)
/*
 * True when the bits of ptr selected by (sizeof(type) - 1) are all zero.
 * This is an alignment test only when sizeof(type) is a power of two.
 */
#define ALIGNMENT_OK(ptr, type) (((ptr) & (sizeof(type) - 1)) == 0)
extern struct unwind_table_entry __start___unwind[];
extern struct unwind_table_entry __stop___unwind[];
@ -257,12 +259,15 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
if (pc_is_kernel_fn(pc, _switch_to) ||
pc == (unsigned long)&_switch_to_ret) {
info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE;
if (ALIGNMENT_OK(info->prev_sp, long))
info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET);
else
info->prev_ip = info->prev_sp = 0;
return 1;
}
#ifdef CONFIG_IRQSTACKS
if (pc == (unsigned long)&_call_on_stack) {
if (pc == (unsigned long)&_call_on_stack && ALIGNMENT_OK(info->sp, long)) {
info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
return 1;
@ -370,8 +375,10 @@ static void unwind_frame_regs(struct unwind_frame_info *info)
info->prev_sp = info->sp - frame_size;
if (e->Millicode)
info->rp = info->r31;
else if (rpoffset)
else if (rpoffset && ALIGNMENT_OK(info->prev_sp, long))
info->rp = *(unsigned long *)(info->prev_sp - rpoffset);
else
info->rp = 0;
info->prev_ip = info->rp;
info->rp = 0;
}

View File

@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS
systems to handle cache management.
config AS_HAS_INSN
def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
def_bool $(as-instr,.insn 0x100000f)
config AS_HAS_OPTION_ARCH
# https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4

View File

@ -134,21 +134,6 @@ endif
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
# Default target when executing plain make
boot := arch/riscv/boot
ifeq ($(CONFIG_XIP_KERNEL),y)
KBUILD_IMAGE := $(boot)/xipImage
else
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/Image.gz
else
KBUILD_IMAGE := $(boot)/vmlinuz.efi
endif
endif
endif
boot := arch/riscv/boot
boot-image-y := Image
boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2
@ -159,7 +144,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo
boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst
boot-image-$(CONFIG_KERNEL_XZ) := Image.xz
ifdef CONFIG_RISCV_M_MODE
boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin
boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin
endif
boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi
boot-image-$(CONFIG_XIP_KERNEL) := xipImage

View File

@ -12,6 +12,12 @@
#define __ASM_STR(x) #x
#endif
/*
 * ASM_INSN_I(__x) prefixes the string literal __x with an assembler directive:
 * ".insn " when CONFIG_AS_HAS_INSN is defined, ".4byte " otherwise. The result
 * relies on adjacent string-literal concatenation, so __x must be a literal.
 */
#ifdef CONFIG_AS_HAS_INSN
#define ASM_INSN_I(__x) ".insn " __x
#else
#define ASM_INSN_I(__x) ".4byte " __x
#endif
#if __riscv_xlen == 64
#define __REG_SEL(a, b) __ASM_STR(a)
#elif __riscv_xlen == 32

View File

@ -256,10 +256,10 @@
INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \
SIMM12((offset) & 0xfe0), RS1(base))
#define RISCV_PAUSE ".4byte 0x100000f"
#define ZAWRS_WRS_NTO ".4byte 0x00d00073"
#define ZAWRS_WRS_STO ".4byte 0x01d00073"
#define RISCV_NOP4 ".4byte 0x00000013"
#define RISCV_PAUSE ASM_INSN_I("0x100000f")
#define ZAWRS_WRS_NTO ASM_INSN_I("0x00d00073")
#define ZAWRS_WRS_STO ASM_INSN_I("0x01d00073")
#define RISCV_NOP4 ASM_INSN_I("0x00000013")
#define RISCV_INSN_NOP4 _AC(0x00000013, U)

View File

@ -30,8 +30,8 @@ extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_mips;
* allowing any subsequent instructions to fetch.
*/
#define MIPS_PAUSE ".4byte 0x00501013\n\t"
#define MIPS_EHB ".4byte 0x00301013\n\t"
#define MIPS_IHB ".4byte 0x00101013\n\t"
#define MIPS_PAUSE ASM_INSN_I("0x00501013\n\t")
#define MIPS_EHB ASM_INSN_I("0x00301013\n\t")
#define MIPS_IHB ASM_INSN_I("0x00101013\n\t")
#endif // _ASM_RISCV_VENDOR_EXTENSIONS_MIPS_H

View File

@ -265,10 +265,10 @@ void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer,
{
if (!strncmp(remcom_in_buffer, gdb_xfer_read_target,
sizeof(gdb_xfer_read_target)))
strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc);
strscpy(remcom_out_buffer, riscv_gdb_stub_target_desc, BUFMAX);
else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml,
sizeof(gdb_xfer_read_cpuxml)))
strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml);
strscpy(remcom_out_buffer, riscv_gdb_stub_cpuxml, BUFMAX);
}
static inline void kgdb_arch_update_addr(struct pt_regs *regs,

View File

@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned int num_plts = 0;
unsigned int num_gots = 0;
Elf_Rela *scratch = NULL;
Elf_Rela *new_scratch;
size_t scratch_size = 0;
int i;
@ -168,10 +169,13 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
if (scratch_size_needed > scratch_size) {
scratch_size = scratch_size_needed;
scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
if (!scratch)
new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
if (!new_scratch) {
kvfree(scratch);
return -ENOMEM;
}
scratch = new_scratch;
}
for (size_t j = 0; j < num_relas; j++)
if (rela_needs_plt_got_entry(&relas[j]))

View File

@ -16,6 +16,22 @@
#ifdef CONFIG_FRAME_POINTER
/*
 * This disables KASAN checking when reading a value from another task's stack,
 * since the other task could be running on another CPU and could have poisoned
 * the stack in the meantime.
 *
 * @task: task whose stack holds @x; compared against current.
 * @x:    lvalue to read (e.g. frame->fp). It is handed to READ_ONCE() or
 *        READ_ONCE_NOCHECK() directly: copying it into a local first would
 *        perform the stack access with an ordinary load before the
 *        no-check accessor is ever reached.
 *
 * Evaluates to the value read, as an unsigned long.
 */
#define READ_ONCE_TASK_STACK(task, x)			\
({							\
	unsigned long val;				\
	if ((task) == current)				\
		val = READ_ONCE(x);			\
	else						\
		val = READ_ONCE_NOCHECK(x);		\
	val;						\
})
extern asmlinkage void handle_exception(void);
extern unsigned long ret_from_exception_end;
@ -69,8 +85,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
fp = frame->ra;
pc = regs->ra;
} else {
fp = frame->fp;
pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
fp = READ_ONCE_TASK_STACK(task, frame->fp);
pc = READ_ONCE_TASK_STACK(task, frame->ra);
pc = ftrace_graph_ret_addr(current, &graph_idx, pc,
&frame->ra);
if (pc >= (unsigned long)handle_exception &&
pc < (unsigned long)&ret_from_exception_end) {

View File

@ -31,7 +31,7 @@ config RISCV_MODULE_LINKING_KUNIT
If unsure, say N.
config RISCV_KPROBES_KUNIT
bool "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS
tristate "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS
depends on KUNIT
depends on KPROBES
default KUNIT_ALL_TESTS

View File

@ -1 +1,3 @@
obj-y += test-kprobes.o test-kprobes-asm.o
obj-$(CONFIG_RISCV_KPROBES_KUNIT) += kprobes_riscv_kunit.o
kprobes_riscv_kunit-objs := test-kprobes.o test-kprobes-asm.o

View File

@ -49,8 +49,11 @@ static struct kunit_case kprobes_testcases[] = {
};
static struct kunit_suite kprobes_test_suite = {
.name = "kprobes_test_riscv",
.name = "kprobes_riscv",
.test_cases = kprobes_testcases,
};
kunit_test_suites(&kprobes_test_suite);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("KUnit test for riscv kprobes");

View File

@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu)
*/
read_lock_irqsave(&imsic->vsfile_lock, flags);
if (imsic->vsfile_cpu > -1)
if (imsic->vsfile_cpu > -1) {
/*
* This function is typically called from kvm_vcpu_block() via
* kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block()
* can be preempted and the blocking VCPU might resume on a
* different CPU. This means it is possible that current CPU
* does not match the imsic->vsfile_cpu hence this function
* must check imsic->vsfile_cpu before accessing HGEIP CSR.
*/
if (imsic->vsfile_cpu != vcpu->cpu)
ret = true;
else
ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
}
read_unlock_irqrestore(&imsic->vsfile_lock, flags);
return ret;

View File

@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
enum kvm_mr_change change)
{
hva_t hva, reg_end, size;
gpa_t base_gpa;
bool writable;
int ret = 0;
@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
hva = new->userspace_addr;
size = new->npages << PAGE_SHIFT;
reg_end = hva + size;
base_gpa = new->base_gfn << PAGE_SHIFT;
writable = !(new->flags & KVM_MEM_READONLY);
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and
* any holes between them, so iterate over all of them to find
* out if we can map any of them right now.
* any holes between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
hva_t vm_start, vm_end;
hva_t vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
}
/* Take the intersection of this VMA with the memory region */
vm_start = max(hva, vma->vm_start);
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
gpa_t gpa = base_gpa + (vm_start - hva);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start,
writable, false);
if (ret)
break;
}
hva = vm_end;
} while (hva < reg_end);
if (change == KVM_MR_FLAGS_ONLY)
goto out;
if (ret)
kvm_riscv_mmu_iounmap(kvm, base_gpa, size);
out:
mmap_read_unlock(current->mm);
return ret;

View File

@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) &&
!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

View File

@ -21,7 +21,7 @@
#define pt_dump_seq_puts(m, fmt) \
({ \
if (m) \
seq_printf(m, fmt); \
seq_puts(m, fmt); \
})
/*

View File

@ -98,7 +98,7 @@ ifeq ($(CONFIG_X86_KERNEL_IBT),y)
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816
#
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables)
KBUILD_RUSTFLAGS += -Zcf-protection=branch -Zno-jump-tables
KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
else
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif

View File

@ -2789,13 +2789,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
if (perf_hw_regs(regs)) {
if (perf_callchain_store(entry, regs->ip))
return;
if (perf_hw_regs(regs))
unwind_start(&state, current, regs, NULL);
else
} else {
unwind_start(&state, current, NULL, (void *)regs->sp);
}
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);

View File

@ -23,7 +23,6 @@
#define AMD_NODE0_PCI_SLOT 0x18
struct pci_dev *amd_node_get_func(u16 node, u8 func);
struct pci_dev *amd_node_get_root(u16 node);
static inline u16 amd_num_nodes(void)
{

View File

@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &arch_ftrace_regs(fregs)->regs;
}
/*
 * Clear X86_EFLAGS_FIXED in regs->flags and set regs->cs to __KERNEL_CS.
 *
 * @regs is parenthesized at each use so that pointer expressions such as
 * "p + 1" bind correctly; it is still evaluated twice, so it must not have
 * side effects.
 */
#define arch_ftrace_partial_regs(regs) do {	\
	(regs)->flags &= ~X86_EFLAGS_FIXED;	\
	(regs)->cs = __KERNEL_CS;		\
} while (0)
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
(_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
(_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \

View File

@ -2,6 +2,10 @@
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
#ifdef MODULE
#error "Cannot use runtime-const infrastructure from modules"
#endif
#ifdef __ASSEMBLY__
.macro RUNTIME_CONST_PTR sym reg

View File

@ -12,12 +12,12 @@
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/percpu.h>
#include <asm/runtime-const.h>
/*
* Virtual variable: there's no actual backing store for this,
* it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
*/
#ifdef MODULE
#define runtime_const_ptr(sym) (sym)
#else
#include <asm/runtime-const.h>
#endif
extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING

View File

@ -93,6 +93,7 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
#define EXIT_REASON_SEAMCALL 76
#define EXIT_REASON_TDCALL 77
#define EXIT_REASON_MSR_READ_IMM 84
#define EXIT_REASON_MSR_WRITE_IMM 85

View File

@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected)
break;
}
for_each_present_cpu(cpu) {
for_each_online_cpu(cpu) {
u32 tmp;
int ret;

View File

@ -34,62 +34,6 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func)
return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func));
}
#define DF_BLK_INST_CNT 0x040
#define DF_CFG_ADDR_CNTL_LEGACY 0x084
#define DF_CFG_ADDR_CNTL_DF4 0xC04
#define DF_MAJOR_REVISION GENMASK(27, 24)
/*
 * Pick the config-space offset of the Config Address Control register for the
 * Data Fabric function 0 device @df_f0.
 *
 * Returns DF_CFG_ADDR_CNTL_DF4 when the major revision field is non-zero,
 * DF_CFG_ADDR_CNTL_LEGACY when it reads as zero, and 0 if the config read
 * itself fails.
 */
static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0)
{
	u32 blk_inst_cnt;

	if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, &blk_inst_cnt))
		return 0;

	/*
	 * Revision fields added for DF4 and later.
	 *
	 * Major revision of '0' is found pre-DF4. Field is Read-as-Zero.
	 */
	return (blk_inst_cnt & DF_MAJOR_REVISION) ? DF_CFG_ADDR_CNTL_DF4
						  : DF_CFG_ADDR_CNTL_LEGACY;
}
/*
 * Look up the PCI root device that belongs to AMD node @node.
 *
 * The bus number is read from the node's Data Fabric function 0 device and
 * the device at domain 0, that bus, devfn 0 is returned.
 *
 * Returns the pci_dev obtained from pci_get_domain_bus_and_slot(), or NULL
 * when the CPU is not Zen, the DF device is missing, a config read fails, or
 * no device exists at the computed location.
 */
struct pci_dev *amd_node_get_root(u16 node)
{
	struct pci_dev *root;
	u16 cntl_off;
	u8 bus;

	if (!cpu_feature_enabled(X86_FEATURE_ZEN))
		return NULL;

	/*
	 * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl)
	 * Bits [7:0] (SecBusNum) holds the bus number of the root device for
	 * this Data Fabric instance. The segment, device, and function will be 0.
	 */
	struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0);
	if (!df_f0)
		return NULL;

	cntl_off = get_cfg_addr_cntl_offset(df_f0);
	if (!cntl_off)
		return NULL;

	if (pci_read_config_byte(df_f0, cntl_off, &bus))
		return NULL;

	/* Grab the pointer for the actual root device instance. */
	root = pci_get_domain_bus_and_slot(0, bus, 0);

	/* The lookup can come back empty; never hand a NULL device to pci_dbg(). */
	if (root)
		pci_dbg(root, "is root for AMD node %u\n", node);

	return root;
}
static struct pci_dev **amd_roots;
/* Protect the PCI config register pairs used for SMN. */
@ -274,51 +218,21 @@ DEFINE_SHOW_STORE_ATTRIBUTE(smn_node);
DEFINE_SHOW_STORE_ATTRIBUTE(smn_address);
DEFINE_SHOW_STORE_ATTRIBUTE(smn_value);
static int amd_cache_roots(void)
static struct pci_dev *get_next_root(struct pci_dev *root)
{
u16 node, num_nodes = amd_num_nodes();
amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL);
if (!amd_roots)
return -ENOMEM;
for (node = 0; node < num_nodes; node++)
amd_roots[node] = amd_node_get_root(node);
return 0;
}
static int reserve_root_config_spaces(void)
{
struct pci_dev *root = NULL;
struct pci_bus *bus = NULL;
while ((bus = pci_find_next_bus(bus))) {
/* Root device is Device 0 Function 0 on each Primary Bus. */
root = pci_get_slot(bus, 0);
if (!root)
while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) {
/* Root device is Device 0 Function 0. */
if (root->devfn)
continue;
if (root->vendor != PCI_VENDOR_ID_AMD &&
root->vendor != PCI_VENDOR_ID_HYGON)
continue;
pci_dbg(root, "Reserving PCI config space\n");
/*
* There are a few SMN index/data pairs and other registers
* that shouldn't be accessed by user space.
* So reserve the entire PCI config space for simplicity rather
* than covering specific registers piecemeal.
*/
if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) {
pci_err(root, "Failed to reserve config space\n");
return -EEXIST;
}
break;
}
smn_exclusive = true;
return 0;
return root;
}
static bool enable_dfs;
@ -332,7 +246,8 @@ __setup("amd_smn_debugfs_enable", amd_smn_enable_dfs);
static int __init amd_smn_init(void)
{
int err;
u16 count, num_roots, roots_per_node, node, num_nodes;
struct pci_dev *root;
if (!cpu_feature_enabled(X86_FEATURE_ZEN))
return 0;
@ -342,13 +257,48 @@ static int __init amd_smn_init(void)
if (amd_roots)
return 0;
err = amd_cache_roots();
if (err)
return err;
num_roots = 0;
root = NULL;
while ((root = get_next_root(root))) {
pci_dbg(root, "Reserving PCI config space\n");
err = reserve_root_config_spaces();
if (err)
return err;
/*
* There are a few SMN index/data pairs and other registers
* that shouldn't be accessed by user space. So reserve the
* entire PCI config space for simplicity rather than covering
* specific registers piecemeal.
*/
if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) {
pci_err(root, "Failed to reserve config space\n");
return -EEXIST;
}
num_roots++;
}
pr_debug("Found %d AMD root devices\n", num_roots);
if (!num_roots)
return -ENODEV;
num_nodes = amd_num_nodes();
amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL);
if (!amd_roots)
return -ENOMEM;
roots_per_node = num_roots / num_nodes;
count = 0;
node = 0;
root = NULL;
while (node < num_nodes && (root = get_next_root(root))) {
/* Use one root for each node and skip the rest. */
if (count++ % roots_per_node)
continue;
pci_dbg(root, "is root for AMD node %u\n", node);
amd_roots[node++] = root;
}
if (enable_dfs) {
debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir);
@ -358,6 +308,8 @@ static int __init amd_smn_init(void)
debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops);
}
smn_exclusive = true;
return 0;
}

View File

@ -1038,6 +1038,7 @@ static void init_amd_zen4(struct cpuinfo_x86 *c)
/*
* Table of CPU match entries built with ZEN_MODEL_STEP_UCODE(). The empty
* initializer at the end is the terminating entry of the table.
* NOTE(review): the macro arguments are presumably family, model, stepping
* and a microcode revision related to RDSEED handling - confirm against the
* macro definition and the code that consumes this table.
*/
static const struct x86_cpu_id zen5_rdseed_microcode[] = {
ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a),
ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054),
{},
};
static void init_amd_zen5(struct cpuinfo_x86 *c)

View File

@ -78,6 +78,10 @@
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Used for modules: built-in code uses runtime constants */
unsigned long USER_PTR_MAX;
EXPORT_SYMBOL(USER_PTR_MAX);
u32 elf_hwcap2 __read_mostly;
/* Number of siblings per CPU package */
@ -2579,7 +2583,7 @@ void __init arch_cpu_finalize_init(void)
alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) {
unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
USER_PTR_MAX = TASK_SIZE_MAX;
/*
* Enable this when LAM is gated on LASS support

View File

@ -220,10 +220,12 @@ static bool need_sha_check(u32 cur_rev)
case 0xaa001: return cur_rev <= 0xaa00116; break;
case 0xaa002: return cur_rev <= 0xaa00218; break;
case 0xb0021: return cur_rev <= 0xb002146; break;
case 0xb0081: return cur_rev <= 0xb008111; break;
case 0xb1010: return cur_rev <= 0xb101046; break;
case 0xb2040: return cur_rev <= 0xb204031; break;
case 0xb4040: return cur_rev <= 0xb404031; break;
case 0xb6000: return cur_rev <= 0xb600031; break;
case 0xb6080: return cur_rev <= 0xb608031; break;
case 0xb7000: return cur_rev <= 0xb700031; break;
default: break;
}

View File

@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
/* Restore return_to_handler value that got eaten by previous ret instruction. */
subq $8, %rsp
UNWIND_HINT_FUNC
/* Save ftrace_regs for function exit context */
subq $(FRAME_SIZE), %rsp
movq %rax, RAX(%rsp)
movq %rdx, RDX(%rsp)
movq %rbp, RBP(%rsp)
movq %rsp, RSP(%rsp)
movq %rsp, %rdi
call ftrace_return_to_handler
@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler)
movq RDX(%rsp), %rdx
movq RAX(%rsp), %rax
addq $(FRAME_SIZE), %rsp
addq $(FRAME_SIZE) + 8, %rsp
/*
* Jump back to the old return address. This cannot be JMP_NOSPEC rdi
* since IBT would demand that contain ENDBR, which simply isn't so for

View File

@ -216,7 +216,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
* This function is called from IOMMU driver to notify
* SVM to schedule in a particular vCPU of a particular VM.
*/
int avic_ga_log_notifier(u32 ga_tag)
static int avic_ga_log_notifier(u32 ga_tag)
{
unsigned long flags;
struct kvm_svm *kvm_svm;
@ -788,7 +788,7 @@ int avic_init_vcpu(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
INIT_LIST_HEAD(&svm->ir_list);
spin_lock_init(&svm->ir_list_lock);
raw_spin_lock_init(&svm->ir_list_lock);
if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
return 0;
@ -816,9 +816,9 @@ static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd)
if (!vcpu)
return;
spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags);
raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags);
list_del(&irqfd->vcpu_list);
spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags);
raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags);
}
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
@ -855,7 +855,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
* list of IRQs being posted to the vCPU, to ensure the IRTE
* isn't programmed with stale pCPU/IsRunning information.
*/
guard(spinlock_irqsave)(&svm->ir_list_lock);
guard(raw_spinlock_irqsave)(&svm->ir_list_lock);
/*
* Update the target pCPU for IOMMU doorbells if the vCPU is
@ -972,7 +972,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
* up-to-date entry information, or that this task will wait until
* svm_ir_list_add() completes to set the new target pCPU.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
raw_spin_lock_irqsave(&svm->ir_list_lock, flags);
entry = svm->avic_physical_id_entry;
WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
@ -997,7 +997,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -1035,7 +1035,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)
* or that this task will wait until svm_ir_list_add() completes to
* mark the vCPU as not running.
*/
spin_lock_irqsave(&svm->ir_list_lock, flags);
raw_spin_lock_irqsave(&svm->ir_list_lock, flags);
avic_update_iommu_vcpu_affinity(vcpu, -1, action);
@ -1059,7 +1059,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)
svm->avic_physical_id_entry = entry;
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
@ -1243,3 +1243,9 @@ bool __init avic_hardware_setup(void)
return true;
}
/*
* Teardown hook for AVIC: when the avic flag is set, pass a NULL GA log
* notifier to the AMD IOMMU driver. Does nothing when avic is clear.
* NOTE(review): passing NULL presumably unregisters avic_ga_log_notifier()
* so the IOMMU driver no longer calls into SVM - confirm against
* amd_iommu_register_ga_log_notifier().
*/
void avic_hardware_unsetup(void)
{
if (avic)
amd_iommu_register_ga_log_notifier(NULL);
}

View File

@ -677,11 +677,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
*/
svm_copy_lbrs(vmcb02, vmcb12);
vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
svm_update_lbrv(&svm->vcpu);
} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
} else {
svm_copy_lbrs(vmcb02, vmcb01);
}
svm_update_lbrv(&svm->vcpu);
}
static inline bool is_evtinj_soft(u32 evtinj)
@ -833,11 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
svm->soft_int_next_rip = vmcb12_rip;
}
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
LBR_CTL_ENABLE_MASK;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV))
vmcb02->control.virt_ext |=
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
/* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
@ -1189,13 +1184,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
else
svm_copy_lbrs(vmcb01, vmcb02);
svm_update_lbrv(vcpu);
}
if (vnmi) {
if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)

View File

@ -806,60 +806,43 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
/*
* Set LBR_CTL_ENABLE_MASK in control.virt_ext of the VMCB that
* to_svm(vcpu)->vmcb points at. Only this control bit is modified here.
*/
static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
}
void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
__svm_enable_lbrv(vcpu);
svm_recalc_lbr_msr_intercepts(vcpu);
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
}
static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
svm_recalc_lbr_msr_intercepts(vcpu);
/*
* Move the LBR msrs back to the vmcb01 to avoid copying them
* on nested guest entries.
*/
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
/*
* If LBR virtualization is disabled, the LBR MSRs are always kept in
* vmcb01. If LBR virtualization is enabled and L1 is running VMs of
* its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
*/
return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
svm->vmcb01.ptr;
to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
}
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
return;
if (enable_lbrv && !current_enable_lbrv)
__svm_enable_lbrv(vcpu);
else if (!enable_lbrv && current_enable_lbrv)
__svm_disable_lbrv(vcpu);
if (enable_lbrv)
svm_enable_lbrv(vcpu);
else
svm_disable_lbrv(vcpu);
/*
* During nested transitions, it is possible that the current VMCB has
* LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa).
* In this case, even though LBR_CTL does not need an update, intercepts
* do, so always recalculate the intercepts here.
*/
svm_recalc_lbr_msr_intercepts(vcpu);
}
void disable_nmi_singlestep(struct vcpu_svm *svm)
@ -921,6 +904,8 @@ static void svm_hardware_unsetup(void)
{
int cpu;
avic_hardware_unsetup();
sev_hardware_unsetup();
for_each_possible_cpu(cpu)
@ -2722,19 +2707,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->tsc_aux;
break;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
msr_info->data = svm->vmcb->save.dbgctl;
break;
case MSR_IA32_LASTBRANCHFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
msr_info->data = svm->vmcb->save.br_from;
break;
case MSR_IA32_LASTBRANCHTOIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
msr_info->data = svm->vmcb->save.br_to;
break;
case MSR_IA32_LASTINTFROMIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
msr_info->data = svm->vmcb->save.last_excp_from;
break;
case MSR_IA32_LASTINTTOIP:
msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
msr_info->data = svm->vmcb->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
@ -3002,7 +2987,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
svm_get_lbr_vmcb(svm)->save.dbgctl = data;
if (svm->vmcb->save.dbgctl == data)
break;
svm->vmcb->save.dbgctl = data;
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
svm_update_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
@ -5386,12 +5375,6 @@ static __init int svm_hardware_setup(void)
svm_hv_hardware_setup();
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
}
enable_apicv = avic_hardware_setup();
if (!enable_apicv) {
enable_ipiv = false;
@ -5435,6 +5418,13 @@ static __init int svm_hardware_setup(void)
svm_set_cpu_caps();
kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED;
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
}
return 0;
err:

View File

@ -329,7 +329,7 @@ struct vcpu_svm {
* back into remapped mode).
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
raw_spinlock_t ir_list_lock;
struct vcpu_sev_es_state sev_es;
@ -805,7 +805,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
)
bool __init avic_hardware_setup(void);
int avic_ga_log_notifier(u32 ga_tag);
void avic_hardware_unsetup(void);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);

View File

@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
? PFERR_PRESENT_MASK : 0;
if (error_code & EPT_VIOLATION_GVA_IS_VALID)
if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;

View File

@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_NOTIFY:
/* Notify VM exit is not exposed to L1 */
return false;
case EXIT_REASON_SEAMCALL:
case EXIT_REASON_TDCALL:
/*
* SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
* virtualized by KVM for L1 hypervisors, i.e. L1 should
* never want or expect such an exit.
*/
return false;
default:
return true;
}

View File

@ -6032,6 +6032,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
return 1;
}
/*
* VM-exit handler that queues a #UD (UD_VECTOR) exception for @vcpu.
* NOTE(review): the return value of 1 presumably tells the exit dispatcher
* to resume the guest rather than exit to userspace - confirm against the
* other handlers in kvm_vmx_exit_handlers[].
*/
static int handle_tdx_instruction(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
#ifndef CONFIG_X86_SGX_KVM
static int handle_encls(struct kvm_vcpu *vcpu)
{
@ -6157,6 +6163,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_ENCLS] = handle_encls,
[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
[EXIT_REASON_NOTIFY] = handle_notify,
[EXIT_REASON_SEAMCALL] = handle_tdx_instruction,
[EXIT_REASON_TDCALL] = handle_tdx_instruction,
[EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm,
[EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm,
};

View File

@ -3874,15 +3874,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
/*
* Returns true if the MSR in question is managed via XSTATE, i.e. is context
* switched with the rest of guest FPU state. Note! S_CET is _not_ context
* switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
* Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
* the value saved/restored via XSTATE is always the host's value. That detail
* is _extremely_ important, as the guest's S_CET must _never_ be resident in
* hardware while executing in the host. Loading guest values for U_CET and
* PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
* userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
* privilege levels, i.e. are effectively only consumed by userspace as well.
* switched with the rest of guest FPU state.
*
* Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS.
*/
static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
{
@ -3905,6 +3899,11 @@ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
* MSR that is managed via XSTATE. Note, the caller is responsible for doing
* the initial FPU load, this helper only ensures that guest state is resident
* in hardware (the kernel can load its FPU state in IRQ context).
*
* Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the
* kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only
* consumed when transitioning to lower privilege levels, i.e. are effectively
* only consumed by userspace as well.
*/
static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
struct msr_data *msr_info,
@ -11807,6 +11806,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm))
return;
/* Exclude PKRU, it's restored separately immediately after VM-Exit. */
fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
trace_kvm_fpu(1);
@ -11815,6 +11817,9 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm))
return;
fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
@ -12137,9 +12142,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
int r;
vcpu_load(vcpu);
if (kvm_mpx_supported())
kvm_load_guest_fpu(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
r = kvm_apic_accept_events(vcpu);
@ -12156,9 +12158,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
out:
kvm_vcpu_srcu_read_unlock(vcpu);
if (kvm_mpx_supported())
kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
return r;
}
@ -12788,6 +12787,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
u64 xfeatures_mask;
bool fpu_in_use;
int i;
/*
@ -12811,12 +12811,22 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX);
/*
* All paths that lead to INIT are required to load the guest's FPU
* state (because most paths are buried in KVM_RUN).
* Unload guest FPU state (if necessary) before zeroing XSTATE fields
* as the kernel can only modify the state when it's resident in memory,
* i.e. when it's not loaded into hardware.
*
* WARN if the vCPU's desire to run, i.e. whether or not it's in KVM_RUN,
* doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the
* only path that can trigger INIT emulation _and_ loads FPU state, and
* KVM_RUN should _always_ load FPU state.
*/
WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use);
fpu_in_use = fpstate->in_use;
if (fpu_in_use)
kvm_put_guest_fpu(vcpu);
for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
fpstate_clear_xstate_component(fpstate, i);
if (fpu_in_use)
kvm_load_guest_fpu(vcpu);
}

View File

@ -152,26 +152,49 @@ ATTRIBUTE_GROUPS(memory_range);
static __init int add_boot_memory_ranges(void)
{
struct kobject *pkobj, *kobj;
struct kobject *pkobj, *kobj, **kobjs;
int ret = -EINVAL;
char *name;
char name[16];
int i;
pkobj = kobject_create_and_add("memory_ranges", acpi_kobj);
if (!pkobj)
return -ENOMEM;
for (int i = 0; i < mrrm_mem_entry_num; i++) {
name = kasprintf(GFP_KERNEL, "range%d", i);
if (!name) {
ret = -ENOMEM;
break;
kobjs = kcalloc(mrrm_mem_entry_num, sizeof(*kobjs), GFP_KERNEL);
if (!kobjs) {
kobject_put(pkobj);
return -ENOMEM;
}
for (i = 0; i < mrrm_mem_entry_num; i++) {
scnprintf(name, sizeof(name), "range%d", i);
kobj = kobject_create_and_add(name, pkobj);
if (!kobj) {
ret = -ENOMEM;
goto cleanup;
}
ret = sysfs_create_groups(kobj, memory_range_groups);
if (ret)
return ret;
if (ret) {
kobject_put(kobj);
goto cleanup;
}
kobjs[i] = kobj;
}
kfree(kobjs);
return 0;
cleanup:
for (int j = 0; j < i; j++) {
if (kobjs[j]) {
sysfs_remove_groups(kobjs[j], memory_range_groups);
kobject_put(kobjs[j]);
}
}
kfree(kobjs);
kobject_put(pkobj);
return ret;
}

View File

@ -460,7 +460,7 @@ bool acpi_cpc_valid(void)
if (acpi_disabled)
return false;
for_each_present_cpu(cpu) {
for_each_online_cpu(cpu) {
cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
if (!cpc_ptr)
return false;
@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void)
struct cpc_desc *cpc_ptr;
int cpu;
for_each_present_cpu(cpu) {
for_each_online_cpu(cpu) {
cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF];
if (!CPC_IN_SYSTEM_MEMORY(desired_reg) &&
@ -750,7 +750,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
}
/*
* Disregard _CPC if the number of entries in the return pachage is not
* Disregard _CPC if the number of entries in the return package is not
* as expected, but support future revisions being proper supersets of
* the v3 and only causing more entries to be returned by _CPC.
*/
@ -1435,7 +1435,7 @@ bool cppc_perf_ctrs_in_pcc(void)
{
int cpu;
for_each_present_cpu(cpu) {
for_each_online_cpu(cpu) {
struct cpc_register_resource *ref_perf_reg;
struct cpc_desc *cpc_desc;

View File

@ -874,10 +874,32 @@ static void hmat_register_target_devices(struct memory_target *target)
}
}
static void hmat_register_target(struct memory_target *target)
/*
* Register the initiators, cache information and access coordinates of
* @target, at most once, and only when its memory proximity domain maps to
* an online NUMA node. Returns without doing anything otherwise.
*/
static void hmat_hotplug_target(struct memory_target *target)
{
int nid = pxm_to_node(target->memory_pxm);
/*
* Skip offline nodes. This can happen when memory marked EFI_MEMORY_SP,
* "specific purpose", is applied to all the memory in a proximity
* domain leading to the node being marked offline / unplugged, or if
* memory-only "hotplug" node is offline.
*/
if (nid == NUMA_NO_NODE || !node_online(nid))
return;
/* Scope-based guard: target_lock is dropped on every return path below. */
guard(mutex)(&target_lock);
/* Registration already happened for this target; do not repeat it. */
if (target->registered)
return;
hmat_register_target_initiators(target);
hmat_register_target_cache(target);
hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
target->registered = true;
}
static void hmat_register_target(struct memory_target *target)
{
/*
* Devices may belong to either an offline or online
* node, so unconditionally add them.
@ -895,25 +917,7 @@ static void hmat_register_target(struct memory_target *target)
}
mutex_unlock(&target_lock);
/*
* Skip offline nodes. This can happen when memory
* marked EFI_MEMORY_SP, "specific purpose", is applied
* to all the memory in a proximity domain leading to
* the node being marked offline / unplugged, or if
* memory-only "hotplug" node is offline.
*/
if (nid == NUMA_NO_NODE || !node_online(nid))
return;
mutex_lock(&target_lock);
if (!target->registered) {
hmat_register_target_initiators(target);
hmat_register_target_cache(target);
hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
target->registered = true;
}
mutex_unlock(&target_lock);
hmat_hotplug_target(target);
}
static void hmat_register_targets(void)
@ -939,7 +943,7 @@ static int hmat_callback(struct notifier_block *self,
if (!target)
return NOTIFY_OK;
hmat_register_target(target);
hmat_hotplug_target(target);
return NOTIFY_OK;
}

View File

@ -237,7 +237,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header)
struct acpi_srat_generic_affinity *p =
(struct acpi_srat_generic_affinity *)header;
if (p->device_handle_type == 0) {
if (p->device_handle_type == 1) {
/*
* For pci devices this may be the only place they
* are assigned a proximity domain

View File

@ -487,7 +487,7 @@ static int acpi_battery_read(struct acpi_battery *battery)
if (result)
return result;
battery->present = state & (1 << battery->id);
battery->present = !!(state & (1 << battery->id));
if (!battery->present)
return 0;

Some files were not shown because too many files have changed in this diff Show More