From e3baa5d4b361276efeb87b20d8beced451a7dbd5 Mon Sep 17 00:00:00 2001
From: Jinqian Yang
Date: Sat, 27 Dec 2025 17:24:48 +0800
Subject: [PATCH 01/86] arm64: Add support for TSV110 Spectre-BHB mitigation

The TSV110 processor is vulnerable to the Spectre-BHB (Branch History
Buffer) attack, which can be exploited to leak information through
branch prediction side channels. This commit adds the MIDR of TSV110 to
the list of CPUs requiring the software mitigation.

Signed-off-by: Jinqian Yang
Reviewed-by: Zenghui Yu
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/proton-pack.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 80a580e019c5..b3801f532b10 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -887,6 +887,7 @@ static u8 spectre_bhb_loop_affected(void)
 		MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
 		MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
 		MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
 		{},
 	};
 	static const struct midr_range spectre_bhb_k24_list[] = {

From 334a1a1e1a5f8b4b172683e7507cfec566313a7c Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 6 Jan 2026 17:37:05 +0000
Subject: [PATCH 02/86] KVM: arm64: Fix comment in fpsimd_lazy_switch_to_host()

The comment in fpsimd_lazy_switch_to_host() erroneously says guest traps
for FPSIMD/SVE/SME are disabled by fpsimd_lazy_switch_to_guest(). In
reality, the traps are disabled by __activate_cptr_traps(), and
fpsimd_lazy_switch_to_guest() only manipulates the SVE vector length.

This was a mistake; I accidentally copy+pasted the wrong function name
in commit:

  59419f10045b ("KVM: arm64: Eagerly switch ZCR_EL{1,2}")

Fix the comment.

Fixes: 59419f10045b ("KVM: arm64: Eagerly switch ZCR_EL{1,2}")
Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: Fuad Tabba
Cc: Marc Zyngier
Cc: Mark Brown
Cc: Oliver Upton
Cc: Will Deacon
Tested-by: Fuad Tabba
Reviewed-by: Fuad Tabba
Link: https://patch.msgid.link/20260106173707.3292074-2-mark.rutland@arm.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index c5d5e5b86eaf..8dce3da85da3 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -495,7 +495,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
	/*
	 * When the guest owns the FP regs, we know that guest+hyp traps for
	 * any FPSIMD/SVE/SME features exposed to the guest have been disabled
-	 * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+	 * by either __activate_cptr_traps() or kvm_hyp_handle_fpsimd()
	 * prior to __guest_entry(). As __guest_entry() guarantees a context
	 * synchronization event, we don't need an ISB here to avoid taking
	 * traps for anything that was exposed to the guest.
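[For readers unfamiliar with how midr_range lists such as the one in patch 01
are consumed: the running CPU's MIDR_EL1 is compared against each entry,
matching on implementer and part number plus a variant:revision window. A
minimal standalone sketch of that check; the field offsets follow the
architected MIDR_EL1 layout, but the helper names and example encodings are
illustrative, not the kernel's exact definitions.]

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct midr_range {
  	uint32_t model;          /* implementer + part number */
  	uint8_t rv_min, rv_max;  /* packed variant:revision window */
  };

  /* implementer is bits [31:24], part number bits [15:4] */
  #define MIDR_MODEL_MASK 0xff00fff0u

  /* pack variant [23:20] and revision [3:0] into one byte */
  static uint8_t midr_var_rev(uint32_t midr)
  {
  	return ((midr >> 16) & 0xf0) | (midr & 0x0f);
  }

  static bool midr_in_range(uint32_t midr, const struct midr_range *r)
  {
  	uint8_t rv = midr_var_rev(midr);

  	return (midr & MIDR_MODEL_MASK) == (r->model & MIDR_MODEL_MASK) &&
  	       rv >= r->rv_min && rv <= r->rv_max;
  }

  int main(void)
  {
  	/* MIDR_ALL_VERSIONS equivalent: any variant/revision matches */
  	struct midr_range tsv110 = { 0x4800d010u, 0x00, 0xff };

  	printf("affected: %d\n", midr_in_range(0x4800d011u, &tsv110));
  	return 0;
  }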
From acd8bfaa9384300a509c8aff5ee4a59a06eb2b2b Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 6 Jan 2026 17:37:06 +0000
Subject: [PATCH 03/86] KVM: arm64: Shuffle KVM_HOST_DATA_FLAG_* indices

There's a gap in the KVM_HOST_DATA_FLAG_* indices since the removal of
KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED and
KVM_HOST_DATA_FLAG_HOST_SME_ENABLED in commits:

* 459f059be702 ("KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN")
* 407a99c4654e ("KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN")

Shuffle the indices to remove the gap, as Oliver requested at the time
of the removals:

  https://lore.kernel.org/linux-arm-kernel/Z6qC4qn47ONfDCSH@linux.dev/

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: Fuad Tabba
Cc: Marc Zyngier
Cc: Mark Brown
Cc: Oliver Upton
Cc: Will Deacon
Tested-by: Fuad Tabba
Reviewed-by: Fuad Tabba
Reviewed-by: Mark Brown
Link: https://patch.msgid.link/20260106173707.3292074-3-mark.rutland@arm.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ac7f970c7883..4c8b4274f669 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -710,11 +710,11 @@ struct cpu_sve_state {
 struct kvm_host_data {
 #define KVM_HOST_DATA_FLAG_HAS_SPE			0
 #define KVM_HOST_DATA_FLAG_HAS_TRBE			1
-#define KVM_HOST_DATA_FLAG_TRBE_ENABLED			4
-#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED	5
-#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT		6
-#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED		7
-#define KVM_HOST_DATA_FLAG_HAS_BRBE			8
+#define KVM_HOST_DATA_FLAG_TRBE_ENABLED			2
+#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED	3
+#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT		4
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED		5
+#define KVM_HOST_DATA_FLAG_HAS_BRBE			6
 	unsigned long flags;

 	struct kvm_cpu_context host_ctxt;

From b1a9a9b96169bf1f9cb94b8aa33601996fad1e9c Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 6 Jan 2026 17:37:07 +0000
Subject: [PATCH 04/86] KVM: arm64: Remove ISB after writing FPEXC32_EL2

The value of FPEXC32_EL2 has no effect on execution in AArch64 state,
and consequently there's no need for an ISB after writing to it in the
hyp code (which executes in AArch64 state). When performing an exception
return to AArch32 state, the exception return will provide the necessary
context synchronization event.

Remove the redundant ISB.

Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: Fuad Tabba
Cc: Marc Zyngier
Cc: Mark Brown
Cc: Oliver Upton
Cc: Will Deacon
Tested-by: Fuad Tabba
Reviewed-by: Fuad Tabba
Link: https://patch.msgid.link/20260106173707.3292074-4-mark.rutland@arm.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 8dce3da85da3..91aa1862349c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -59,10 +59,8 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
*/ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) write_sysreg(1 << 30, fpexc32_el2); - isb(); - } } static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) From 8e8eb10c107e67f22f87cd8c963d30ea73f04d5f Mon Sep 17 00:00:00 2001 From: Petteri Kangaslampi Date: Tue, 13 Jan 2026 19:44:09 +0000 Subject: [PATCH 05/86] KVM: arm64: Calculate hyp VA size only once Calculate the hypervisor's VA size only once to maintain consistency between the memory layout and MMU initialization logic. Previously the two would be inconsistent when the kernel is configured for less than IDMAP_VA_BITS of VA space. Signed-off-by: Petteri Kangaslampi Tested-by: Vincent Donnefort Link: https://patch.msgid.link/20260113194409.2970324-2-pekangas@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/mmu.c | 28 ++++----------------------- arch/arm64/kvm/va_layout.c | 33 +++++++++++++++++++++++++++----- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 2dc5e6e742bb..d968aca0461a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -103,6 +103,7 @@ alternative_cb_end void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); +u32 kvm_hyp_va_bits(void); void kvm_apply_hyp_relocations(void); #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset) @@ -185,7 +186,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int __init kvm_mmu_init(u32 *hyp_va_bits); +int __init kvm_mmu_init(u32 hyp_va_bits); static inline void *__kvm_vector_slot2addr(void *base, enum arm64_hyp_spectre_vector slot) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4f80da0c0d1d..4703f0e15102 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2568,7 +2568,7 @@ static void pkvm_hyp_init_ptrauth(void) /* Inits Hyp-mode on all online CPUs */ static int __init init_hyp_mode(void) { - u32 hyp_va_bits; + u32 hyp_va_bits = kvm_hyp_va_bits(); int cpu; int err = -ENOMEM; @@ -2582,7 +2582,7 @@ static int __init init_hyp_mode(void) /* * Allocate Hyp PGD and setup Hyp identity mapping */ - err = kvm_mmu_init(&hyp_va_bits); + err = kvm_mmu_init(hyp_va_bits); if (err) goto out_err; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 48d7c372a4cd..d5a506c99f73 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2284,11 +2284,9 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { .virt_to_phys = kvm_host_pa, }; -int __init kvm_mmu_init(u32 *hyp_va_bits) +int __init kvm_mmu_init(u32 hyp_va_bits) { int err; - u32 idmap_bits; - u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -2302,25 +2300,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - /* - * The ID map is always configured for 48 bits of translation, which - * may be fewer than the number of VA bits used by the regular kernel - * stage 1, when VA_BITS=52. - * - * At EL2, there is only one TTBR register, and we can't switch between - * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom - * line: we need to use the extended range with *both* our translation - * tables. 
- * - * So use the maximum of the idmap VA bits and the regular kernel stage - * 1 VA bits to assure that the hypervisor can both ID map its code page - * and map any kernel memory. - */ - idmap_bits = IDMAP_VA_BITS; - kernel_bits = vabits_actual; - *hyp_va_bits = max(idmap_bits, kernel_bits); - - kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", kern_hyp_va(PAGE_OFFSET), @@ -2345,7 +2325,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) goto out; } - err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops); + err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits, &kvm_hyp_mm_ops); if (err) goto out_free_pgtable; @@ -2354,7 +2334,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) goto out_destroy_pgtable; io_map_base = hyp_idmap_start; - __hyp_va_bits = *hyp_va_bits; + __hyp_va_bits = hyp_va_bits; return 0; out_destroy_pgtable: diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 91b22a014610..2346f9435a71 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -46,9 +46,31 @@ static void init_hyp_physvirt_offset(void) hyp_physvirt_offset = (s64)__pa(kern_va) - (s64)hyp_va; } +/* + * Calculate the actual VA size used by the hypervisor + */ +__init u32 kvm_hyp_va_bits(void) +{ + /* + * The ID map is always configured for 48 bits of translation, which may + * be different from the number of VA bits used by the regular kernel + * stage 1. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. + * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits as the hypervisor VA size to assure that the hypervisor can + * both ID map its code page and map any kernel memory. + */ + return max(IDMAP_VA_BITS, vabits_actual); +} + /* * We want to generate a hyp VA with the following format (with V == - * vabits_actual): + * hypervisor VA bits): * * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 * --------------------------------------------------------- @@ -61,10 +83,11 @@ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; + u32 hyp_va_bits = kvm_hyp_va_bits(); /* Where is my RAM region? */ - hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); - hyp_va_msb ^= BIT(vabits_actual - 1); + hyp_va_msb = idmap_addr & BIT(hyp_va_bits - 1); + hyp_va_msb ^= BIT(hyp_va_bits - 1); tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); @@ -72,9 +95,9 @@ __init void kvm_compute_layout(void) va_mask = GENMASK_ULL(tag_lsb - 1, 0); tag_val = hyp_va_msb; - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (hyp_va_bits - 1)) { /* We have some free bits to insert a random tag. 
	 */
-		tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
+		tag_val |= get_random_long() & GENMASK_ULL(hyp_va_bits - 2, tag_lsb);
 	}
 	tag_val >>= tag_lsb;

From 4b16ad0bf821d4aceb050e9f569dc329883f1c5b Mon Sep 17 00:00:00 2001
From: Ben Dooks
Date: Mon, 12 Jan 2026 16:04:13 +0000
Subject: [PATCH 06/86] KVM: arm64: Fix missing include

Include for kvm_arm_hyp_stack_base declaration, which fixes the
following sparse warning:

arch/arm64/kvm/arm.c:63:1: warning: symbol 'kvm_arm_hyp_stack_base' was not declared. Should it be static?

Signed-off-by: Ben Dooks
Link: https://patch.msgid.link/20260112160413.603493-1-ben.dooks@codethink.co.uk
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/arm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4703f0e15102..0e1d18b90376 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

From f1640174c8a769511641bfd5b7da16c4943e2c64 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 10 Dec 2025 17:30:20 +0000
Subject: [PATCH 07/86] arm64: Convert ID_AA64MMFR0_EL1.TGRAN{4,16,64}_2 to UnsignedEnum

ID_AA64MMFR0_EL1.TGRAN{4,16,64}_2 are currently represented as unordered
enumerations. However, the architecture treats them as Unsigned, as
hinted at by the MRS data:

  (FEAT_S2TGran4K <=> (((UInt(ID_AA64MMFR0_EL1.TGran4_2) == 0) && FEAT_TGran4K) || (UInt(ID_AA64MMFR0_EL1.TGran4_2) >= 2)))

and similar descriptions exist for 16 and 64k.

This is also confirmed by D24.1.3.3 ("Alternative ID scheme used for
ID_AA64MMFR0_EL1 stage 2 granule sizes") in the L.b revision of the
ARM ARM.

Turn these fields into UnsignedEnum so that we can use the above
description more or less literally.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20251210173024.561160-3-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/tools/sysreg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 8921b51866d6..063b13b7c731 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2098,18 +2098,18 @@ UnsignedEnum 47:44 EXS
 	0b0000 NI
 	0b0001 IMP
 EndEnum
-Enum 43:40 TGRAN4_2
+UnsignedEnum 43:40 TGRAN4_2
 	0b0000 TGRAN4
 	0b0001 NI
 	0b0010 IMP
 	0b0011 52_BIT
 EndEnum
-Enum 39:36 TGRAN64_2
+UnsignedEnum 39:36 TGRAN64_2
 	0b0000 TGRAN64
 	0b0001 NI
 	0b0010 IMP
 EndEnum
-Enum 35:32 TGRAN16_2
+UnsignedEnum 35:32 TGRAN16_2
 	0b0000 TGRAN16
 	0b0001 NI
 	0b0010 IMP

From a035001dea37b885efb934e25057430ae1193d0a Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 10 Dec 2025 17:30:21 +0000
Subject: [PATCH 08/86] arm64: Convert VTCR_EL2 to sysreg infrastructure

Our definition of VTCR_EL2 is both partial (tons of fields are missing)
and totally inconsistent (some constants are shifted, some are not).
They are also expressed in terms of TCR, which is rather inconvenient.

Replace the ad-hoc definitions with the generated version. This results
in a bunch of additional changes to make the code work with the
unshifted nature of the generated enumerations.

The register data was extracted from the BSD-licensed AARCHMRS
(AARCHMRS_OPENSOURCE_A_profile_FAT-2025-09_ASL0).
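[To make the shifted-vs-unshifted distinction concrete: with generated field
masks, values are packed with FIELD_PREP() instead of pre-shifted constants.
A standalone sketch of the idiom; the FIELD_PREP/FIELD_GET definitions below
are simplified stand-ins for <linux/bitfield.h>, and only the two VTCR_EL2
fields named in the generated description are shown.]

  #include <stdint.h>
  #include <stdio.h>

  #define GENMASK64(h, l)	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
  #define FIELD_PREP(mask, val) \
  	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))
  #define FIELD_GET(mask, reg) \
  	(((uint64_t)(reg) & (mask)) >> __builtin_ctzll(mask))

  #define VTCR_EL2_SL0	GENMASK64(7, 6)	/* start level */
  #define VTCR_EL2_T0SZ	GENMASK64(5, 0)	/* 64 - IPA bits */

  int main(void)
  {
  	/* 40-bit IPA space: T0SZ = 64 - 40 = 24 */
  	uint64_t vtcr = FIELD_PREP(VTCR_EL2_T0SZ, 64 - 40) |
  			FIELD_PREP(VTCR_EL2_SL0, 1);

  	/* mirrors the new VTCR_EL2_IPA() definition */
  	printf("IPA = %llu bits\n",
  	       64 - (unsigned long long)FIELD_GET(VTCR_EL2_T0SZ, vtcr));
  	return 0;
  }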
Reviewed-by: Alexandru Elisei Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20251210173024.561160-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 52 +++++++---------------------- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/kvm/hyp/pgtable.c | 8 ++--- arch/arm64/kvm/nested.c | 8 ++--- arch/arm64/tools/sysreg | 57 ++++++++++++++++++++++++++++++++ 5 files changed, 76 insertions(+), 50 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e500600e4b9b..dfdbd2b65db9 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -124,37 +124,7 @@ #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) -/* VTCR_EL2 Registers bits */ -#define VTCR_EL2_DS TCR_EL2_DS -#define VTCR_EL2_RES1 (1U << 31) -#define VTCR_EL2_HD (1 << 22) -#define VTCR_EL2_HA (1 << 21) -#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT -#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK -#define VTCR_EL2_TG0_MASK TCR_TG0_MASK -#define VTCR_EL2_TG0_4K TCR_TG0_4K -#define VTCR_EL2_TG0_16K TCR_TG0_16K -#define VTCR_EL2_TG0_64K TCR_TG0_64K -#define VTCR_EL2_SH0_MASK TCR_SH0_MASK -#define VTCR_EL2_SH0_INNER TCR_SH0_INNER -#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK -#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA -#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK -#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA -#define VTCR_EL2_SL0_SHIFT 6 -#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) -#define VTCR_EL2_T0SZ_MASK 0x3f -#define VTCR_EL2_VS_SHIFT 19 -#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT) -#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT) - -#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x) - /* - * We configure the Stage-2 page tables to always restrict the IPA space to be - * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are - * not known to exist and will break with this configuration. - * * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables @@ -162,9 +132,6 @@ * */ -#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) - /* * VTCR_EL2:SL0 indicates the entry level for Stage2 translation. * Interestingly, it depends on the page size. 
@@ -196,30 +163,35 @@ */ #ifdef CONFIG_ARM64_64K_PAGES -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K +#define VTCR_EL2_TGRAN 64K #define VTCR_EL2_TGRAN_SL0_BASE 3UL #elif defined(CONFIG_ARM64_16K_PAGES) -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K +#define VTCR_EL2_TGRAN 16K #define VTCR_EL2_TGRAN_SL0_BASE 3UL #else /* 4K */ -#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K +#define VTCR_EL2_TGRAN 4K #define VTCR_EL2_TGRAN_SL0_BASE 2UL #endif #define VTCR_EL2_LVLS_TO_SL0(levels) \ - ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT) + FIELD_PREP(VTCR_EL2_SL0, (VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels)))) #define VTCR_EL2_SL0_TO_LVLS(sl0) \ ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE) #define VTCR_EL2_LVLS(vtcr) \ - VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT) + VTCR_EL2_SL0_TO_LVLS(FIELD_GET(VTCR_EL2_SL0, (vtcr))) -#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN) -#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK)) +#define VTCR_EL2_FLAGS (SYS_FIELD_PREP_ENUM(VTCR_EL2, SH0, INNER) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, ORGN0, WBWA) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, IRGN0, WBWA) | \ + SYS_FIELD_PREP_ENUM(VTCR_EL2, TG0, VTCR_EL2_TGRAN) | \ + VTCR_EL2_RES1) + +#define VTCR_EL2_IPA(vtcr) (64 - FIELD_GET(VTCR_EL2_T0SZ, (vtcr))) /* * ARM VMSAv8-64 defines an algorithm for finding the translation table diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 106b15eb232a..939f9c5bbae6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -517,7 +517,6 @@ #define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1) #define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2) #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) -#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 9abc0a6cf448..a99a7d6e7d0c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -583,8 +583,8 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) u64 vtcr = VTCR_EL2_FLAGS; s8 lvls; - vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; - vtcr |= VTCR_EL2_T0SZ(phys_shift); + vtcr |= FIELD_PREP(VTCR_EL2_PS, kvm_get_parange(mmfr0)); + vtcr |= FIELD_PREP(VTCR_EL2_T0SZ, (UL(64) - phys_shift)); /* * Use a minimum 2 level page table to prevent splitting * host PMD huge pages at stage2. @@ -624,9 +624,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_DS; /* Set the vmid bits */ - vtcr |= (get_vmid_bits(mmfr1) == 16) ? - VTCR_EL2_VS_16BIT : - VTCR_EL2_VS_8BIT; + vtcr |= (get_vmid_bits(mmfr1) == 16) ? 
VTCR_EL2_VS : 0; return vtcr; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index cdeeb8f09e72..522fa3cc9578 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -377,7 +377,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) { wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK; - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: wi->pgshift = 12; break; case VTCR_EL2_TG0_16K: @@ -513,7 +513,7 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock); - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: ttl = (TLBI_TTL_TG_4K << 2); break; @@ -530,7 +530,7 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) again: /* Iteratively compute the block sizes for a particular granule size */ - switch (vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { case VTCR_EL2_TG0_4K: if (sz < SZ_4K) sz = SZ_4K; else if (sz < SZ_2M) sz = SZ_2M; @@ -593,7 +593,7 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val) if (!max_size) { /* Compute the maximum extent of the invalidation */ - switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) { + switch (FIELD_GET(VTCR_EL2_TG0_MASK, mmu->tlb_vtcr)) { case VTCR_EL2_TG0_4K: max_size = SZ_1G; break; diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 063b13b7c731..a0f6249bd4f9 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -4400,6 +4400,63 @@ Field 56:12 BADDR Res0 11:0 EndSysreg +Sysreg VTCR_EL2 3 4 2 1 2 +Res0 63:46 +Field 45 HDBSS +Field 44 HAFT +Res0 43:42 +Field 41 TL0 +Field 40 GCSH +Res0 39 +Field 38 D128 +Field 37 S2POE +Field 36 S2PIE +Field 35 TL1 +Field 34 AssuredOnly +Field 33 SL2 +Field 32 DS +Res1 31 +Field 30 NSA +Field 29 NSW +Field 28 HWU62 +Field 27 HWU61 +Field 26 HWU60 +Field 25 HWU59 +Res0 24:23 +Field 22 HD +Field 21 HA +Res0 20 +Enum 19 VS + 0b0 8BIT + 0b1 16BIT +EndEnum +Field 18:16 PS +Enum 15:14 TG0 + 0b00 4K + 0b01 64K + 0b10 16K +EndEnum +Enum 13:12 SH0 + 0b00 NONE + 0b01 OUTER + 0b11 INNER +EndEnum +Enum 11:10 ORGN0 + 0b00 NC + 0b01 WBWA + 0b10 WT + 0b11 WBnWA +EndEnum +Enum 9:8 IRGN0 + 0b00 NC + 0b01 WBWA + 0b10 WT + 0b11 WBnWA +EndEnum +Field 7:6 SL0 +Field 5:0 T0SZ +EndSysreg + Sysreg GCSCR_EL2 3 4 2 5 0 Fields GCSCR_ELx EndSysreg From c259d763e6b09a463c85ff6b1d20ede92da48d24 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 17:30:22 +0000 Subject: [PATCH 09/86] KVM: arm64: Account for RES1 bits in DECLARE_FEAT_MAP() and co None of the registers we manage in the feature dependency infrastructure so far has any RES1 bit. This is about to change, as VTCR_EL2 has its bit 31 being RES1. In order to not fail the consistency checks by not describing a bit, add RES1 bits to the set of immutable bits. This requires some extra surgery for the FGT handling, as we now need to track RES1 bits there as well. There are no RES1 FGT bits *yet*. Watch this space. 
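[The requirement that the reworked check_fgt_masks() enforces below can be
stated compactly: res0, res1, mask and nmask must partition the 64-bit
register, covering every bit exactly once. A standalone sketch of that
invariant, with illustrative values:]

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct fgt_masks_sketch {
  	uint64_t mask, nmask, res0, res1;
  };

  static bool masks_consistent(const struct fgt_masks_sketch *m)
  {
  	/* every bit accounted for... */
  	if ((m->res0 | m->res1 | m->mask | m->nmask) != ~0ULL)
  		return false;

  	/* ...and no bit claimed twice: popcounts must sum to 64 */
  	return __builtin_popcountll(m->res0) + __builtin_popcountll(m->res1) +
  	       __builtin_popcountll(m->mask) + __builtin_popcountll(m->nmask) == 64;
  }

  int main(void)
  {
  	struct fgt_masks_sketch m = {
  		.mask  = 0x00000000ffffffffULL,	/* positive-polarity traps */
  		.nmask = 0x0fffffff00000000ULL,	/* negative-polarity traps */
  		.res0  = 0x7000000000000000ULL,
  		.res1  = 0x8000000000000000ULL,	/* e.g. a RES1 bit 63 */
  	};

  	printf("consistent: %d\n", masks_consistent(&m));
  	return 0;
  }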
Reviewed-by: Fuad Tabba Tested-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Tested-by: Fuad Tabba Link: https://patch.msgid.link/20251210173024.561160-5-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/config.c | 25 ++++++++------- arch/arm64/kvm/emulate-nested.c | 53 +++++++++++++++++-------------- 3 files changed, 44 insertions(+), 35 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac7f970c7883..b552a1e03848 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -638,6 +638,7 @@ struct fgt_masks { u64 mask; u64 nmask; u64 res0; + u64 res1; }; extern struct fgt_masks hfgrtr_masks; diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 24bb3f36e9d5..3845b188551b 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -16,14 +16,14 @@ */ struct reg_bits_to_feat_map { union { - u64 bits; - u64 *res0p; + u64 bits; + struct fgt_masks *masks; }; #define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */ #define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */ #define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */ -#define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */ +#define MASKS_POINTER BIT(3) /* Pointer to fgt_masks struct instead of bits */ unsigned long flags; @@ -92,8 +92,8 @@ struct reg_feat_map_desc { #define NEEDS_FEAT_FIXED(m, ...) \ __NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0) -#define NEEDS_FEAT_RES0(p, ...) \ - __NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__) +#define NEEDS_FEAT_MASKS(p, ...) \ + __NEEDS_FEAT_FLAG(p, MASKS_POINTER, masks, __VA_ARGS__) /* * Declare the dependency between a set of bits and a set of features, @@ -109,19 +109,20 @@ struct reg_feat_map_desc { #define DECLARE_FEAT_MAP(n, r, m, f) \ struct reg_feat_map_desc n = { \ .name = #r, \ - .feat_map = NEEDS_FEAT(~r##_RES0, f), \ + .feat_map = NEEDS_FEAT(~(r##_RES0 | \ + r##_RES1), f), \ .bit_feat_map = m, \ .bit_feat_map_sz = ARRAY_SIZE(m), \ } /* * Specialised version of the above for FGT registers that have their - * RES0 masks described as struct fgt_masks. + * RESx masks described as struct fgt_masks. */ #define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \ struct reg_feat_map_desc n = { \ .name = #msk, \ - .feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\ + .feat_map = NEEDS_FEAT_MASKS(&msk, f), \ .bit_feat_map = m, \ .bit_feat_map_sz = ARRAY_SIZE(m), \ } @@ -1168,21 +1169,21 @@ static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, mdcr_el2_feat_map, FEAT_AA64EL2); static void __init check_feat_map(const struct reg_bits_to_feat_map *map, - int map_size, u64 res0, const char *str) + int map_size, u64 resx, const char *str) { u64 mask = 0; for (int i = 0; i < map_size; i++) mask |= map[i].bits; - if (mask != ~res0) + if (mask != ~resx) kvm_err("Undefined %s behaviour, bits %016llx\n", - str, mask ^ ~res0); + str, mask ^ ~resx); } static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map) { - return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits; + return map->flags & MASKS_POINTER ? 
(map->masks->mask | map->masks->nmask) : map->bits; } static void __init check_reg_desc(const struct reg_feat_map_desc *r) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 834f13fb1fb7..75d49f83342a 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2105,23 +2105,24 @@ static u32 encoding_next(u32 encoding) } #define FGT_MASKS(__n, __m) \ - struct fgt_masks __n = { .str = #__m, .res0 = __m, } + struct fgt_masks __n = { .str = #__m, .res0 = __m ## _RES0, .res1 = __m ## _RES1 } -FGT_MASKS(hfgrtr_masks, HFGRTR_EL2_RES0); -FGT_MASKS(hfgwtr_masks, HFGWTR_EL2_RES0); -FGT_MASKS(hfgitr_masks, HFGITR_EL2_RES0); -FGT_MASKS(hdfgrtr_masks, HDFGRTR_EL2_RES0); -FGT_MASKS(hdfgwtr_masks, HDFGWTR_EL2_RES0); -FGT_MASKS(hafgrtr_masks, HAFGRTR_EL2_RES0); -FGT_MASKS(hfgrtr2_masks, HFGRTR2_EL2_RES0); -FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2_RES0); -FGT_MASKS(hfgitr2_masks, HFGITR2_EL2_RES0); -FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2_RES0); -FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2_RES0); +FGT_MASKS(hfgrtr_masks, HFGRTR_EL2); +FGT_MASKS(hfgwtr_masks, HFGWTR_EL2); +FGT_MASKS(hfgitr_masks, HFGITR_EL2); +FGT_MASKS(hdfgrtr_masks, HDFGRTR_EL2); +FGT_MASKS(hdfgwtr_masks, HDFGWTR_EL2); +FGT_MASKS(hafgrtr_masks, HAFGRTR_EL2); +FGT_MASKS(hfgrtr2_masks, HFGRTR2_EL2); +FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2); +FGT_MASKS(hfgitr2_masks, HFGITR2_EL2); +FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2); +FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2); static __init bool aggregate_fgt(union trap_config tc) { struct fgt_masks *rmasks, *wmasks; + u64 rresx, wresx; switch (tc.fgt) { case HFGRTR_GROUP: @@ -2154,24 +2155,27 @@ static __init bool aggregate_fgt(union trap_config tc) break; } + rresx = rmasks->res0 | rmasks->res1; + if (wmasks) + wresx = wmasks->res0 | wmasks->res1; + /* * A bit can be reserved in either the R or W register, but * not both. 
*/ - if ((BIT(tc.bit) & rmasks->res0) && - (!wmasks || (BIT(tc.bit) & wmasks->res0))) + if ((BIT(tc.bit) & rresx) && (!wmasks || (BIT(tc.bit) & wresx))) return false; if (tc.pol) - rmasks->mask |= BIT(tc.bit) & ~rmasks->res0; + rmasks->mask |= BIT(tc.bit) & ~rresx; else - rmasks->nmask |= BIT(tc.bit) & ~rmasks->res0; + rmasks->nmask |= BIT(tc.bit) & ~rresx; if (wmasks) { if (tc.pol) - wmasks->mask |= BIT(tc.bit) & ~wmasks->res0; + wmasks->mask |= BIT(tc.bit) & ~wresx; else - wmasks->nmask |= BIT(tc.bit) & ~wmasks->res0; + wmasks->nmask |= BIT(tc.bit) & ~wresx; } return true; @@ -2180,7 +2184,6 @@ static __init bool aggregate_fgt(union trap_config tc) static __init int check_fgt_masks(struct fgt_masks *masks) { unsigned long duplicate = masks->mask & masks->nmask; - u64 res0 = masks->res0; int ret = 0; if (duplicate) { @@ -2194,10 +2197,14 @@ static __init int check_fgt_masks(struct fgt_masks *masks) ret = -EINVAL; } - masks->res0 = ~(masks->mask | masks->nmask); - if (masks->res0 != res0) - kvm_info("Implicit %s = %016llx, expecting %016llx\n", - masks->str, masks->res0, res0); + if ((masks->res0 | masks->res1 | masks->mask | masks->nmask) != GENMASK(63, 0) || + (masks->res0 & masks->res1) || (masks->res0 & masks->mask) || + (masks->res0 & masks->nmask) || (masks->res1 & masks->mask) || + (masks->res1 & masks->nmask) || (masks->mask & masks->nmask)) { + kvm_info("Inconsistent masks for %s (%016llx, %016llx, %016llx, %016llx)\n", + masks->str, masks->res0, masks->res1, masks->mask, masks->nmask); + masks->res0 = ~(masks->res1 | masks->mask | masks->nmask); + } return ret; } From 9d2de51825598fc8e76f596c16368362753d8021 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 17:30:23 +0000 Subject: [PATCH 10/86] KVM: arm64: Convert VTCR_EL2 to config-driven sanitisation Describe all the VTCR_EL2 fields and their respective configurations, making sure that we correctly ignore the bits that are not defined for a given guest configuration. 
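[The shape of the sanitisation, reduced to its core (a sketch under assumed
simplifications; the kernel's compute_reg_res0_bits() also handles the
NEVER_FGU/FIXED_VALUE flags and function-valued feature tests omitted here):
every bit whose controlling feature is absent from the guest becomes RES0,
on top of the architected RES0 bits.]

  #include <stdbool.h>
  #include <stdint.h>

  struct kvm;	/* opaque for the sketch */

  struct bits_to_feat_sketch {
  	uint64_t bits;			/* register bits covered */
  	bool (*present)(struct kvm *);	/* feature test for this guest */
  };

  static uint64_t compute_res0_sketch(struct kvm *kvm,
  				    const struct bits_to_feat_sketch *map,
  				    int nr, uint64_t arch_res0)
  {
  	uint64_t res0 = arch_res0;

  	for (int i = 0; i < nr; i++) {
  		/* feature absent: the guest must see these bits as RES0 */
  		if (!map[i].present(kvm))
  			res0 |= map[i].bits;
  	}

  	return res0;
  }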
Reviewed-by: Alexandru Elisei Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20251210173024.561160-6-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 69 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/nested.c | 3 +- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 3845b188551b..9c04f895d376 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -141,6 +141,7 @@ struct reg_feat_map_desc { #define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP #define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP #define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP +#define FEAT_SEL2 ID_AA64PFR0_EL1, SEL2, IMP #define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP #define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP #define FEAT_S1POE ID_AA64MMFR3_EL1, S1POE, IMP @@ -202,6 +203,8 @@ struct reg_feat_map_desc { #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP #define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT +#define FEAT_HDBSS ID_AA64MMFR1_EL1, HAFDBS, HDBSS +#define FEAT_HPDS2 ID_AA64MMFR1_EL1, HPDS, HPDS2 #define FEAT_BTI ID_AA64PFR1_EL1, BT, IMP #define FEAT_ExS ID_AA64MMFR0_EL1, EXS, IMP #define FEAT_IESB ID_AA64MMFR2_EL1, IESB, IMP @@ -219,6 +222,7 @@ struct reg_feat_map_desc { #define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2 #define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP #define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP +#define FEAT_S2PIE ID_AA64MMFR3_EL1, S2PIE, IMP static bool not_feat_aa64el3(struct kvm *kvm) { @@ -362,6 +366,28 @@ static bool feat_pmuv3p9(struct kvm *kvm) return check_pmu_revision(kvm, V3P9); } +#define has_feat_s2tgran(k, s) \ + ((kvm_has_feat_enum(kvm, ID_AA64MMFR0_EL1, TGRAN##s##_2, TGRAN##s) && \ + kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN##s, IMP)) || \ + kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN##s##_2, IMP)) + +static bool feat_lpa2(struct kvm *kvm) +{ + return ((kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT) || + !kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4, IMP)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT) || + !kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16, IMP)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN4_2, 52_BIT) || + !has_feat_s2tgran(kvm, 4)) && + (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, TGRAN16_2, 52_BIT) || + !has_feat_s2tgran(kvm, 16))); +} + +static bool feat_vmid16(struct kvm *kvm) +{ + return kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16); +} + static bool compute_hcr_rw(struct kvm *kvm, u64 *bits) { /* This is purely academic: AArch32 and NV are mutually exclusive */ @@ -1168,6 +1194,44 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, mdcr_el2_feat_map, FEAT_AA64EL2); +static const struct reg_bits_to_feat_map vtcr_el2_feat_map[] = { + NEEDS_FEAT(VTCR_EL2_HDBSS, FEAT_HDBSS), + NEEDS_FEAT(VTCR_EL2_HAFT, FEAT_HAFT), + NEEDS_FEAT(VTCR_EL2_TL0 | + VTCR_EL2_TL1 | + VTCR_EL2_AssuredOnly | + VTCR_EL2_GCSH, + FEAT_THE), + NEEDS_FEAT(VTCR_EL2_D128, FEAT_D128), + NEEDS_FEAT(VTCR_EL2_S2POE, FEAT_S2POE), + NEEDS_FEAT(VTCR_EL2_S2PIE, FEAT_S2PIE), + NEEDS_FEAT(VTCR_EL2_SL2 | + VTCR_EL2_DS, + feat_lpa2), + NEEDS_FEAT(VTCR_EL2_NSA | + VTCR_EL2_NSW, + FEAT_SEL2), + NEEDS_FEAT(VTCR_EL2_HWU62 | + VTCR_EL2_HWU61 | + VTCR_EL2_HWU60 | + VTCR_EL2_HWU59, + FEAT_HPDS2), + NEEDS_FEAT(VTCR_EL2_HD, ID_AA64MMFR1_EL1, HAFDBS, DBM), + NEEDS_FEAT(VTCR_EL2_HA, ID_AA64MMFR1_EL1, HAFDBS, AF), + NEEDS_FEAT(VTCR_EL2_VS, feat_vmid16), + 
NEEDS_FEAT(VTCR_EL2_PS |
+		   VTCR_EL2_TG0 |
+		   VTCR_EL2_SH0 |
+		   VTCR_EL2_ORGN0 |
+		   VTCR_EL2_IRGN0 |
+		   VTCR_EL2_SL0 |
+		   VTCR_EL2_T0SZ,
+		   FEAT_AA64EL1),
+};
+
+static const DECLARE_FEAT_MAP(vtcr_el2_desc, VTCR_EL2,
+			      vtcr_el2_feat_map, FEAT_AA64EL2);
+
 static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
 				  int map_size, u64 resx, const char *str)
 {
@@ -1211,6 +1275,7 @@ void __init check_feature_map(void)
 	check_reg_desc(&tcr2_el2_desc);
 	check_reg_desc(&sctlr_el1_desc);
 	check_reg_desc(&mdcr_el2_desc);
+	check_reg_desc(&vtcr_el2_desc);
 }

 static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 522fa3cc9578..486eba72bb02 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1719,8 +1719,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
 	set_sysreg_masks(kvm, VTTBR_EL2, res0, res1);

 	/* VTCR_EL2 */
-	res0 = GENMASK(63, 32) | GENMASK(30, 20);
-	res1 = BIT(31);
+	get_reg_fixed_bits(kvm, VTCR_EL2, &res0, &res1);
 	set_sysreg_masks(kvm, VTCR_EL2, res0, res1);

 	/* VMPIDR_EL2 */

From 80cbfd7174f31010982f065e8ae73bf337992105 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 10 Dec 2025 17:30:24 +0000
Subject: [PATCH 11/86] KVM: arm64: Honor UX/PX attributes for EL2 S1 mappings

Now that we potentially have two bits to deal with when setting
execution permissions, make sure we correctly handle them both when
building the page tables and when reading back from them.

Reported-by: Alexandru Elisei
Reviewed-by: Fuad Tabba
Reviewed-by: Joey Gouly
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20251210173024.561160-7-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_pgtable.h | 12 +++---------
 arch/arm64/kvm/hyp/pgtable.c         | 24 +++++++++++++++++++++---
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c0ad262a8289..56bd08be9630 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -87,15 +87,9 @@ typedef u64 kvm_pte_t;

 #define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)

-#define __KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
-#define __KVM_PTE_LEAF_ATTR_HI_S1_UXN	BIT(54)
-#define __KVM_PTE_LEAF_ATTR_HI_S1_PXN	BIT(53)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN					\
-	({ cpus_have_final_cap(ARM64_KVM_HVHE) ?
\
-	 (__KVM_PTE_LEAF_ATTR_HI_S1_UXN |				\
-	  __KVM_PTE_LEAF_ATTR_HI_S1_PXN) :				\
-	 __KVM_PTE_LEAF_ATTR_HI_S1_XN; })
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
+#define KVM_PTE_LEAF_ATTR_HI_S1_UXN	BIT(54)
+#define KVM_PTE_LEAF_ATTR_HI_S1_PXN	BIT(53)

 #define KVM_PTE_LEAF_ATTR_HI_S2_XN	GENMASK(54, 53)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index a99a7d6e7d0c..13e34d66b6a7 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -342,6 +342,9 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 	if (!(prot & KVM_PGTABLE_PROT_R))
 		return -EINVAL;

+	if (!cpus_have_final_cap(ARM64_KVM_HVHE))
+		prot &= ~KVM_PGTABLE_PROT_UX;
+
 	if (prot & KVM_PGTABLE_PROT_X) {
 		if (prot & KVM_PGTABLE_PROT_W)
 			return -EINVAL;
@@ -351,8 +354,16 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 		if (system_supports_bti_kernel())
 			attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
+	}
+
+	if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+		if (!(prot & KVM_PGTABLE_PROT_PX))
+			attr |= KVM_PTE_LEAF_ATTR_HI_S1_PXN;
+		if (!(prot & KVM_PGTABLE_PROT_UX))
+			attr |= KVM_PTE_LEAF_ATTR_HI_S1_UXN;
 	} else {
-		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
+		if (!(prot & KVM_PGTABLE_PROT_PX))
+			attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
 	}

 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
@@ -373,8 +384,15 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
 	if (!kvm_pte_valid(pte))
 		return prot;

-	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
-		prot |= KVM_PGTABLE_PROT_X;
+	if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+		if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_PXN))
+			prot |= KVM_PGTABLE_PROT_PX;
+		if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_UXN))
+			prot |= KVM_PGTABLE_PROT_UX;
+	} else {
+		if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
+			prot |= KVM_PGTABLE_PROT_PX;
+	}

 	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
 	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)

From 4a7fe842b8a3f3c173c3075f03c60c3f9f62e299 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:25 +0000
Subject: [PATCH 12/86] arm64: Repaint ID_AA64MMFR2_EL1.IDS description

ID_AA64MMFR2_EL1.IDS, as described in the sysreg file, is pretty
horrible as it directly gives the ESR value. Repaint it using the usual
NI/IMP identifiers to describe the absence/presence of FEAT_IDST.

Also add the new EL3 routing feature, even if we really don't care
about it.
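[With the repaint, "is FEAT_IDST implemented" becomes a plain unsigned
comparison on bits [39:36] of ID_AA64MMFR2_EL1, rather than matching a magic
ESR-flavoured value. A trivial standalone sketch:]

  #include <stdbool.h>
  #include <stdint.h>

  /* ID_AA64MMFR2_EL1.IDS: 0b0000 NI, 0b0001 IMP, 0b0010 EL3 */
  static bool has_feat_idst(uint64_t mmfr2)
  {
  	return ((mmfr2 >> 36) & 0xf) >= 1;
  }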
Reviewed-by: Joey Gouly
Link: https://patch.msgid.link/20260108173233.2911955-2-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2 +-
 arch/arm64/tools/sysreg            | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 3108b5185c20..4db0562f5bfa 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -134,7 +134,7 @@ static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = {
 	MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP),
 	MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP),
 	MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP),
-	MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18),
+	MAX_FEAT(ID_AA64MMFR2_EL1, IDS, IMP),
 	MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP),
 	MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2),
 	MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP),

diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 8921b51866d6..d0ddfd572b89 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2256,9 +2256,10 @@ UnsignedEnum 43:40 FWB
 	0b0000 NI
 	0b0001 IMP
 EndEnum
-Enum 39:36 IDS
-	0b0000 0x0
-	0b0001 0x18
+UnsignedEnum 39:36 IDS
+	0b0000 NI
+	0b0001 IMP
+	0b0010 EL3
 EndEnum
 UnsignedEnum 35:32 AT
 	0b0000 NI

From 1ad9767accfcb81f404aa3d37d46b3eb494dce2f Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:26 +0000
Subject: [PATCH 13/86] KVM: arm64: Add trap routing for GMID_EL1

HCR_EL2.TID5 is currently ignored by the trap routing infrastructure.
Wire it up in the routing table so that GMID_EL1, the sole register
trapped by this bit, is correctly handled in the NV case.

Link: https://patch.msgid.link/20260108173233.2911955-3-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/emulate-nested.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 834f13fb1fb7..616eb6ad6870 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -70,6 +70,7 @@ enum cgt_group_id {
 	CGT_HCR_ENSCXT,
 	CGT_HCR_TTLBIS,
 	CGT_HCR_TTLBOS,
+	CGT_HCR_TID5,

 	CGT_MDCR_TPMCR,
 	CGT_MDCR_TPM,
@@ -308,6 +309,12 @@ static const struct trap_bits coarse_trap_bits[] = {
 		.mask		= HCR_TTLBOS,
 		.behaviour	= BEHAVE_FORWARD_RW,
 	},
+	[CGT_HCR_TID5] = {
+		.index		= HCR_EL2,
+		.value		= HCR_TID5,
+		.mask		= HCR_TID5,
+		.behaviour	= BEHAVE_FORWARD_RW,
+	},
 	[CGT_MDCR_TPMCR] = {
 		.index		= MDCR_EL2,
 		.value		= MDCR_EL2_TPMCR,
@@ -665,6 +672,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
 	SR_TRAP(SYS_CCSIDR2_EL1, CGT_HCR_TID2_TID4),
 	SR_TRAP(SYS_CLIDR_EL1, CGT_HCR_TID2_TID4),
 	SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4),
+	SR_TRAP(SYS_GMID_EL1, CGT_HCR_TID5),
 	SR_RANGE_TRAP(SYS_ID_PFR0_EL1, sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
 	SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),

From 19f75678238734ef383f9e10d8e1020873e97170 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:27 +0000
Subject: [PATCH 14/86] KVM: arm64: Add a generic synchronous exception injection primitive

Perhaps surprisingly, we don't currently have a generic way to inject a
synchronous exception at the EL the vcpu is currently running at.
Extract such a primitive from the UNDEF injection code.
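[For illustration, this is how a caller composes with the new primitive:
build a syndrome, then hand it off. A sketch of an UNDEF syndrome built the
same way inject_undef64() now does; the constants follow the architected
ESR_ELx layout, and the final call is shown only as a comment.]

  #include <stdint.h>

  #define ESR_ELx_EC_SHIFT	26
  #define ESR_ELx_EC_UNKNOWN	0x00ULL
  #define ESR_ELx_IL		(1ULL << 25)	/* 32-bit instruction length */

  static uint64_t build_undef_esr(int trapped_insn_is_32bit)
  {
  	uint64_t esr = ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT;

  	if (trapped_insn_is_32bit)
  		esr |= ESR_ELx_IL;

  	return esr;	/* would then be passed to kvm_inject_sync() */
  }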
Reviewed-by: Ben Horgan
Reviewed-by: Yuan Yao
Link: https://patch.msgid.link/20260108173233.2911955-4-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_emulate.h |  1 +
 arch/arm64/kvm/inject_fault.c        | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index c9eab316398e..df20d47f0d25 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,6 +45,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu);

 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_sync(struct kvm_vcpu *vcpu, u64 esr);
 int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
 int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
 void kvm_inject_size_fault(struct kvm_vcpu *vcpu);

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index dfcd66c65517..7102424a3fa5 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -162,12 +162,16 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 	vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
 }

+void kvm_inject_sync(struct kvm_vcpu *vcpu, u64 esr)
+{
+	pend_sync_exception(vcpu);
+	vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+}
+
 static void inject_undef64(struct kvm_vcpu *vcpu)
 {
 	u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);

-	pend_sync_exception(vcpu);
-
 	/*
 	 * Build an unknown exception, depending on the instruction
 	 * set.
@@ -175,7 +179,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_trap_il_is32bit(vcpu))
 		esr |= ESR_ELx_IL;

-	vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+	kvm_inject_sync(vcpu, esr);
 }

 #define DFSR_FSC_EXTABT_LPAE	0x10

From d78a14decd494caf72ea0144624621e7e43ae451 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:28 +0000
Subject: [PATCH 15/86] KVM: arm64: Handle FEAT_IDST for sysregs without specific handlers

Add a bit of infrastructure to triage_sysreg_trap() to handle the case
of registers falling into the Feature ID space that do not have a local
handler. For these, we can directly apply the FEAT_IDST semantics and
inject an EC=0x18 exception. Otherwise, an UNDEF will do.

Reviewed-by: Joey Gouly
Reviewed-by: Yuan Yao
Link: https://patch.msgid.link/20260108173233.2911955-5-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/emulate-nested.c | 13 +++++++++++++
 arch/arm64/kvm/sys_regs.h       | 10 ++++++++++
 2 files changed, 23 insertions(+)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 616eb6ad6870..4aabd624c4be 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2588,6 +2588,19 @@ local:

 	params = esr_sys64_to_params(esr);

+	/*
+	 * This implements the pseudocode UnimplementedIDRegister()
+	 * helper for the purpose of dealing with FEAT_IDST.
+	 */
+	if (in_feat_id_space(&params)) {
+		if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, IDS, IMP))
+			kvm_inject_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+		else
+			kvm_inject_undefined(vcpu);
+
+		return true;
+	}
+
 	/*
 	 * Check for the IMPDEF range, as per DDI0487 J.a,
 	 * D18.3.2 Reserved encodings for IMPLEMENTATION

diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index b3f904472fac..2a983664220c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -49,6 +49,16 @@ struct sys_reg_params {
 	  .Op2 = ((esr) >> 17) & 0x7,			\
 	  .is_write = !((esr) & 1) })

+/*
+ * The Feature ID space is defined as the System register space in AArch64
+ * with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, op2=={0-7}.
+ */
+static inline bool in_feat_id_space(struct sys_reg_params *p)
+{
+	return (p->Op0 == 3 && !(p->Op1 & 0b100) && p->Op1 != 2 &&
+		p->CRn == 0 && !(p->CRm & 0b1000));
+}
+
 struct sys_reg_desc {
 	/* Sysreg string for debug */
 	const char *name;

From f07ef1bef67ca08799df262cc901971ac274783d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:29 +0000
Subject: [PATCH 16/86] KVM: arm64: Handle CCSIDR2_EL1 and SMIDR_EL1 in a generic way

Now that we can handle ID registers using the FEAT_IDST infrastructure,
get rid of the specific handling of CCSIDR2_EL1 and SMIDR_EL1.

Reviewed-by: Yuan Yao
Link: https://patch.msgid.link/20260108173233.2911955-6-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/sys_regs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c8fd7c6a12a1..a2b14ca2a702 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3414,8 +3414,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
 	{ SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
 	  .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 },
-	{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
-	{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
 	IMPLEMENTATION_ID(AIDR_EL1, GENMASK_ULL(63, 0)),
 	{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
 	ID_FILTERED(CTR_EL0, ctr_el0,

From 70a5ce4efc0e1194718aad6f26332c99e6a119db Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:30 +0000
Subject: [PATCH 17/86] KVM: arm64: Force trap of GMID_EL1 when the guest doesn't have MTE

If our host has MTE, but the guest doesn't, make sure we set
HCR_EL2.TID5 to force GMID_EL1 being trapped. Such trap will be handled
by the FEAT_IDST handling.

Reviewed-by: Joey Gouly
Reviewed-by: Yuan Yao
Link: https://patch.msgid.link/20260108173233.2911955-7-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/sys_regs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a2b14ca2a702..3de206b49484 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -5576,6 +5576,8 @@ static void vcpu_set_hcr(struct kvm_vcpu *vcpu)

 	if (kvm_has_mte(vcpu->kvm))
 		vcpu->arch.hcr_el2 |= HCR_ATA;
+	else
+		vcpu->arch.hcr_el2 |= HCR_TID5;

 	/*
 	 * In the absence of FGT, we cannot independently trap TLBI

From e5d40a5a97c1d57e89aa5f324734065c6580b436 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:31 +0000
Subject: [PATCH 18/86] KVM: arm64: pkvm: Add a generic synchronous exception injection primitive

Similarly to the "classic" KVM code, pKVM doesn't have an "anything
goes" synchronous exception injection primitive. Carve one out of the
UNDEF injection code.
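[In essence, the carved-out primitive performs the architectural
exception-entry bookkeeping. A much-simplified register-level sketch; the
structure and the standard 0x200 current-EL/SPx sync vector offset are
illustrative, and the real code defers to kvm_pend_exception() and the hyp
exception machinery rather than updating state directly like this.]

  #include <stdint.h>

  struct guest_regs_sketch {
  	uint64_t pc, pstate;
  	uint64_t elr_el1, spsr_el1, esr_el1, vbar_el1;
  };

  #define VEC_SYNC_CUR_SPX	0x200	/* sync, current EL with SPx */

  static void inject_sync64_sketch(struct guest_regs_sketch *r, uint64_t esr)
  {
  	r->elr_el1 = r->pc;		/* return address for ERET */
  	r->spsr_el1 = r->pstate;	/* saved PSTATE */
  	r->esr_el1 = esr;		/* syndrome seen by the EL1 handler */
  	r->pc = r->vbar_el1 + VEC_SYNC_CUR_SPX;
  	/* PSTATE masking and target-EL selection elided */
  }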
Link: https://patch.msgid.link/20260108173233.2911955-8-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/sys_regs.c | 22 +++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 4db0562f5bfa..b197f3eb272c 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -243,16 +243,15 @@ static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
 	}
 }

-/*
- * Inject an unknown/undefined exception to an AArch64 guest while most of its
- * sysregs are live.
- */
-static void inject_undef64(struct kvm_vcpu *vcpu)
+static void inject_sync64(struct kvm_vcpu *vcpu, u64 esr)
 {
-	u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
-
 	*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
 	*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+	/*
+	 * Make sure we have the latest update to VBAR_EL1, as pKVM
+	 * handles traps very early, before sysregs are resync'ed
+	 */
 	__vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR));

 	kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
@@ -265,6 +264,15 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
 }

+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+	inject_sync64(vcpu, (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT));
+}
+
 static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		       struct sys_reg_desc const *r)
 {

From 592dc2c020686536dae1c427c78cf558a3df4414 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:32 +0000
Subject: [PATCH 19/86] KVM: arm64: pkvm: Report optional ID register traps with a 0x18 syndrome

With FEAT_IDST, unimplemented system registers in the feature ID space
must be reported using EC=0x18 at the closest handling EL, rather than
with an UNDEF.

Most of these system registers are always implemented thanks to their
dependency on FEAT_AA64, except for a set of (currently) three
registers: GMID_EL1 (depending on MTE2), CCSIDR2_EL1 (depending on
FEAT_CCIDX), and SMIDR_EL1 (depending on SME).

For these three registers, report their trap as EC=0x18 if they end up
trapping into KVM and FEAT_IDST is implemented in the guest. Otherwise,
just make them UNDEF.

Link: https://patch.msgid.link/20260108173233.2911955-9-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/sys_regs.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index b197f3eb272c..06d28621722e 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -347,6 +347,18 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
 	return true;
 }

+static bool pvm_idst_access(struct kvm_vcpu *vcpu,
+			    struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, IDS, IMP))
+		inject_sync64(vcpu, kvm_vcpu_get_esr(vcpu));
+	else
+		inject_undef64(vcpu);
+
+	return false;
+}
+
 /* Mark the specified system register as an AArch32 feature id register.
 */
 #define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }

@@ -477,6 +489,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {

 	HOST_HANDLED(SYS_CCSIDR_EL1),
 	HOST_HANDLED(SYS_CLIDR_EL1),
+	{ SYS_DESC(SYS_CCSIDR2_EL1), .access = pvm_idst_access },
+	{ SYS_DESC(SYS_GMID_EL1), .access = pvm_idst_access },
+	{ SYS_DESC(SYS_SMIDR_EL1), .access = pvm_idst_access },
 	HOST_HANDLED(SYS_AIDR_EL1),
 	HOST_HANDLED(SYS_CSSELR_EL1),
 	HOST_HANDLED(SYS_CTR_EL0),

From b638a9d0f8965b98403022cb91d8f3b31170eb35 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 8 Jan 2026 17:32:33 +0000
Subject: [PATCH 20/86] KVM: arm64: selftests: Add a test for FEAT_IDST

Add a very basic test checking that FEAT_IDST actually works for the
{GMID,SMIDR,CCSIDR2}_EL1 registers.

Link: https://patch.msgid.link/20260108173233.2911955-10-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 tools/testing/selftests/kvm/Makefile.kvm      |   1 +
 .../testing/selftests/kvm/arm64/idreg-idst.c  | 117 ++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/arm64/idreg-idst.c

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index ba5c2b643efa..bfa2fad0aba1 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -175,6 +175,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq
 TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
 TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
 TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/idreg-idst
 TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
 TEST_GEN_PROGS_arm64 += access_tracking_perf_test
 TEST_GEN_PROGS_arm64 += arch_timer

diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c
new file mode 100644
index 000000000000..9ca9f125abdb
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Access all FEAT_IDST-handled registers that depend on more than
+ * just FEAT_AA64, and fail if we don't get a trap with an 0x18 EC.
+ */ + +#include +#include +#include + +static volatile bool sys64, undef; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + sys64 = false; \ + undef = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(!undef, #r " unexpected UNDEF"); \ + __GUEST_ASSERT(sys64, #r " didn't trap"); \ + } while(0) + + +static void guest_code(void) +{ + check_sr_read(CCSIDR2_EL1); + check_sr_read(SMIDR_EL1); + check_sr_read(GMID_EL1); + + GUEST_DONE(); +} + +static void guest_sys64_handler(struct ex_regs *regs) +{ + sys64 = true; + undef = false; + regs->pc += 4; +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + sys64 = false; + undef = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_feat_idst(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* This VM has no MTE, no SME, no CCIDX */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_SYS64, guest_sys64_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t mmfr2; + + test_disable_default_vgic(); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + mmfr2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR2_EL1)); + __TEST_REQUIRE(FIELD_GET(ID_AA64MMFR2_EL1_IDS, mmfr2) > 0, + "FEAT_IDST not supported"); + kvm_vm_free(vm); + + test_guest_feat_idst(); + + return 0; +} From 7e03d07d03a486c66d5c084c7185b1bef29049e9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 9 Jan 2026 08:22:14 +0000 Subject: [PATCH 21/86] KVM: arm64: selftests: Disable unused TTBR1_EL1 translations KVM selftests map all guest code and data into the lower virtual address range (0x0000...) managed by TTBR0_EL1. The upper range (0xFFFF...) managed by TTBR1_EL1 is unused and uninitialized. If a guest accesses the upper range, the MMU attempts a translation table walk using uninitialized registers, leading to unpredictable behavior. Set `TCR_EL1.EPD1` to disable translation table walks for TTBR1_EL1, ensuring that any access to the upper range generates an immediate Translation Fault. Additionally, set `TCR_EL1.TBI1` (Top Byte Ignore) to ensure that tagged pointers in the upper range also deterministically trigger a Translation Fault via EPD1. Define `TCR_EPD1_MASK`, `TCR_EPD1_SHIFT`, and `TCR_TBI1` in `processor.h` to support this configuration. These are based on their definitions in `arch/arm64/include/asm/pgtable-hwdef.h`. 
Suggested-by: Will Deacon Reviewed-by: Itaru Kitayama Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260109082218.3236580-2-tabba@google.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/include/arm64/processor.h | 4 ++++ tools/testing/selftests/kvm/lib/arm64/processor.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index ff928716574d..ac97a1c436fc 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -90,6 +90,9 @@ #define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) #define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) +#define TCR_EPD1_SHIFT 23 +#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT) + #define TCR_IPS_SHIFT 32 #define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) #define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) @@ -97,6 +100,7 @@ #define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) #define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) +#define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_DS (UL(1) << 59) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index d46e4b13b92c..5b379da8cb90 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -384,6 +384,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; tcr_el1 |= TCR_T0SZ(vm->va_bits); + tcr_el1 |= TCR_TBI1; + tcr_el1 |= TCR_EPD1_MASK; if (use_lpa2_pte_format(vm)) tcr_el1 |= TCR_DS; From dd0c5d04d13cae8ff2694ef83d1ae5804d6d9798 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 9 Jan 2026 08:22:15 +0000 Subject: [PATCH 22/86] KVM: arm64: selftests: Fix incorrect rounding in page_align() The implementation of `page_align()` in `processor.c` calculates alignment incorrectly for values that are already aligned. Specifically, `(v + vm->page_size) & ~(vm->page_size - 1)` aligns to the *next* page boundary even if `v` is already page-aligned, potentially wasting a page of memory. Fix the calculation to use standard alignment logic: `(v + vm->page_size - 1) & ~(vm->page_size - 1)`. Fixes: 7a6629ef746d ("kvm: selftests: add virt mem support for aarch64") Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260109082218.3236580-3-tabba@google.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/lib/arm64/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 5b379da8cb90..607a4e462984 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -23,7 +23,7 @@ static vm_vaddr_t exception_handlers; static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { - return (v + vm->page_size) & ~(vm->page_size - 1); + return (v + vm->page_size - 1) & ~(vm->page_size - 1); } static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) From 582b39463f1c0774e0b3cb5be2118e8564b7941e Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 9 Jan 2026 08:22:16 +0000 Subject: [PATCH 23/86] KVM: riscv: selftests: Fix incorrect rounding in page_align() The implementation of `page_align()` in `processor.c` calculates alignment incorrectly for values that are already aligned. 
Specifically, `(v + vm->page_size) & ~(vm->page_size - 1)` aligns to the *next* page boundary even if `v` is already page-aligned, potentially wasting a page of memory. Fix the calculation to use standard alignment logic: `(v + vm->page_size - 1) & ~(vm->page_size - 1)`. Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V 64-bit") Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260109082218.3236580-4-tabba@google.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 2eac7d4b59e9..d5e8747b5e69 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -28,7 +28,7 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { - return (v + vm->page_size) & ~(vm->page_size - 1); + return (v + vm->page_size - 1) & ~(vm->page_size - 1); } static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) From de00d07321cf3f182762de2308c08062d5b824c0 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 9 Jan 2026 08:22:17 +0000 Subject: [PATCH 24/86] KVM: selftests: Move page_align() to shared header To avoid code duplication, move page_align() to the shared `kvm_util.h` header file. Rename it to vm_page_align(), to make it clear that the alignment is done with respect to the guest's base page size. No functional change intended. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260109082218.3236580-5-tabba@google.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/include/kvm_util.h | 5 +++++ tools/testing/selftests/kvm/lib/arm64/processor.c | 7 +------ tools/testing/selftests/kvm/lib/riscv/processor.c | 7 +------ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 81f4355ff28a..747effa614f1 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1258,6 +1258,11 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +static inline uint64_t vm_page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size - 1) & ~(vm->page_size - 1); +} + /* * Arch hook that is invoked via a constructor, i.e. before exeucting main(), * to allow for arch-specific setup that is common to all tests, e.g. 
computing diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 607a4e462984..1605dc740d1e 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -21,11 +21,6 @@ static vm_vaddr_t exception_handlers; -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) -{ - return (v + vm->page_size - 1) & ~(vm->page_size - 1); -} - static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) { unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; @@ -115,7 +110,7 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; if (vm->pgd_created) return; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d5e8747b5e69..401245fe31db 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -26,11 +26,6 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) return !ret && !!value; } -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) -{ - return (v + vm->page_size - 1) & ~(vm->page_size - 1); -} - static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) { return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << @@ -68,7 +63,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; if (vm->pgd_created) return; From e0a99a2b72f3c6365d9f4d6943ed45f7fc286b70 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 9 Jan 2026 08:22:18 +0000 Subject: [PATCH 25/86] KVM: selftests: Fix typos and stale comments in kvm_util Fix minor documentation errors in `kvm_util.h` and `kvm_util.c`. - Correct the argument description for `vcpu_args_set` in `kvm_util.h`, which incorrectly listed `vm` instead of `vcpu`. - Fix a typo in the comment for `kvm_selftest_arch_init` ("exeucting" -> "executing"). - Correct the return value description for `vm_vaddr_unused_gap` in `kvm_util.c` to match the implementation, which returns an address "at or above" `vaddr_min`, not "at or below". No functional change intended. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260109082218.3236580-6-tabba@google.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/include/kvm_util.h | 4 ++-- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 747effa614f1..97f9251eb073 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -939,7 +939,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); * VM VCPU Args Set * * Input Args: - * vm - Virtual Machine + * vcpu - vCPU * num - number of arguments * ... - arguments, each of type uint64_t * @@ -1264,7 +1264,7 @@ static inline uint64_t vm_page_align(struct kvm_vm *vm, uint64_t v) } /* - * Arch hook that is invoked via a constructor, i.e. 
before exeucting main(), + * Arch hook that is invoked via a constructor, i.e. before executing main(), * to allow for arch-specific setup that is common to all tests, e.g. computing * the default guest "mode". */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8279b6ced8d2..fab6b62d7810 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1351,7 +1351,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * Output Args: None * * Return: - * Lowest virtual address at or below vaddr_min, with at least + * Lowest virtual address at or above vaddr_min, with at least * sz unused bytes. TEST_ASSERT failure if no area of at least * size sz is available. * From 288eb55483c05bc37379a781d0d18b8e6c280f92 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:01 +0000 Subject: [PATCH 26/86] KVM: arm64: Fix Trace Buffer trapping for protected VMs For protected VMs in pKVM, the hypervisor should trap accesses to trace buffer system registers if Trace Buffer isn't supported by the VM. However, the current code only traps if Trace Buffer External Mode isn't supported. Fix this by checking for FEAT_TRBE (Trace Buffer) rather than FEAT_TRBE_EXT. Fixes: 9d5261269098 ("KVM: arm64: Trap external trace for protected VMs") Reported-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8911338961c5..f0bfab99c334 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -117,7 +117,7 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) val |= MDCR_EL2_TTRF; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP)) + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) val |= MDCR_EL2_E2TB_MASK; /* Trap Debug Communications Channel registers */ From e913c7ce9e6f62038a486218f43f699fc443e3e1 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:02 +0000 Subject: [PATCH 27/86] KVM: arm64: Fix Trace Buffer trap polarity for protected VMs The E2TB bits in MDCR_EL2 control trapping of Trace Buffer system register accesses. These accesses are trapped to EL2 when the bits are clear. The trap initialization logic for protected VMs in pvm_init_traps_mdcr() had the polarity inverted. When a guest did not support the Trace Buffer feature, the code was setting E2TB. This incorrectly disabled the trap, potentially allowing a protected guest to access registers for a feature it was not given. Fix this by inverting the operation. 
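The subtlety is that most MDCR_EL2 trap controls are set-to-trap,
while E2TB is clear-to-trap. A standalone sketch of the difference
(the field position below is illustrative only; the real definition
lives in the architecture headers):

	#include <assert.h>
	#include <stdint.h>

	#define MDCR_EL2_E2TB_MASK	(0x3ULL << 24)	/* illustrative E2TB position */

	int main(void)
	{
		uint64_t val = MDCR_EL2_E2TB_MASK;	/* assume E2TB starts out set: no trap */

		uint64_t buggy = val | MDCR_EL2_E2TB_MASK;	/* old code: trap stays disabled */
		uint64_t fixed = val & ~MDCR_EL2_E2TB_MASK;	/* new code: clear bits, trap to EL2 */

		assert(buggy & MDCR_EL2_E2TB_MASK);	/* guest could still touch TRBE regs */
		assert(!(fixed & MDCR_EL2_E2TB_MASK));	/* accesses now trap to EL2 */
		return 0;
	}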
Fixes: f50758260bff ("KVM: arm64: Group setting traps for protected VMs by control register") Reviewed-by: Suzuki K Poulose Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index f0bfab99c334..6bd539646204 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -118,7 +118,7 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) val |= MDCR_EL2_TTRF; if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) - val |= MDCR_EL2_E2TB_MASK; + val &= ~MDCR_EL2_E2TB_MASK; /* Trap Debug Communications Channel registers */ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP)) From ebbcaece84738f71b35f32339bdeb8776004e641 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:03 +0000 Subject: [PATCH 28/86] KVM: arm64: Fix MTE flag initialization for protected VMs The function pkvm_init_features_from_host() initializes guest features, propagating them from the host. The logic to propagate KVM_ARCH_FLAG_MTE_ENABLED (Memory Tagging Extension) has a couple of issues. First, the check was in the common path, before the divergence for protected and non-protected VMs. For non-protected VMs, this was unnecessary, as 'kvm->arch.flags' is completely overwritten by host_arch_flags immediately after, which already contains the MTE flag. For protected VMs, this was setting the flag even if the feature is not allowed. Second, the check was reading 'host_kvm->arch.flags' instead of using the local 'host_arch_flags', which is read once from the host flags. Fix these by moving the MTE flag check inside the protected-VM-only path, checking if the feature is allowed, and changing it to use the correct host_arch_flags local variable. This ensures non-protected VMs get the flag via the bulk copy, and protected VMs get it via an explicit check. Fixes: b7f345fbc32a ("KVM: arm64: Fix FEAT_MTE in pKVM") Reviewed-by: Ben Horgan Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 6bd539646204..d99137dddb1f 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -340,9 +340,6 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc /* Preserve the vgic model so that GICv3 emulation works */ hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model; - if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) - set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); - /* No restrictions for non-protected VMs. 
*/ if (!kvm_vm_is_protected(kvm)) { hyp_vm->kvm.arch.flags = host_arch_flags; @@ -357,6 +354,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc return; } + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_MTE)) + kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED); + bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); From c273feee70bd3d8c6c4d5efaf6b3ae945c839378 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:04 +0000 Subject: [PATCH 29/86] KVM: arm64: Introduce helper to calculate fault IPA offset This 12-bit FAR fault IPA offset mask is hard-coded as 'GENMASK(11, 0)' in several places to reconstruct the full fault IPA. Introduce FAR_TO_FIPA_OFFSET() to calculate this value in a shared header and replace all open-coded instances to improve readability. No functional change intended. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-5-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 2 ++ arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +- arch/arm64/kvm/inject_fault.c | 2 +- arch/arm64/kvm/mmu.c | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e500600e4b9b..58495593570c 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,6 +344,8 @@ #define PAR_TO_HPFAR(par) \ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) +#define FAR_TO_FIPA_OFFSET(far) ((far) & GENMASK_ULL(11, 0)) + #define ECN(x) { ESR_ELx_EC_##x, #x } #define kvm_arm_exception_class \ diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 5fd99763b54d..f0605836821e 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -44,7 +44,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) /* Build the full address */ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); /* If not for GICV, move on */ if (fault_ipa < vgic->vgic_cpu_base || diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index dfcd66c65517..c9bbce017dee 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -258,7 +258,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) unsigned long addr, esr; addr = kvm_vcpu_get_fault_ipa(vcpu); - addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + addr |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 48d7c372a4cd..ba44a29b75aa 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2070,7 +2070,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Falls between the IPA range and the PARange? */ if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } @@ -2175,7 +2175,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * faulting VA. This is always 12 bits, irrespective * of the page size. 
*/ - ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); ret = io_mem_abort(vcpu, ipa); goto out_unlock; } From 43a21a0f0c4ab7de755f2cee2ff4700f26fe0bba Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:05 +0000 Subject: [PATCH 30/86] KVM: arm64: Include VM type when checking VM capabilities in pKVM Certain features and capabilities are restricted in protected mode. Most of these features are restricted only for protected VMs, but some are restricted for ALL VMs in protected mode. Extend the pKVM capability check to pass the VM (kvm), and use that when determining supported features. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-6-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_pkvm.h | 10 ++++++---- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 10 +++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 0aecd4ac5f45..cccfff96f062 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -23,10 +23,12 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); /* - * This functions as an allow-list of protected VM capabilities. - * Features not explicitly allowed by this function are denied. + * Check whether the specific capability is allowed in pKVM. + * + * Certain features are allowed only for non-protected VMs in pKVM, which is why + * this takes the VM (kvm) as a parameter. */ -static inline bool kvm_pvm_ext_allowed(long ext) +static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext) { switch (ext) { case KVM_CAP_IRQCHIP: @@ -43,7 +45,7 @@ static inline bool kvm_pvm_ext_allowed(long ext) case KVM_CAP_ARM_PTRAUTH_GENERIC: return true; default: - return false; + return !kvm || !kvm_vm_is_protected(kvm); } } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4f80da0c0d1d..e906ad414e72 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -87,7 +87,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->flags) return -EINVAL; - if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap)) return -EINVAL; switch (cap->cap) { @@ -303,7 +303,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext)) return 0; switch (ext) { diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index d99137dddb1f..191d6f516113 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -354,23 +354,23 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc return; } - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_MTE)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_MTE)) kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED); bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PMU_V3)) set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_ADDRESS)) set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); - if 
(kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_GENERIC)) set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) { + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_SVE)) { set_bit(KVM_ARM_VCPU_SVE, allowed_features); kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE); } From f4eee308c8f4013a52bd7d7735e64b5127c1b4a8 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:06 +0000 Subject: [PATCH 31/86] KVM: arm64: Do not allow KVM_CAP_ARM_MTE for any guest in pKVM Supporting MTE in pKVM introduces significant complexity to the hypervisor at EL2, even for non-protected VMs, since it would require EL2 to handle tag management. For now, do not allow KVM_CAP_ARM_MTE for any VM type in protected mode. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-7-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_pkvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index cccfff96f062..09a759971653 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -44,6 +44,8 @@ static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext) case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: return true; + case KVM_CAP_ARM_MTE: + return false; default: return !kvm || !kvm_vm_is_protected(kvm); } From 8823485a697de5c280a7a2632620338722f16663 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:07 +0000 Subject: [PATCH 32/86] KVM: arm64: Track KVM IOCTLs and their associated KVM caps Track KVM IOCTLs (VM IOCTLs for now), and the associated KVM capability that enables that IOCTL. Add a function that performs the lookup. This will be used by CoCo VM Hypervisors (e.g., pKVM) to determine whether a particular KVM IOCTL is allowed for its VMs. Suggested-by: Oliver Upton Signed-off-by: Fuad Tabba [maz: don't expose KVM_CAP_BASIC to userspace, and rely on NR_VCPUS as a proxy for this] Link: https://patch.msgid.link/20251211104710.151771-8-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/arm.c | 45 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac7f970c7883..395778c8ecab 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1655,4 +1655,6 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg p; \ }) +long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e906ad414e72..749274483185 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -58,6 +58,51 @@ enum kvm_wfx_trap_policy { static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; +/* + * Tracks KVM IOCTLs and their associated KVM capabilities. + */ +struct kvm_ioctl_cap_map { + unsigned int ioctl; + long ext; +}; + +/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */ +#define KVM_CAP_ARM_BASIC KVM_CAP_NR_VCPUS + +/* + * Sorted by ioctl to allow for potential binary search, + * though linear scan is sufficient for this size. 
+ */
+static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = {
+	{ KVM_CREATE_IRQCHIP,			KVM_CAP_IRQCHIP },
+	{ KVM_ARM_SET_DEVICE_ADDR,		KVM_CAP_ARM_SET_DEVICE_ADDR },
+	{ KVM_ARM_MTE_COPY_TAGS,		KVM_CAP_ARM_MTE },
+	{ KVM_SET_DEVICE_ATTR,			KVM_CAP_DEVICE_CTRL },
+	{ KVM_GET_DEVICE_ATTR,			KVM_CAP_DEVICE_CTRL },
+	{ KVM_HAS_DEVICE_ATTR,			KVM_CAP_DEVICE_CTRL },
+	{ KVM_ARM_SET_COUNTER_OFFSET,		KVM_CAP_COUNTER_OFFSET },
+	{ KVM_ARM_GET_REG_WRITABLE_MASKS,	KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES },
+	{ KVM_ARM_PREFERRED_TARGET,		KVM_CAP_ARM_BASIC },
+};
+
+/*
+ * Set *ext to the capability.
+ * Return 0 if found, or -EINVAL if no IOCTL matches.
+ */
+long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) {
+		if (vm_ioctl_caps[i].ioctl == ioctl) {
+			*ext = vm_ioctl_caps[i].ext;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
 DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);

From b12b3b04f6ba072ca5a618a75e546c996be94bd1 Mon Sep 17 00:00:00 2001
From: Fuad Tabba
Date: Thu, 11 Dec 2025 10:47:08 +0000
Subject: [PATCH 33/86] KVM: arm64: Check whether a VM IOCTL is allowed in
 pKVM

Certain VM IOCTLs are tied to specific VM features. Since pKVM does
not support all features, restrict which IOCTLs are allowed depending
on whether the associated feature is supported.

Use the existing VM capability check as the source of truth for
whether an IOCTL is allowed for a particular VM, by mapping the IOCTLs
to their associated capabilities.

Suggested-by: Oliver Upton
Signed-off-by: Fuad Tabba
Link: https://patch.msgid.link/20251211104710.151771-9-tabba@google.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_pkvm.h | 20 ++++++++++++++++++++
 arch/arm64/kvm/arm.c              |  3 +++
 2 files changed, 23 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 09a759971653..757076ad4ec9 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /* Maximum number of VMs that can co-exist under pKVM. */
@@ -51,6 +52,25 @@ static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext)
 	}
 }
 
+/*
+ * Check whether the KVM VM IOCTL is allowed in pKVM.
+ *
+ * Certain features are allowed only for non-protected VMs in pKVM, which is why
+ * this takes the VM (kvm) as a parameter.
+ */ +static inline bool kvm_pkvm_ioctl_allowed(struct kvm *kvm, unsigned int ioctl) +{ + long ext; + int r; + + r = kvm_get_cap_for_kvm_ioctl(ioctl, &ext); + + if (WARN_ON_ONCE(r < 0)) + return false; + + return kvm_pkvm_ext_allowed(kvm, ext); +} + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 749274483185..a5d48bae8f6a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1938,6 +1938,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct kvm_device_attr attr; + if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl)) + return -EINVAL; + switch (ioctl) { case KVM_CREATE_IRQCHIP: { int ret; From f7d05ee84a6a8d3775e0f0c3070d9380bed844a9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 11 Dec 2025 10:47:09 +0000 Subject: [PATCH 34/86] KVM: arm64: Prevent host from managing timer offsets for protected VMs For protected VMs, the guest's timer offset state should not be controlled by the host and must always run with a virtual counter offset of 0. The existing timer logic allowed the host to set and manage the timer counter offsets for protected VMs in certain cases. Disable all host-side management of timer offsets for protected VMs by adding checks in the relevant code paths. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20251211104710.151771-10-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 99a07972068d..600f250753b4 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1056,10 +1056,14 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) ctxt->timer_id = timerid; - if (timerid == TIMER_VTIMER) - ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; - else - ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + if (!kvm_vm_is_protected(vcpu->kvm)) { + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + } else { + ctxt->offset.vm_offset = NULL; + } hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); @@ -1083,7 +1087,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer_context_init(vcpu, i); /* Synchronize offsets across timers of a VM if not already provided */ - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + if (!vcpu_is_protected(vcpu) && + !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); timer_set_offset(vcpu_ptimer(vcpu), 0); } @@ -1687,6 +1692,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, if (offset->reserved) return -EINVAL; + if (kvm_vm_is_protected(kvm)) + return -EINVAL; + mutex_lock(&kvm->lock); if (!kvm_trylock_all_vcpus(kvm)) { From 582234b0d8419e0b6cbfd87ae3f80568c8d0917e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Fri, 14 Nov 2025 11:11:53 +0000 Subject: [PATCH 35/86] KVM: arm64: Fix error checking for FFA_VERSION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to section 13.2 of the DEN0077 FF-A specification, when firmware does not support the requested version, it should 
reply with FFA_RET_NOT_SUPPORTED(-1). Table 13.6 specifies the type of
the error code as int32.

Currently, the error checking logic compares the unsigned long return
value it got from the SMC layer against a "-1" literal. This fails due
to a type mismatch: the literal is extended to 64 bits, whereas the
register contains only 32 bits of ones (0x00000000ffffffff).
Consequently, hyp_ffa_init misinterprets the "-1" return value as an
invalid FF-A version. This prevents pKVM initialization on devices
where FF-A is not supported in firmware.

Fix this by explicitly casting res.a0 to s32.

Signed-off-by: Kornel Dulęba
Acked-by: Will Deacon
Link: https://patch.msgid.link/20251114-pkvm_init_noffa-v1-1-87a82e87c345@google.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/ffa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index f731cc4c3f28..94161ea1cd60 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -792,7 +792,7 @@ static void do_ffa_version(struct arm_smccc_1_2_regs *res,
 		.a0 = FFA_VERSION,
 		.a1 = ffa_req_version,
 	}, res);
-	if (res->a0 == FFA_RET_NOT_SUPPORTED)
+	if ((s32)res->a0 == FFA_RET_NOT_SUPPORTED)
 		goto unlock;
 
 	hyp_ffa_version = ffa_req_version;
@@ -943,7 +943,7 @@ int hyp_ffa_init(void *pages)
 		.a0 = FFA_VERSION,
 		.a1 = FFA_VERSION_1_2,
 	}, &res);
-	if (res.a0 == FFA_RET_NOT_SUPPORTED)
+	if ((s32)res.a0 == FFA_RET_NOT_SUPPORTED)
 		return 0;
 
 	/*

From 54adbfe40e3b6d238293cc36ef071ed9f35c8af7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 22 Jan 2026 08:51:53 +0000
Subject: [PATCH 36/86] KVM: arm64: Always populate FGT masks at boot time

We currently only populate the FGT masks if the underlying HW does
support FEAT_FGT. However, with the addition of the RES1 support for
system registers, this results in a lot of noise at boot time, as
reported by Nathan.

That's because even if FGT isn't supported, we still check for the
attribution of the bits to particular features, and not keeping the
masks up-to-date leads to (fairly harmless) warnings.

Given that we want these checks to be enforced even if the HW doesn't
support FGT, enable the generation of FGT masks unconditionally (this
is rather cheap anyway). Only the storage of the FGT configuration is
avoided, which will save a tiny bit of memory on these machines.
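The resulting loop follows a "validate always, store only with
FEAT_FGT" shape; a condensed standalone sketch of the pattern (stub
helpers, not the kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	static bool have_fgt;					/* stand-in for the cpucap */
	static bool aggregate_ok(int e) { return e >= 0; }	/* stand-in validity check */

	int main(void)
	{
		int entries[] = { 1, 2, 3 };

		for (int i = 0; i < 3; i++) {
			if (!aggregate_ok(entries[i])) {
				printf("FGT bit is reserved\n");	/* now checked on all HW */
				return 1;
			}
			if (!have_fgt)
				continue;	/* only the storage is skipped */
			/* xa_store() of the trap config would go here */
		}
		return 0;
	}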
Reported-by: Nathan Chancellor
Tested-by: Nathan Chancellor
Fixes: c259d763e6b09 ("KVM: arm64: Account for RES1 bits in DECLARE_FEAT_MAP() and co")
Link: https://lore.kernel.org/r/20260120211558.GA834868@ax162
Link: https://patch.msgid.link/20260122085153.535538-1-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/emulate-nested.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 75d49f83342a..e5874effdf16 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2276,9 +2276,6 @@ int __init populate_nv_trap_config(void)
 	kvm_info("nv: %ld coarse grained trap handlers\n",
 		 ARRAY_SIZE(encoding_to_cgt));
 
-	if (!cpus_have_final_cap(ARM64_HAS_FGT))
-		goto check_mcb;
-
 	for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
 		const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
 		union trap_config tc;
@@ -2298,6 +2295,15 @@ int __init populate_nv_trap_config(void)
 		}
 
 		tc.val |= fgt->tc.val;
+
+		if (!aggregate_fgt(tc)) {
+			ret = -EINVAL;
+			print_nv_trap_error(fgt, "FGT bit is reserved", ret);
+		}
+
+		if (!cpus_have_final_cap(ARM64_HAS_FGT))
+			continue;
+
 		prev = xa_store(&sr_forward_xa, enc,
 				xa_mk_value(tc.val), GFP_KERNEL);
 
@@ -2305,11 +2311,6 @@ int __init populate_nv_trap_config(void)
 			ret = xa_err(prev);
 			print_nv_trap_error(fgt, "Failed FGT insertion", ret);
 		}
-
-		if (!aggregate_fgt(tc)) {
-			ret = -EINVAL;
-			print_nv_trap_error(fgt, "FGT bit is reserved", ret);
-		}
 	}
 }
 
@@ -2325,7 +2326,6 @@ int __init populate_nv_trap_config(void)
 	kvm_info("nv: %ld fine grained trap handlers\n",
 		 ARRAY_SIZE(encoding_to_fgt));
 
-check_mcb:
 	for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
 		const enum cgt_group_id *cgids;

From 2eb80a2eee18762a33aa770d742d64fe47852c7e Mon Sep 17 00:00:00 2001
From: "Zenghui Yu (Huawei)"
Date: Wed, 21 Jan 2026 18:16:31 +0800
Subject: [PATCH 37/86] KVM: arm64: nv: Return correct RES0 bits for FGT
 registers

We had extended the sysreg masking infrastructure to more general
registers, instead of restricting it to VNCR-backed registers, since
commit a0162020095e ("KVM: arm64: Extend masking facility to arbitrary
registers"). Fix kvm_get_sysreg_res0() to reflect this fact.

Note that we're sure that we only deal with FGT registers in
kvm_get_sysreg_res0(): the if (sr < __VNCR_START__) check is actually
never false, and should probably be removed later.
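The bug itself is a plain base-offset mismatch: the masks array begins
at the start of the sanitised range, but the index was still computed
from the (later) VNCR base. A standalone sketch with made-up enum
values:

	#include <assert.h>

	/* Made-up layout: FGT registers sit between the two markers. */
	enum { SANITISED_REG_START = 10, FGT_REG = 12, VNCR_START = 16 };

	int main(void)
	{
		int good = FGT_REG - SANITISED_REG_START;	/* 2: the right slot */
		int bad  = FGT_REG - VNCR_START;		/* -4: indexes out of bounds */

		assert(good >= 0);
		assert(bad < 0);	/* what the pre-fix code computed */
		return 0;
	}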
Fixes: 69c19e047dfe ("KVM: arm64: Add TCR2_EL2 to the sysreg arrays") Signed-off-by: Zenghui Yu (Huawei) Link: https://patch.msgid.link/20260121101631.41037-1-zenghui.yu@linux.dev Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/emulate-nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index e5874effdf16..774cfbf5b43b 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2435,7 +2435,7 @@ static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr) masks = kvm->arch.sysreg_masks; - return masks->mask[sr - __VNCR_START__].res0; + return masks->mask[sr - __SANITISED_REG_START__].res0; } static bool check_fgt_bit(struct kvm_vcpu *vcpu, enum vcpu_sysreg sr, From 6191b25d8bd902c1a107170df54bddbb0e2335f8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Jan 2026 18:06:59 +0000 Subject: [PATCH 38/86] arm64: Unconditionally enable LSE support LSE atomics have been in the architecture since ARMv8.1 (released in 2014), and are hopefully supported by all modern toolchains. Drop the optional nature of LSE support in the kernel, and always compile the support in, as this really is very little code. LL/SC still is the default, and the switch to LSE is done dynamically. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 16 ---------------- arch/arm64/include/asm/insn.h | 23 ----------------------- arch/arm64/include/asm/lse.h | 9 --------- arch/arm64/kernel/cpufeature.c | 2 -- arch/arm64/kvm/at.c | 7 ------- arch/arm64/lib/insn.c | 2 -- arch/arm64/net/bpf_jit_comp.c | 7 ------- 7 files changed, 66 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 93173f0a09c7..b6f57cc1e4df 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1873,22 +1873,6 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. -config ARM64_LSE_ATOMICS - bool - default ARM64_USE_LSE_ATOMICS - -config ARM64_USE_LSE_ATOMICS - bool "Atomic instructions" - default y - help - As part of the Large System Extensions, ARMv8.1 introduces new - atomic instructions that are designed specifically to scale in - very large systems. - - Say Y here to make use of these instructions for the in-kernel - atomic routines. This incurs a small overhead on CPUs that do - not support these instructions. 
- endmenu # "ARMv8.1 architectural features" menu "ARMv8.2 architectural features" diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index e1d30ba99d01..f463a654a2bb 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -671,7 +671,6 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, enum aarch64_insn_register Rd, u8 lsb); -#ifdef CONFIG_ARM64_LSE_ATOMICS u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, enum aarch64_insn_register address, enum aarch64_insn_register value, @@ -683,28 +682,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, enum aarch64_insn_register value, enum aarch64_insn_size_type size, enum aarch64_insn_mem_order_type order); -#else -static inline -u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_atomic_op op, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} - -static inline -u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size, - enum aarch64_insn_mem_order_type order) -{ - return AARCH64_BREAK_FAULT; -} -#endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3129a5819d0e..1e77c45bb0a8 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,8 +4,6 @@ #include -#ifdef CONFIG_ARM64_LSE_ATOMICS - #define __LSE_PREAMBLE ".arch_extension lse\n" #include @@ -27,11 +25,4 @@ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) -#else /* CONFIG_ARM64_LSE_ATOMICS */ - -#define __lse_ll_sc_body(op, ...) 
__ll_sc_##op(__VA_ARGS__)
-
-#define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
-
-#endif	/* CONFIG_ARM64_LSE_ATOMICS */
 #endif	/* __ASM_LSE_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c840a93b9ef9..547ccf28f289 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2560,7 +2560,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3)
 	},
 #endif /* CONFIG_ARM64_EPAN */
-#ifdef CONFIG_ARM64_LSE_ATOMICS
 	{
 		.desc = "LSE atomic instructions",
 		.capability = ARM64_HAS_LSE_ATOMICS,
@@ -2568,7 +2567,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
 	},
-#endif /* CONFIG_ARM64_LSE_ATOMICS */
 	{
 		.desc = "Virtualization Host Extensions",
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 53bf70126f81..6cbcec041a9d 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1700,7 +1700,6 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
 	}
 }
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
 static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
 {
 	u64 tmp = old;
@@ -1725,12 +1724,6 @@ static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
 	return ret;
 }
-#else
-static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
-{
-	return -EINVAL;
-}
-#endif
 
 static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
 {
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 4e298baddc2e..cc5b40917d0d 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -611,7 +611,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 					    state);
 }
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
 static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type,
 					  u32 insn)
 {
@@ -755,7 +754,6 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
 	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
 					    value);
 }
-#endif
 
 u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 				 enum aarch64_insn_register src,
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 74dd29816f36..008612aa4131 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -776,7 +776,6 @@ static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	return 0;
 }
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
 	const u8 code = insn->code;
@@ -843,12 +842,6 @@ static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	return 0;
 }
-#else
-static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
-{
-	return -EINVAL;
-}
-#endif
 
 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {

From 018a231b0260ebd85eddca3fec85031b59f50117 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 7 Jan 2026 18:07:00 +0000
Subject: [PATCH 39/86] arm64: Unconditionally enable PAN support

FEAT_PAN has been around since ARMv8.1 (over 11 years ago), has no
compiler dependency (we have our own accessors), and is a great
security benefit. Drop CONFIG_ARM64_PAN, and make the support
unconditional.
Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 17 ----------------- arch/arm64/include/asm/cpucaps.h | 2 -- arch/arm64/include/asm/uaccess.h | 6 ++---- arch/arm64/kernel/cpufeature.c | 4 ---- arch/arm64/kvm/hyp/entry.S | 2 +- 5 files changed, 3 insertions(+), 28 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b6f57cc1e4df..fcfb62ec4bae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1680,7 +1680,6 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" depends on !KCSAN - select ARM64_PAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved @@ -1859,20 +1858,6 @@ config ARM64_HW_AFDBM to work on pre-ARMv8.1 hardware and the performance impact is minimal. If unsure, say Y. -config ARM64_PAN - bool "Enable support for Privileged Access Never (PAN)" - default y - help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. - - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. - - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. - endmenu # "ARMv8.1 architectural features" menu "ARMv8.2 architectural features" @@ -2109,7 +2094,6 @@ config ARM64_MTE depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 # Required for tag checking in the uaccess routines - select ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS select ARCH_USES_PG_ARCH_2 @@ -2141,7 +2125,6 @@ menu "ARMv8.7 architectural features" config ARM64_EPAN bool "Enable support for Enhanced Privileged Access Never (EPAN)" default y - depends on ARM64_PAN help Enhanced Privileged Access Never (EPAN) allows Privileged Access Never to be used with Execute-only mappings. 
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 2c8029472ad4..177c691914f8 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -19,8 +19,6 @@ cpucap_is_possible(const unsigned int cap)
 		      "cap must be < ARM64_NCAPS");
 
 	switch (cap) {
-	case ARM64_HAS_PAN:
-		return IS_ENABLED(CONFIG_ARM64_PAN);
 	case ARM64_HAS_EPAN:
 		return IS_ENABLED(CONFIG_ARM64_EPAN);
 	case ARM64_SVE:
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 6490930deef8..9810106a3f66 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -124,14 +124,12 @@ static inline bool uaccess_ttbr0_enable(void)
 
 static inline void __uaccess_disable_hw_pan(void)
 {
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,
-			CONFIG_ARM64_PAN));
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN));
 }
 
 static inline void __uaccess_enable_hw_pan(void)
 {
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
-			CONFIG_ARM64_PAN));
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN));
 }
 
 static inline void uaccess_disable_privileged(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 547ccf28f289..716440d147a2 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2164,7 +2164,6 @@ static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int sco
 	return cpu_supports_bbml2_noabort();
 }
 
-#ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
 	/*
@@ -2176,7 +2175,6 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
 	set_pstate_pan(1);
 }
-#endif /* CONFIG_ARM64_PAN */
 
 #ifdef CONFIG_ARM64_RAS_EXTN
 static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
@@ -2541,7 +2539,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF)
 	},
-#ifdef CONFIG_ARM64_PAN
 	{
 		.desc = "Privileged Access Never",
 		.capability = ARM64_HAS_PAN,
@@ -2550,7 +2547,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_pan,
 		ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP)
 	},
-#endif /* CONFIG_ARM64_PAN */
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 9f4e8d68ab50..11a10d8f5beb 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -126,7 +126,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
 
 	add	x1, x1, #VCPU_CONTEXT
 
-	ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
+	ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN)
 
 	// Store the guest regs x2 and x3
 	stp	x2, x3,   [x1, #CPU_XREG_OFFSET(2)]

From f174a9ffcd48d78a45d560c02ce4071ded036b53 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 19 Jan 2026 10:29:22 +0800
Subject: [PATCH 40/86] KVM: arm64: Add exit to userspace on {LD,ST}64B*
 outside of memslots

The main use of {LD,ST}64B* is to talk to a device, which is hopefully
directly assigned to the guest and requires no additional handling.

However, this does not preclude a VMM from exposing a virtual device
to the guest and allowing 64 byte accesses as part of the programming
interface. A direct consequence of this is that we need to be able to
forward such access to userspace.
Given that such a contraption is very unlikely to ever exist, we
choose to offer a limited service: userspace gets (as part of a new
exit reason) the ESR, the IPA, and that's it. Userspace is fully
expected to handle the full semantics of the instructions, deal with
ACCDATA and the return values, and increment PC. Much fun.

A canonical implementation can also simply inject an abort and be
done with it.

Frankly, don't try to do anything else unless you have time to waste.

Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Marc Zyngier
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 arch/arm64/kvm/mmio.c    | 27 ++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h |  3 ++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 54f9358c9e0e..e2285ed8c91d 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -159,6 +159,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 	bool is_write;
 	int len;
 	u8 data_buf[8];
+	u64 esr;
+
+	esr = kvm_vcpu_get_esr(vcpu);
 
 	/*
 	 * No valid syndrome? Ask userspace for help if it has
@@ -168,7 +171,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 	 * though, so directly deliver an exception to the guest.
 	 */
 	if (!kvm_vcpu_dabt_isvalid(vcpu)) {
-		trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
+		trace_kvm_mmio_nisv(*vcpu_pc(vcpu), esr,
 				    kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 		if (vcpu_is_protected(vcpu))
@@ -185,6 +188,28 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 		return -ENOSYS;
 	}
 
+	/*
+	 * When (DFSC == 0b00xxxx || DFSC == 0b10101x) && DFSC != 0b0000xx
+	 * ESR_EL2[12:11] describe the Load/Store Type. This allows us to
+	 * punt the LD64B/ST64B/ST64BV/ST64BV0 instructions to userspace,
+	 * which will have to provide a full emulation of these 4
+	 * instructions. No, we don't expect this to be fast.
+	 *
+	 * We rely on traps being set if the corresponding features are not
+	 * enabled, so if we get here, userspace has promised to handle
+	 * it already.
+	 */
+	switch (kvm_vcpu_trap_get_fault(vcpu)) {
+	case 0b000100 ... 0b001111:
+	case 0b101010 ... 0b101011:
+		if (FIELD_GET(GENMASK(12, 11), esr)) {
+			run->exit_reason = KVM_EXIT_ARM_LDST64B;
+			run->arm_nisv.esr_iss = esr & ~(u64)ESR_ELx_FSC;
+			run->arm_nisv.fault_ipa = fault_ipa;
+			return 0;
+		}
+	}
+
 	/*
 	 * Prepare MMIO operation. First decode the syndrome data we get
 	 * from the CPU. Then try if some in-kernel emulation feels
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dddb781b0507..88cca0e22ece 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -180,6 +180,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_MEMORY_FAULT     39
 #define KVM_EXIT_TDX              40
 #define KVM_EXIT_ARM_SEA          41
+#define KVM_EXIT_ARM_LDST64B      42
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -402,7 +403,7 @@ struct kvm_run {
 		} eoi;
 		/* KVM_EXIT_HYPERV */
 		struct kvm_hyperv_exit hyperv;
-		/* KVM_EXIT_ARM_NISV */
+		/* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */
 		struct {
 			__u64 esr_iss;
 			__u64 fault_ipa;

From 902eebac8fa3bad1c369f48f2eaf859755ad9e6d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 19 Jan 2026 10:29:23 +0800
Subject: [PATCH 41/86] KVM: arm64: Add documentation for KVM_EXIT_ARM_LDST64B

Add a bit of documentation for KVM_EXIT_ARM_LDST64B so that userspace
knows what to expect.
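For VMM authors, the run-loop handling might take the following shape
(a hedged sketch: the helper functions are hypothetical stubs, and the
exit-reason constant only exists once this series' uapi change is in
the installed headers, hence the fallback define):

	#include <linux/kvm.h>

	#ifndef KVM_EXIT_ARM_LDST64B
	#define KVM_EXIT_ARM_LDST64B 42	/* value from this series' uapi change */
	#endif

	/* Hypothetical stubs; a real emulate_ldst64b() must implement the full
	 * semantics (operand fetch, endianness, ACCDATA, return value, PC). */
	static int emulate_ldst64b(struct kvm_run *run) { (void)run; return -1; }
	static int inject_external_abort(int vcpu_fd, __u64 ipa)
	{ (void)vcpu_fd; (void)ipa; return 0; }

	int handle_exit(int vcpu_fd, struct kvm_run *run)
	{
		switch (run->exit_reason) {
		case KVM_EXIT_ARM_LDST64B:
			/* The canonical lazy answer: abort the guest access. */
			if (emulate_ldst64b(run))
				return inject_external_abort(vcpu_fd,
							     run->arm_nisv.fault_ipa);
			return 0;
		default:
			return -1;
		}
	}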
Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Marc Zyngier
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 Documentation/virt/kvm/api.rst | 43 ++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 01a3abef8abb..bfa0ab343081 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1303,12 +1303,13 @@ userspace, for example because of missing instruction syndrome decode
 information or because there is no device mapped at the accessed IPA, then
 userspace can ask the kernel to inject an external abort using the address
 from the exiting fault on the VCPU. It is a programming error to set
-ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or
-KVM_EXIT_ARM_NISV. This feature is only available if the system supports
-KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in
-how userspace reports accesses for the above cases to guests, across different
-userspace implementations. Nevertheless, userspace can still emulate all Arm
-exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
+ext_dabt_pending after an exit which was not one of KVM_EXIT_MMIO,
+KVM_EXIT_ARM_NISV, or KVM_EXIT_ARM_LDST64B. This feature is only available if
+the system supports KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which
+provides commonality in how userspace reports accesses for the above cases to
+guests, across different userspace implementations. Nevertheless, userspace
+can still emulate all Arm exceptions by manipulating individual registers
+using the KVM_SET_ONE_REG API.
 
 See KVM_GET_VCPU_EVENTS for the data structure.
 
@@ -7050,12 +7051,14 @@ in send_page or recv a buffer to recv_page).
 
 ::
 
-		/* KVM_EXIT_ARM_NISV */
+		/* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */
 		struct {
 			__u64 esr_iss;
 			__u64 fault_ipa;
 		} arm_nisv;
 
+- KVM_EXIT_ARM_NISV:
+
 Used on arm64 systems. If a guest accesses memory not in a memslot,
 KVM will typically return to userspace and ask it to do MMIO emulation on its
 behalf. However, for certain classes of instructions, no instruction decode
@@ -7089,6 +7092,32 @@ Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if
 queried outside of a protected VM context, the feature will not be
 exposed if queried on a protected VM file descriptor.
 
+- KVM_EXIT_ARM_LDST64B:
+
+Used on arm64 systems. When a guest uses an LD64B, ST64B, ST64BV, or ST64BV0
+instruction outside of a memslot, KVM will return to userspace with
+KVM_EXIT_ARM_LDST64B, exposing the relevant ESR_EL2 information and faulting
+IPA, similarly to KVM_EXIT_ARM_NISV.
+
+Userspace is supposed to fully emulate the instructions, which includes:
+
+  - fetch the operands for a store, including ACCDATA_EL1 in the case
+    of a ST64BV0 instruction
+  - deal with the endianness if the guest is big-endian
+  - emulate the access, including the delivery of an exception if the
+    access didn't succeed
+  - provide a return value in the case of ST64BV/ST64BV0
+  - return the data in the case of a load
+  - increment PC if the instruction was successfully executed
+
+Note that there is no expectation of performance for this emulation, as it
+involves a large number of interactions with the guest state. It is, however,
+expected that the instruction's semantics are preserved, especially the
+single-copy atomicity property of the 64 byte access.
+
+This exit reason must be handled if userspace sets ID_AA64ISAR1_EL1.LS64 to a
+non-zero value, indicating that FEAT_LS64* is enabled.
+
 ::
 
     /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */

From 2937aeec9dc5d25a02c1415a56d88ee4cc17ad83 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 19 Jan 2026 10:29:24 +0800
Subject: [PATCH 42/86] KVM: arm64: Handle DABT caused by LS64* instructions
 on unsupported memory

If FEAT_LS64WB is not supported, the FEAT_LS64* instructions can only
access Device/Uncacheable memory; otherwise a data abort for unsupported
Exclusive or atomic access (0x35, UAoEF) is generated, per the spec. The
exception level this abort is routed to is IMPLEMENTATION DEFINED, and
it may be routed to EL2 on a VHE VM, according to DDI0487L.b Section
C3.2.6 "Single-copy atomic 64-byte load/store".

If the implementation generates the DABT at the final enabled stage
(stage-2), inject the UAoEF back into the guest after checking that the
memslot is valid.

Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/esr.h         |  8 +++++++
 arch/arm64/include/asm/kvm_emulate.h |  1 +
 arch/arm64/kvm/inject_fault.c        | 34 ++++++++++++++++++++++++++++
 arch/arm64/kvm/mmu.c                 | 14 +++++++++++-
 4 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 4975a92cbd17..7e86d400864e 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -124,6 +124,7 @@
 #define ESR_ELx_FSC_SEA_TTW(n)	(0x14 + (n))
 #define ESR_ELx_FSC_SECC	(0x18)
 #define ESR_ELx_FSC_SECC_TTW(n)	(0x1c + (n))
+#define ESR_ELx_FSC_EXCL_ATOMIC	(0x35)
 #define ESR_ELx_FSC_ADDRSZ	(0x00)
 
 /*
@@ -488,6 +489,13 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
 	       (esr == ESR_ELx_FSC_ACCESS_L(0));
 }
 
+static inline bool esr_fsc_is_excl_atomic_fault(unsigned long esr)
+{
+	esr = esr & ESR_ELx_FSC;
+
+	return esr == ESR_ELx_FSC_EXCL_ATOMIC;
+}
+
 static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
 {
 	esr &= ESR_ELx_FSC;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index c9eab316398e..bab967d65715 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -47,6 +47,7 @@ void kvm_skip_instr32(struct kvm_vcpu *vcpu);
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
 int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr);
 void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
 
 static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index dfcd66c65517..6cc7ad84d7d8 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -253,6 +253,40 @@ int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
 	return 1;
 }
 
+static int kvm_inject_nested_excl_atomic(struct kvm_vcpu *vcpu, u64 addr)
+{
+	u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_DABT_LOW) |
+		  FIELD_PREP(ESR_ELx_FSC, ESR_ELx_FSC_EXCL_ATOMIC) |
+		  ESR_ELx_IL;
+
+	vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
+	return kvm_inject_nested_sync(vcpu, esr);
+}
+
+/**
+ * kvm_inject_dabt_excl_atomic - inject a data abort for unsupported exclusive
+ *				 or atomic access
+ * @vcpu: The VCPU to receive the data abort
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+int kvm_inject_dabt_excl_atomic(struct kvm_vcpu *vcpu, u64 addr)
+{
+	u64 esr;
+
+	if (is_nested_ctxt(vcpu) && (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM))
+		return kvm_inject_nested_excl_atomic(vcpu, addr);
+
+	__kvm_inject_sea(vcpu, false, addr);
+	esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu));
+	esr &= ~ESR_ELx_FSC;
+	esr |= ESR_ELx_FSC_EXCL_ATOMIC;
+	vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+	return 1;
+}
+
 void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
 {
 	unsigned long addr, esr;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 48d7c372a4cd..edc348431d71 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1845,6 +1845,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			return ret;
 	}
 
+	/*
+	 * The guest performed an atomic/exclusive operation on memory with
+	 * unsupported attributes (e.g. ld64b/st64b on normal memory without
+	 * FEAT_LS64WB), triggering the exception here. Since the memslot is
+	 * valid, inject the fault back into the guest.
+	 */
+	if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) {
+		kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu));
+		return 1;
+	}
+
 	if (nested)
 		adjust_nested_fault_perms(nested, &prot, &writable);
 
@@ -2082,7 +2093,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	/* Check the stage-2 fault is trans. fault or write fault */
 	if (!esr_fsc_is_translation_fault(esr) &&
 	    !esr_fsc_is_permission_fault(esr) &&
-	    !esr_fsc_is_access_flag_fault(esr)) {
+	    !esr_fsc_is_access_flag_fault(esr) &&
+	    !esr_fsc_is_excl_atomic_fault(esr)) {
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),

From dea58da4b6fede082d9f38ce069090fd6d43f4e2 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 19 Jan 2026 10:29:25 +0800
Subject: [PATCH 43/86] arm64: Provide basic EL2 setup for FEAT_{LS64, LS64_V}
 usage at EL0/1

The instructions introduced by FEAT_{LS64, LS64_V} are controlled by
HCRX_EL2.{EnALS, EnASR}. Configure all of these to allow usage at
EL0/1.

This doesn't mean these instructions are always available at EL0/1
when implemented: the hypervisor still has control at runtime.
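In C terms, the assembly added below boils down to something like the
following sketch, where hcrx stands for the value being assembled for
HCRX_EL2 and the ID/HCRX field names follow the generated sysreg
definitions used in the diff:

	u64 ls64 = FIELD_GET(ID_AA64ISAR1_EL1_LS64_MASK,
			     read_sysreg_s(SYS_ID_AA64ISAR1_EL1));

	if (ls64 >= ID_AA64ISAR1_EL1_LS64_LS64)
		hcrx |= HCRX_EL2_EnALS;		/* LD64B/ST64B */
	if (ls64 >= ID_AA64ISAR1_EL1_LS64_LS64_V)
		hcrx |= HCRX_EL2_EnASR;		/* ST64BV */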
Acked-by: Will Deacon
Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/el2_setup.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index cacd20df1786..9393220de069 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -83,9 +83,19 @@
 	/* Enable GCS if supported */
 	mrs_s	x1, SYS_ID_AA64PFR1_EL1
 	ubfx	x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
-	cbz	x1, .Lset_hcrx_\@
+	cbz	x1, .Lskip_gcs_hcrx_\@
 	orr	x0, x0, #HCRX_EL2_GCSEn
 
+.Lskip_gcs_hcrx_\@:
+	/* Enable LS64, LS64_V if supported */
+	mrs_s	x1, SYS_ID_AA64ISAR1_EL1
+	ubfx	x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4
+	cbz	x1, .Lset_hcrx_\@
+	orr	x0, x0, #HCRX_EL2_EnALS
+	cmp	x1, #ID_AA64ISAR1_EL1_LS64_LS64_V
+	b.lt	.Lset_hcrx_\@
+	orr	x0, x0, #HCRX_EL2_EnASR
+
 .Lset_hcrx_\@:
 	msr_s	SYS_HCRX_EL2, x0
 
 .Lskip_hcrx_\@:

From 151b92c92a45704216c37d6238efbffd84aac538 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 19 Jan 2026 10:29:26 +0800
Subject: [PATCH 44/86] KVM: arm64: Enable FEAT_{LS64, LS64_V} in the
 supported guest

Use of the FEAT_{LS64, LS64_V} instructions in a guest is also
controlled by HCRX_EL2.{EnALS, EnASR}. Enable them if the guest has the
corresponding features.

Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/kvm_emulate.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index bab967d65715..29291e25ecfd 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -695,6 +695,12 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
 
 		if (kvm_has_sctlr2(kvm))
 			vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
+
+		if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64))
+			vcpu->arch.hcrx_el2 |= HCRX_EL2_EnALS;
+
+		if (kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V))
+			vcpu->arch.hcrx_el2 |= HCRX_EL2_EnASR;
 	}
 }
 #endif /* __ARM64_KVM_EMULATE_H__ */

From 58ce78667a641f93afa0c152c700a1673383d323 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 19 Jan 2026 10:29:27 +0800
Subject: [PATCH 45/86] arm64: Add support for FEAT_{LS64, LS64_V}

Armv8.7 introduces single-copy atomic 64-byte load and store
instructions and their variants, named FEAT_{LS64, LS64_V}. These
features are identified by ID_AA64ISAR1_EL1.LS64, and the use of such
instructions in userspace (EL0) can be trapped. As st64bv (FEAT_LS64_V)
and st64bv0 (FEAT_LS64_ACCDATA) cannot be told apart, only FEAT_LS64
and FEAT_LS64_ACCDATA (which will be supported in a later patch) are
exported to userspace; FEAT_LS64_V is enabled only in the kernel.

In order to support the use of the corresponding instructions in
userspace:
- Make ID_AA64ISAR1_EL1.LS64 visible to userspace
- Add identification and enablement in the cpufeature list
- Expose support for these features to userspace through HWCAP3
  and cpuinfo

ld64b/st64b (FEAT_LS64) and st64bv (FEAT_LS64_V) are intended for
special memory (device memory) and so require support from the CPU, the
system and the target memory location (a device that supports these
instructions). HWCAP3_LS64 implies support from the CPU and the system
(since there is no identification method for the system, SoC vendors
should only advertise support in the CPU if the system supports these
instructions as well).
Otherwise, for ld64b/st64b, atomicity may not be guaranteed or a DABT
will be generated, so users (most likely userspace driver developers)
should make sure the target memory (device) also supports these
instructions. For st64bv, 0xffffffffffffffff is returned as the status
result for unsupported memory, so users should check for it. Document
these restrictions along with HWCAP3_LS64.

Acked-by: Arnd Bergmann
Acked-by: Oliver Upton
Signed-off-by: Yicong Yang
Signed-off-by: Zhou Wang
Signed-off-by: Will Deacon
---
 Documentation/arch/arm64/booting.rst    | 12 +++++++++++
 Documentation/arch/arm64/elf_hwcaps.rst |  7 +++++++
 arch/arm64/include/asm/hwcap.h          |  1 +
 arch/arm64/include/uapi/asm/hwcap.h     |  1 +
 arch/arm64/kernel/cpufeature.c          | 28 +++++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c             |  1 +
 arch/arm64/tools/cpucaps                |  2 ++
 7 files changed, 52 insertions(+)

diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index 26efca09aef3..13ef311dace8 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -556,6 +556,18 @@ Before jumping into the kernel, the following conditions must be met:
 
     - MDCR_EL3.TPM (bit 6) must be initialized to 0b0
 
+  For CPUs with support for 64-byte loads and stores without status (FEAT_LS64):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.EnALS (bit 1) must be initialised to 0b1.
+
+  For CPUs with support for 64-byte stores with status (FEAT_LS64_V):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.EnASR (bit 2) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index a15df4956849..97315ae6c0da 100644
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -444,6 +444,13 @@ HWCAP3_MTE_STORE_ONLY
 HWCAP3_LSFE
     Functionality implied by ID_AA64ISAR3_EL1.LSFE == 0b0001
 
+HWCAP3_LS64
+    Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0001. Note that
+    the ld64b/st64b instructions require support from the CPU, the system
+    and the target (device) memory location; HWCAP3_LS64 implies support
+    from the CPU. Users should only use ld64b/st64b on supported target
+    (device) memory locations, and otherwise fall back to the non-atomic
+    alternatives.
+
 4.
Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 1f63814ae6c4..72ea4bda79f3 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -179,6 +179,7 @@ #define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR) #define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY) #define KERNEL_HWCAP_LSFE __khwcap3_feature(LSFE) +#define KERNEL_HWCAP_LS64 __khwcap3_feature(LS64) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 575564ecdb0b..06f83ca8de56 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -146,5 +146,6 @@ #define HWCAP3_MTE_FAR (1UL << 0) #define HWCAP3_MTE_STORE_ONLY (1UL << 1) #define HWCAP3_LSFE (1UL << 2) +#define HWCAP3_LS64 (1UL << 3) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 716440d147a2..c688d2225c94 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -240,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), @@ -2258,6 +2259,16 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ +static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS); +} + +static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, 0); +} + #ifdef CONFIG_ARM64_PSEUDO_NMI static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) @@ -3142,6 +3153,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP) }, + { + .desc = "LS64", + .capability = ARM64_HAS_LS64, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_ls64, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64) + }, + { + .desc = "LS64_V", + .capability = ARM64_HAS_LS64_V, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_ls64_v, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V) + }, {}, }; @@ -3261,6 +3288,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64), HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c44e6d94f5de..6149bc91251d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ 
-81,6 +81,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", [KERNEL_HWCAP_GCS] = "gcs", + [KERNEL_HWCAP_LS64] = "ls64", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 0fac75f01534..50a0c695f340 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -46,6 +46,8 @@ HAS_HCX HAS_LDAPR HAS_LPA2 HAS_LSE_ATOMICS +HAS_LS64 +HAS_LS64_V HAS_MOPS HAS_NESTED_VIRT HAS_BBML2_NOABORT From c103c2dfe4975da31b67a0fcb95761359f30992d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:15 +0000 Subject: [PATCH 46/86] KVM: arm64: Remove dead code resetting HCR_EL2 for pKVM The pKVM lifecycle does not support tearing down the hypervisor and returning to the hyp stub once initialized. The transition to protected mode is one-way. Consequently, the code path in hyp-init.S responsible for resetting EL2 registers (triggered by kexec or hibernation) is unreachable in protected mode. Remove the dead code handling HCR_EL2 reset for ARM64_KVM_PROTECTED_MODE. No functional change intended. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index aada42522e7b..0d42eedc7167 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -260,11 +260,6 @@ reset: msr sctlr_el2, x5 isb -alternative_if ARM64_KVM_PROTECTED_MODE - mov_q x5, HCR_HOST_NVHE_FLAGS - msr_hcr_el2 x5 -alternative_else_nop_endif - /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 From f35abcbb8a084db4c24b66ccc8db0405c08e2f61 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:16 +0000 Subject: [PATCH 47/86] KVM: arm64: Trap MTE access and discovery when MTE is disabled If MTE is not supported by the hardware, or is disabled in the kernel configuration (`CONFIG_ARM64_MTE=n`) or command line (`arm64.nomte`), the kernel stops advertising MTE to userspace and avoids using MTE instructions. However, this is a software-level disable only. When MTE hardware is present and enabled by EL3 firmware, leaving `HCR_EL2.ATA` set allows the host to execute MTE instructions (STG, LDG, etc.) and access allocation tags in physical memory. Prevent this by clearing `HCR_EL2.ATA` when MTE is disabled. Remove it from the `HCR_HOST_NVHE_FLAGS` default, and conditionally set it in `cpu_prepare_hyp_mode()` only when `system_supports_mte()` returns true. This causes MTE instructions to trap to EL2 when `HCR_EL2.ATA` is cleared. Additionally, set `HCR_EL2.TID5` when MTE is disabled. This traps reads of `GMID_EL1` (Multiple tag transfer ID register) to EL2, preventing the discovery of MTE parameters (such as tag block size) when the feature is suppressed. Early boot code in `head.S` temporarily keeps `HCR_ATA` set to avoid special-casing initialization paths. This is safe because this code executes before untrusted code runs and will clear `HCR_ATA` if MTE is disabled. 
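The resulting choice at hyp init can be summarised by this fragment,
a sketch mirroring the cpu_prepare_hyp_mode() hunk below:

	if (system_supports_mte())
		params->hcr_el2 |= HCR_ATA;	/* allow tag accesses */
	else
		params->hcr_el2 |= HCR_TID5;	/* trap GMID_EL1 reads */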
Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kernel/head.S | 2 +- arch/arm64/kvm/arm.c | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e500600e4b9b..752e3e1604e8 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -101,7 +101,7 @@ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ca04b338cb0d..87a822e5c4ca 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -299,7 +299,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) isb 0: - init_el2_hcr HCR_HOST_NVHE_FLAGS + init_el2_hcr HCR_HOST_NVHE_FLAGS | HCR_ATA init_el2_state /* Hypervisor stub */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4f80da0c0d1d..aeac113e5e74 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2044,6 +2044,12 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; else params->hcr_el2 = HCR_HOST_NVHE_FLAGS; + + if (system_supports_mte()) + params->hcr_el2 |= HCR_ATA; + else + params->hcr_el2 |= HCR_TID5; + if (cpus_have_final_cap(ARM64_KVM_HVHE)) params->hcr_el2 |= HCR_E2H; params->vttbr = params->vtcr = 0; From 5ee8ad69da07d0e2cffa0ce2f2339c9ad2d587f2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:17 +0000 Subject: [PATCH 48/86] KVM: arm64: Inject UNDEF when accessing MTE sysregs with MTE disabled When MTE hardware is present but disabled via software (`arm64.nomte` or `CONFIG_ARM64_MTE=n`), the kernel clears `HCR_EL2.ATA` and sets `HCR_EL2.TID5`, to prevent the use of MTE instructions. Additionally, accesses to certain MTE system registers trap to EL2 with exception class ESR_ELx_EC_SYS64. To emulate hardware without MTE (where such accesses would cause an Undefined Instruction exception), inject UNDEF into the host. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 67 ++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a7c689152f68..faed1b38e6cc 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -687,6 +687,69 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) kvm_skip_host_instr(); } +/* + * Inject an Undefined Instruction exception into the host. + * + * This is open-coded to allow control over PSTATE construction without + * complicating the generic exception entry helpers. 
+ */
+static void inject_undef64(void)
+{
+	u64 spsr_mask, vbar, sctlr, old_spsr, new_spsr, esr, offset;
+
+	spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT;
+
+	vbar = read_sysreg_el1(SYS_VBAR);
+	sctlr = read_sysreg_el1(SYS_SCTLR);
+	old_spsr = read_sysreg_el2(SYS_SPSR);
+
+	new_spsr = old_spsr & spsr_mask;
+	new_spsr |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+	new_spsr |= PSR_MODE_EL1h;
+
+	if (!(sctlr & SCTLR_EL1_SPAN))
+		new_spsr |= PSR_PAN_BIT;
+
+	if (sctlr & SCTLR_ELx_DSSBS)
+		new_spsr |= PSR_SSBS_BIT;
+
+	if (system_supports_mte())
+		new_spsr |= PSR_TCO_BIT;
+
+	esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) | ESR_ELx_IL;
+	offset = CURRENT_EL_SP_ELx_VECTOR + except_type_sync;
+
+	write_sysreg_el1(esr, SYS_ESR);
+	write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+	write_sysreg_el1(old_spsr, SYS_SPSR);
+	write_sysreg_el2(vbar + offset, SYS_ELR);
+	write_sysreg_el2(new_spsr, SYS_SPSR);
+}
+
+static bool handle_host_mte(u64 esr)
+{
+	switch (esr_sys64_to_sysreg(esr)) {
+	case SYS_RGSR_EL1:
+	case SYS_GCR_EL1:
+	case SYS_TFSR_EL1:
+	case SYS_TFSRE0_EL1:
+		/* If we're here for any reason other than MTE, it's a bug. */
+		if (read_sysreg(HCR_EL2) & HCR_ATA)
+			return false;
+		break;
+	case SYS_GMID_EL1:
+		/* If we're here for any reason other than MTE, it's a bug. */
+		if (!(read_sysreg(HCR_EL2) & HCR_TID5))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	inject_undef64();
+	return true;
+}
+
 void handle_trap(struct kvm_cpu_context *host_ctxt)
 {
 	u64 esr = read_sysreg_el2(SYS_ESR);
@@ -702,6 +765,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
 	case ESR_ELx_EC_DABT_LOW:
 		handle_host_mem_abort(host_ctxt);
 		break;
+	case ESR_ELx_EC_SYS64:
+		if (handle_host_mte(esr))
+			break;
+		fallthrough;
 	default:
 		BUG();
 	}

From 230b080623fec2e1302df2afe2cf2dcb34f9c89b Mon Sep 17 00:00:00 2001
From: Fuad Tabba
Date: Thu, 22 Jan 2026 11:22:18 +0000
Subject: [PATCH 49/86] KVM: arm64: Use kvm_has_mte() in pKVM trap
 initialization

When initializing HCR traps in protected mode, use kvm_has_mte() to
check for MTE support rather than kvm_has_feat(kvm, ID_AA64PFR1_EL1,
MTE, IMP).

kvm_has_mte() provides a more comprehensive check:
- kvm_has_feat() only checks if MTE is in the guest's ID register view
  (i.e., what we advertise to the guest)
- kvm_has_mte() checks both system_supports_mte() AND whether
  KVM_ARCH_FLAG_MTE_ENABLED is set for this VM instance

Signed-off-by: Fuad Tabba
Link: https://patch.msgid.link/20260122112218.531948-5-tabba@google.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8911338961c5..9b933424e70a 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -82,7 +82,7 @@ static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
 	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
 		val &= ~(HCR_AMVOFFEN);
 
-	if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
+	if (!kvm_has_mte(kvm)) {
 		val |= HCR_TID5;
 		val &= ~(HCR_DCT | HCR_ATA);
 	}

From d4236f1ef270347b26ed34bcd0eb28d285b86bcb Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 23 Jan 2026 19:16:33 +0000
Subject: [PATCH 50/86] arm64: Add MT_S2{,_FWB}_AS_S1 encodings

pKVM usage of S2 translation on the host is purely for isolation
purposes, not translation. To that effect, the memory attributes being
used must be those of S1.
With FWB=0, this is easily achieved by using the Normal Cacheable type
(which is the weakest possible memory type) at S2, and letting S1 pick
something stronger as required.

With FWB=1, the attributes are combined in a different way, and we
cannot arbitrarily use Normal Cacheable. We can, however, use a memattr
encoding that indicates that the final attributes are those of Stage-1.

Add these encodings and a few pointers to the relevant parts of the
specification. They might come in handy some day.

Reviewed-by: Joey Gouly
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260123191637.715429-2-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/memory.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 9d54b2ea49d6..a2b7a33966ff 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -175,19 +175,24 @@
 #define MT_DEVICE_nGnRE		4
 
 /*
- * Memory types for Stage-2 translation
+ * Memory types for Stage-2 translation when HCR_EL2.FWB=0. See R_HMNDG,
+ * R_TNHFM, R_GQFSF and I_MCQKW for the details on how these attributes get
+ * combined with Stage-1.
 */
 #define MT_S2_NORMAL		0xf
 #define MT_S2_NORMAL_NC		0x5
 #define MT_S2_DEVICE_nGnRE	0x1
+#define MT_S2_AS_S1		MT_S2_NORMAL
 
 /*
- * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
- * Stage-2 enforces Normal-WB and Device-nGnRE
+ * Memory types for Stage-2 translation when HCR_EL2.FWB=1. Stage-2 enforces
+ * Normal-WB and Device-nGnRE, unless we actively say that S1 wins. See
+ * R_VRJSW and R_RHWZM for details.
 */
 #define MT_S2_FWB_NORMAL	6
 #define MT_S2_FWB_NORMAL_NC	5
 #define MT_S2_FWB_DEVICE_nGnRE	1
+#define MT_S2_FWB_AS_S1		7
 
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)

From 17d7b15131f3ed7fff29f2a43b6ead547e495653 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 23 Jan 2026 19:16:34 +0000
Subject: [PATCH 51/86] KVM: arm64: Add KVM_PGTABLE_S2_AS_S1 flag

Plumb the MT_S2{,_FWB}_AS_S1 memory types into the KVM_S2_MEMATTR()
macro with a new KVM_PGTABLE_S2_AS_S1 flag. Nobody selects it yet.

Reviewed-by: Joey Gouly
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260123191637.715429-3-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_pgtable.h |  2 ++
 arch/arm64/kvm/hyp/pgtable.c         | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index fc02de43c68d..9ce51a637da0 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -232,10 +232,12 @@ struct kvm_pgtable_mm_ops {
  * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
  *				ARM64_HAS_STAGE2_FWB.
  * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
+ * @KVM_PGTABLE_S2_AS_S1:	Final memory attributes are those of Stage-1.
 */
 enum kvm_pgtable_stage2_flags {
 	KVM_PGTABLE_S2_NOFWB	= BIT(0),
 	KVM_PGTABLE_S2_IDMAP	= BIT(1),
+	KVM_PGTABLE_S2_AS_S1	= BIT(2),
 };
 
 /**
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 947ac1a951a5..c52a24c15ff2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -659,7 +659,19 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 	}
 }
 
-#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
+#define KVM_S2_MEMATTR(pgt, attr)					\
+	({								\
+		kvm_pte_t __attr;					\
+									\
+		if ((pgt)->flags & KVM_PGTABLE_S2_AS_S1)		\
+			__attr = PAGE_S2_MEMATTR(AS_S1,			\
+						 stage2_has_fwb(pgt));	\
+		else							\
+			__attr = PAGE_S2_MEMATTR(attr,			\
+						 stage2_has_fwb(pgt));	\
+									\
+		__attr;							\
+	})
 
 static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
 {

From a373930ec9406aeb9f82b5b78db56ca8d0919d99 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 23 Jan 2026 19:16:35 +0000
Subject: [PATCH 52/86] KVM: arm64: Switch pKVM host S2 over to
 KVM_PGTABLE_S2_AS_S1

Since we have the basics to use the S1 memory attributes as the final
ones with FWB, flip the host over to that when FWB is present.

Reviewed-by: Joey Gouly
Tested-by: Fuad Tabba
Reviewed-by: Fuad Tabba
Link: https://patch.msgid.link/20260123191637.715429-4-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 49db32f3ddf7..38f66a56a766 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -19,7 +19,7 @@
 #include
 #include
 
-#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
+#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)
 
 struct host_mmu host_mmu;
 
@@ -324,6 +324,8 @@ int __pkvm_prot_finalize(void)
 	params->vttbr = kvm_get_vttbr(mmu);
 	params->vtcr = mmu->vtcr;
 	params->hcr_el2 |= HCR_VM;
+	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+		params->hcr_el2 |= HCR_FWB;
 
 	/*
 	 * The CMO below not only cleans the updated params to the

From 4f27fe82aa304c50b24f92da72b4895cf73b54ba Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 23 Jan 2026 19:16:36 +0000
Subject: [PATCH 53/86] KVM: arm64: Kill KVM_PGTABLE_S2_NOFWB

Nobody is using this flag anymore, so remove it. This allows some
cleanup by removing stage2_has_fwb(), which can be replaced by a direct
check on the capability.

Reviewed-by: Joey Gouly
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260123191637.715429-5-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_pgtable.h |  7 ++-----
 arch/arm64/kvm/hyp/pgtable.c         | 21 ++++++---------------
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 9ce51a637da0..2198b6242883 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -229,15 +229,12 @@ struct kvm_pgtable_mm_ops {
 
 /**
  * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
- * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
- *				ARM64_HAS_STAGE2_FWB.
 * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
 * @KVM_PGTABLE_S2_AS_S1:	Final memory attributes are those of Stage-1.
*/ enum kvm_pgtable_stage2_flags { - KVM_PGTABLE_S2_NOFWB = BIT(0), - KVM_PGTABLE_S2_IDMAP = BIT(1), - KVM_PGTABLE_S2_AS_S1 = BIT(2), + KVM_PGTABLE_S2_IDMAP = BIT(0), + KVM_PGTABLE_S2_AS_S1 = BIT(1), }; /** diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c52a24c15ff2..00e33a16494b 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -631,14 +631,6 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) return vtcr; } -static bool stage2_has_fwb(struct kvm_pgtable *pgt) -{ - if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - return false; - - return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); -} - void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, size_t size) { @@ -661,14 +653,13 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, #define KVM_S2_MEMATTR(pgt, attr) \ ({ \ + bool __fwb = cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); \ kvm_pte_t __attr; \ \ if ((pgt)->flags & KVM_PGTABLE_S2_AS_S1) \ - __attr = PAGE_S2_MEMATTR(AS_S1, \ - stage2_has_fwb(pgt)); \ + __attr = PAGE_S2_MEMATTR(AS_S1, __fwb); \ else \ - __attr = PAGE_S2_MEMATTR(attr, \ - stage2_has_fwb(pgt)); \ + __attr = PAGE_S2_MEMATTR(attr, __fwb); \ \ __attr; \ }) @@ -880,7 +871,7 @@ static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) * system supporting FWB as the optimization is entirely * pointless when the unmap walker needs to perform CMOs. */ - return system_supports_tlb_range() && stage2_has_fwb(pgt); + return system_supports_tlb_range() && cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, @@ -1160,7 +1151,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, if (mm_ops->page_count(childp) != 1) return 0; } else if (stage2_pte_cacheable(pgt, ctx->old)) { - need_flush = !stage2_has_fwb(pgt); + need_flush = !cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } /* @@ -1390,7 +1381,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) .arg = pgt, }; - if (stage2_has_fwb(pgt)) + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) return 0; return kvm_pgtable_walk(pgt, addr, size, &walker); From 65d00e37b17f46a21e7a25e0fb211a4c33274a83 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Jan 2026 19:16:37 +0000 Subject: [PATCH 54/86] KVM: arm64: Simplify PAGE_S2_MEMATTR Restore PAGE_S2_MEMATTR() to its former glory, keeping the use of FWB as an implementation detail. 
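With this change, a caller no longer threads the FWB state through the
macro; for instance, mapping normal memory reduces to the following
(a sketch based on the definitions below, where pgt is a
struct kvm_pgtable *):

	/* Expands to MT_S2_FWB_NORMAL or MT_S2_NORMAL as appropriate */
	kvm_pte_t attr = KVM_S2_MEMATTR(pgt, NORMAL);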
Reviewed-by: Joey Gouly
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260123191637.715429-6-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/pgtable-prot.h | 4 ++--
 arch/arm64/kvm/hyp/pgtable.c          | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 161e8660eddd..d27e8872fe3c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -109,10 +109,10 @@ static inline bool __pure lpa2_is_enabled(void)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_KERNEL_EXEC_CONT)
 
-#define PAGE_S2_MEMATTR(attr, has_fwb)					\
+#define PAGE_S2_MEMATTR(attr)						\
 	({								\
 		u64 __val;						\
-		if (has_fwb)						\
+		if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))		\
 			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
 		else							\
 			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 00e33a16494b..a6f7533ce497 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -653,13 +653,12 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 
 #define KVM_S2_MEMATTR(pgt, attr)					\
 	({								\
-		bool __fwb = cpus_have_final_cap(ARM64_HAS_STAGE2_FWB);	\
 		kvm_pte_t __attr;					\
 									\
 		if ((pgt)->flags & KVM_PGTABLE_S2_AS_S1)		\
-			__attr = PAGE_S2_MEMATTR(AS_S1, __fwb);		\
+			__attr = PAGE_S2_MEMATTR(AS_S1);		\
 		else							\
-			__attr = PAGE_S2_MEMATTR(attr, __fwb);		\
+			__attr = PAGE_S2_MEMATTR(attr);			\
 									\
 		__attr;							\
 	})

From da63758c1876d899031066a9d4b8050af767ceb8 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff
Date: Mon, 8 Dec 2025 15:28:22 +0000
Subject: [PATCH 55/86] KVM: arm64: gic: Enable GICv3 CPUIF trapping on GICv5
 hosts if required

Factor out the enabling (and printing) of the GICv3 CPUIF traps from
the main GICv3 probe into a separate function. Call said function from
the GICv5 probe for legacy support, ensuring that any required GICv3
CPUIF traps on GICv5 hosts will be correctly handled, rather than
injecting an undef into the guest.

Signed-off-by: Sascha Bischoff
Link: https://patch.msgid.link/20251208152724.3637157-3-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/vgic/vgic-v3.c | 25 +++++++++++++++----------
 arch/arm64/kvm/vgic/vgic-v5.c |  2 ++
 arch/arm64/kvm/vgic/vgic.h    |  1 +
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 1d6dd1b545bd..4b03a726f164 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -880,6 +880,20 @@ void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
 	*updptr = cpu_to_le32(insn);
 }
 
+void vgic_v3_enable_cpuif_traps(void)
+{
+	u64 traps = vgic_ich_hcr_trap_bits();
+
+	if (traps) {
+		kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
+			 (traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
+			 (traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
+			 (traps & ICH_HCR_EL2_TC) ? "C" : "",
+			 (traps & ICH_HCR_EL2_TDIR) ? "D" : "");
"D" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -891,7 +905,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info) { u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); bool has_v2; - u64 traps; int ret; has_v2 = ich_vtr_el2 >> 63; @@ -955,15 +968,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; } - traps = vgic_ich_hcr_trap_bits(); - if (traps) { - kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", - (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", - (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", - (traps & ICH_HCR_EL2_TC) ? "C" : "", - (traps & ICH_HCR_EL2_TDIR) ? "D" : ""); - static_branch_enable(&vgic_v3_cpuif_trap); - } + vgic_v3_enable_cpuif_traps(); kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index 2d3811f4e117..331651087e2c 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -48,5 +48,7 @@ int vgic_v5_probe(const struct gic_kvm_info *info) static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); kvm_info("GCIE legacy system register CPU interface\n"); + vgic_v3_enable_cpuif_traps(); + return 0; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 5f0fc96b4dc2..c9b3bb07e483 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -324,6 +324,7 @@ void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_reset(struct kvm_vcpu *vcpu); +void vgic_v3_enable_cpuif_traps(void); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); From 28e505d81766dcbe25c60d57ab9fc941cd3d38bf Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Mon, 8 Dec 2025 15:28:23 +0000 Subject: [PATCH 56/86] KVM: arm64: Correct test for ICH_HCR_EL2_TDIR cap for GICv5 hosts The original order of checks in the ICH_HCR_EL2_TDIR test returned with false early in the case where the native GICv3 CPUIF was not present. The result was that on GICv5 hosts with legacy support - which do not have the GICv3 CPUIF - the test always returned false. Reshuffle the checks such that support for GICv5 legacy is checked prior to checking for the native GICv3 CPUIF. 
Signed-off-by: Sascha Bischoff
Fixes: 2a28810cbb8b2 ("KVM: arm64: GICv3: Detect and work around the lack of ICV_DIR_EL1 trapping")
Link: https://patch.msgid.link/20251208152724.3637157-4-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier
---
 arch/arm64/kernel/cpufeature.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c840a93b9ef9..65aaea68adaa 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2326,16 +2326,16 @@ static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
 	BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
 	BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
 
-	if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
-	    !is_midr_in_range_list(has_vgic_v3))
-		return false;
-
 	if (!is_hyp_mode_available())
 		return false;
 
 	if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
 		return true;
 
+	if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
+	    !is_midr_in_range_list(has_vgic_v3))
+		return false;
+
 	if (is_kernel_in_hyp_mode())
 		res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
 	else

From 9d46e83a72392a644604458448a72d7c45977f0f Mon Sep 17 00:00:00 2001
From: "Zenghui Yu (Huawei)"
Date: Fri, 30 Jan 2026 17:44:35 +0800
Subject: [PATCH 57/86] KVM: arm64: nv: Add trap config for DBGWCR<15>_EL1

It seems that DBGWCR<15>_EL1 was missed when MDCR_EL2 was first added
to the trap forwarding infrastructure. Add it back.

Fixes: cb31632c4452 ("KVM: arm64: nv: Add trap forwarding for MDCR_EL2")
Signed-off-by: Zenghui Yu (Huawei)
Link: https://patch.msgid.link/20260130094435.39942-1-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/emulate-nested.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 834f13fb1fb7..34a5460adaf0 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -1166,6 +1166,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
 	SR_TRAP(SYS_DBGWCRn_EL1(12), CGT_MDCR_TDE_TDA),
 	SR_TRAP(SYS_DBGWCRn_EL1(13), CGT_MDCR_TDE_TDA),
 	SR_TRAP(SYS_DBGWCRn_EL1(14), CGT_MDCR_TDE_TDA),
+	SR_TRAP(SYS_DBGWCRn_EL1(15), CGT_MDCR_TDE_TDA),
 	SR_TRAP(SYS_DBGCLAIMSET_EL1, CGT_MDCR_TDE_TDA),
 	SR_TRAP(SYS_DBGCLAIMCLR_EL1, CGT_MDCR_TDE_TDA),
 	SR_TRAP(SYS_DBGAUTHSTATUS_EL1, CGT_MDCR_TDE_TDA),

From 82a32eacbacc6f7e372f98999e5ee1ee0dd7462d Mon Sep 17 00:00:00 2001
From: "Zenghui Yu (Huawei)"
Date: Wed, 28 Jan 2026 15:52:08 +0800
Subject: [PATCH 58/86] KVM: arm64: Fix various comments

Use tabs instead of spaces, and fix two minor typos.

Signed-off-by: Zenghui Yu (Huawei)
Link: https://patch.msgid.link/20260128075208.23024-1-zenghui.yu@linux.dev
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_host.h    | 2 +-
 arch/arm64/kvm/hyp/vhe/sysreg-sr.c   | 2 +-
 arch/arm64/kvm/vgic/vgic-v3-nested.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4c8b4274f669..fa01877a7ba1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -201,7 +201,7 @@ struct kvm_s2_mmu {
 	 * host to parse the guest S2.
 	 * This either contains:
 	 * - the virtual VTTBR programmed by the guest hypervisor with
-	 *   CnP cleared
+	 *	CnP cleared
 	 * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
 	 *
 	 * We also cache the full VTCR which gets used for TLB invalidation,
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index f28c6cf4fe1b..b254d442e54e 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -205,7 +205,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
 
 	/*
 	 * When running a normal EL1 guest, we only load a new vcpu
-	 * after a context switch, which imvolves a DSB, so all
+	 * after a context switch, which involves a DSB, so all
 	 * speculative EL1&0 walks will have already completed.
 	 * If running NV, the vcpu may transition between vEL1 and
 	 * vEL2 without a context switch, so make sure we complete
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index 61b44f3f2bf1..a2ccef116483 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -57,7 +57,7 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
 * as the L1 guest is in charge of provisioning the interrupts via its own
 * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
 * page. This means that the flow described above does work (there is no
- * state to rebuild in the L0 hypervisor), and that most things happed on L2
+ * state to rebuild in the L0 hypervisor), and that most things happen on L2
 * load/put:
 *
 * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,

From 4a03431b742b4edc24fe1a14d355de1df6d80f86 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff
Date: Wed, 28 Jan 2026 17:59:50 +0000
Subject: [PATCH 59/86] KVM: arm64: gic-v3: Switch vGIC-v3 to use generated
 ICH_VMCR_EL2

The VGIC-v3 code relied on hand-written definitions for the
ICH_VMCR_EL2 register. This register, and its associated fields, are
now generated as part of the sysreg framework. Move to using the
generated definitions instead of the hand-written ones.

There are no functional changes as part of this change.
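The conversion is mechanical. For example, extracting the virtual
priority mask goes from open-coded shifting to the generated field
helpers, as in this before/after sketch taken from the diff below:

	/* Before: hand-written shift/mask */
	pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;

	/* After: generated ICH_VMCR_EL2 field definition */
	pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr);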
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260128175919.3828384-3-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 21 --------- arch/arm64/kvm/hyp/vgic-v3-sr.c | 69 ++++++++++------------------ arch/arm64/kvm/vgic/vgic-v3-nested.c | 8 ++-- arch/arm64/kvm/vgic/vgic-v3.c | 48 +++++++++---------- 4 files changed, 51 insertions(+), 95 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9df51accbb02..b3b8b8cd7bf1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -560,7 +560,6 @@ #define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) #define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) -#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) #define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) @@ -988,26 +987,6 @@ #define ICH_LR_PRIORITY_SHIFT 48 #define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) -/* ICH_VMCR_EL2 bit definitions */ -#define ICH_VMCR_ACK_CTL_SHIFT 2 -#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) -#define ICH_VMCR_FIQ_EN_SHIFT 3 -#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) -#define ICH_VMCR_CBPR_SHIFT 4 -#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) -#define ICH_VMCR_EOIM_SHIFT 9 -#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) -#define ICH_VMCR_BPR1_SHIFT 18 -#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) -#define ICH_VMCR_BPR0_SHIFT 21 -#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) -#define ICH_VMCR_PMR_SHIFT 24 -#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICH_VMCR_ENG0_SHIFT 0 -#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) -#define ICH_VMCR_ENG1_SHIFT 1 -#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) - /* * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 0b670a033fd8..c4d2f1feea8b 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -569,11 +569,11 @@ static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, continue; /* Group-0 interrupt, but Group-0 disabled? */ - if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_EL2_VENG0_MASK)) continue; /* Group-1 interrupt, but Group-1 disabled? */ - if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_EL2_VENG1_MASK)) continue; /* Not the highest priority? 
*/ @@ -646,19 +646,19 @@ static int __vgic_v3_get_highest_active_priority(void) static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { - return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + return FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr); } static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; - if (vmcr & ICH_VMCR_CBPR_MASK) { + if (vmcr & ICH_VMCR_EL2_VCBPR_MASK) { bpr = __vgic_v3_get_bpr0(vmcr); if (bpr < 7) bpr++; } else { - bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + bpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr); } return bpr; @@ -758,7 +758,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) if (grp != !!(lr_val & ICH_LR_GROUP)) goto spurious; - pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr); lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; if (pmr <= lr_prio) goto spurious; @@ -806,7 +806,7 @@ static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) int lr; /* EOImode == 0, nothing to be done here */ - if (!(vmcr & ICH_VMCR_EOIM_MASK)) + if (!(vmcr & ICH_VMCR_EL2_VEOIM_MASK)) return 1; /* No deactivate to be performed on an LPI */ @@ -849,7 +849,7 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) } /* EOImode == 1 and not an LPI, nothing to be done here */ - if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI)) + if ((vmcr & ICH_VMCR_EL2_VEOIM_MASK) && !(vid >= VGIC_MIN_LPI)) return; lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; @@ -865,22 +865,19 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr)); } static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr)); } static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); - if (val & 1) - vmcr |= ICH_VMCR_ENG0_MASK; - else - vmcr &= ~ICH_VMCR_ENG0_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VENG0, &vmcr, val & 1); __vgic_v3_write_vmcr(vmcr); } @@ -889,10 +886,7 @@ static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); - if (val & 1) - vmcr |= ICH_VMCR_ENG1_MASK; - else - vmcr &= ~ICH_VMCR_ENG1_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VENG1, &vmcr, val & 1); __vgic_v3_write_vmcr(vmcr); } @@ -916,10 +910,7 @@ static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) if (val < bpr_min) val = bpr_min; - val <<= ICH_VMCR_BPR0_SHIFT; - val &= ICH_VMCR_BPR0_MASK; - vmcr &= ~ICH_VMCR_BPR0_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VBPR0, &vmcr, val); __vgic_v3_write_vmcr(vmcr); } @@ -929,17 +920,14 @@ static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); - if (vmcr & ICH_VMCR_CBPR_MASK) + if (FIELD_GET(ICH_VMCR_EL2_VCBPR, val)) return; /* Enforce BPR limiting */ if (val < bpr_min) val = bpr_min; - val <<= ICH_VMCR_BPR1_SHIFT; - val &= ICH_VMCR_BPR1_MASK; - vmcr &= ~ICH_VMCR_BPR1_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VBPR1, &vmcr, val); __vgic_v3_write_vmcr(vmcr); } @@ -1029,19 +1017,14 @@ spurious: static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { - vmcr &= 
ICH_VMCR_PMR_MASK; - vmcr >>= ICH_VMCR_PMR_SHIFT; - vcpu_set_reg(vcpu, rt, vmcr); + vcpu_set_reg(vcpu, rt, FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr)); } static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); - val <<= ICH_VMCR_PMR_SHIFT; - val &= ICH_VMCR_PMR_MASK; - vmcr &= ~ICH_VMCR_PMR_MASK; - vmcr |= val; + FIELD_MODIFY(ICH_VMCR_EL2_VPMR, &vmcr, val); write_gicreg(vmcr, ICH_VMCR_EL2); } @@ -1064,9 +1047,11 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ - val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + val |= FIELD_PREP(ICC_CTLR_EL1_EOImode_MASK, + FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr)); /* CBPR */ - val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + val |= FIELD_PREP(ICC_CTLR_EL1_CBPR_MASK, + FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr)); vcpu_set_reg(vcpu, rt, val); } @@ -1075,15 +1060,11 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); - if (val & ICC_CTLR_EL1_CBPR_MASK) - vmcr |= ICH_VMCR_CBPR_MASK; - else - vmcr &= ~ICH_VMCR_CBPR_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VCBPR, &vmcr, + FIELD_GET(ICC_CTLR_EL1_CBPR_MASK, val)); - if (val & ICC_CTLR_EL1_EOImode_MASK) - vmcr |= ICH_VMCR_EOIM_MASK; - else - vmcr &= ~ICH_VMCR_EOIM_MASK; + FIELD_MODIFY(ICH_VMCR_EL2_VEOIM, &vmcr, + FIELD_GET(ICC_CTLR_EL1_EOImode_MASK, val)); write_gicreg(vmcr, ICH_VMCR_EL2); } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 61b44f3f2bf1..c9e35ec67117 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -202,16 +202,16 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend) reg |= ICH_MISR_EL2_NP; - if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_EL2_VENG0_MASK)) reg |= ICH_MISR_EL2_VGrp0E; - if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_EL2_VENG0_MASK)) reg |= ICH_MISR_EL2_VGrp0D; - if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_EL2_VENG1_MASK)) reg |= ICH_MISR_EL2_VGrp1E; - if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK)) + if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_EL2_VENG1_MASK)) reg |= ICH_MISR_EL2_VGrp1D; return reg; diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 1d6dd1b545bd..2afc04167231 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -41,9 +41,9 @@ void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, if (!als->nr_sgi) cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount; - cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ? + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG0_MASK) ? ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE; - cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ? + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG1_MASK) ? ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE; /* @@ -215,7 +215,7 @@ void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val) * We only deal with DIR when EOIMode==1, and only for SGI, * PPI or SPI. 
*/ - if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) || + if (!(cpuif->vgic_vmcr & ICH_VMCR_EL2_VEOIM_MASK) || val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS) return; @@ -408,25 +408,23 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) u32 vmcr; if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { - vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & - ICH_VMCR_ACK_CTL_MASK; - vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & - ICH_VMCR_FIQ_EN_MASK; + vmcr = FIELD_PREP(ICH_VMCR_EL2_VAckCtl, vmcrp->ackctl); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VFIQEn, vmcrp->fiqen); } else { /* * When emulating GICv3 on GICv3 with SRE=1 on the * VFIQEn bit is RES1 and the VAckCtl bit is RES0. */ - vmcr = ICH_VMCR_FIQ_EN_MASK; + vmcr = ICH_VMCR_EL2_VFIQEn_MASK; } - vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; - vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; - vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; - vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; - vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; - vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; - vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VCBPR, vmcrp->cbpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VEOIM, vmcrp->eoim); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR1, vmcrp->abpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR0, vmcrp->bpr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VPMR, vmcrp->pmr); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG0, vmcrp->grpen0); + vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG1, vmcrp->grpen1); cpu_if->vgic_vmcr = vmcr; } @@ -440,10 +438,8 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcr = cpu_if->vgic_vmcr; if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { - vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> - ICH_VMCR_ACK_CTL_SHIFT; - vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> - ICH_VMCR_FIQ_EN_SHIFT; + vmcrp->ackctl = FIELD_GET(ICH_VMCR_EL2_VAckCtl, vmcr); + vmcrp->fiqen = FIELD_GET(ICH_VMCR_EL2_VFIQEn, vmcr); } else { /* * When emulating GICv3 on GICv3 with SRE=1 on the @@ -453,13 +449,13 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcrp->ackctl = 0; } - vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; - vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; - vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; - vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; - vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; - vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; - vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; + vmcrp->cbpr = FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr); + vmcrp->eoim = FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr); + vmcrp->abpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr); + vmcrp->bpr = FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr); + vmcrp->pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr); + vmcrp->grpen0 = FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr); + vmcrp->grpen1 = FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr); } #define INITIAL_PENDBASER_VALUE \ From b583177aafe3ca753ddd3624c8731a93d0cd0b37 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 28 Jan 2026 18:00:06 +0000 Subject: [PATCH 60/86] arm64/sysreg: Drop ICH_HFGRTR_EL2.ICC_HAPR_EL1 and make RES1 The GICv5 architecture is dropping the ICC_HAPR_EL1 and ICV_HAPR_EL1 system registers. These registers were never added to the sysregs, but the traps for them were. 
Drop the trap bit from ICH_HFGRTR_EL2 and make it RES1 as per the upcoming GICv5 spec change. Additionally, update the EL2 setup code to not attempt to set that bit. Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260128175919.3828384-4-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/el2_setup.h | 1 - arch/arm64/tools/sysreg | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index cacd20df1786..07c12f4a69b4 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -225,7 +225,6 @@ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \ - ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \ ICH_HFGRTR_EL2_ICC_APR_EL1) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8921b51866d6..dab5bfe8c968 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -4579,7 +4579,7 @@ Field 7 ICC_IAFFIDR_EL1 Field 6 ICC_ICSR_EL1 Field 5 ICC_PCR_EL1 Field 4 ICC_HPPIR_EL1 -Field 3 ICC_HAPR_EL1 +Res1 3 Field 2 ICC_CR0_EL1 Field 1 ICC_IDRn_EL1 Field 0 ICC_APR_EL1 From 9435c1e1431003e23aa34ef8e46c30d09c3dbcb5 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 28 Jan 2026 18:00:52 +0000 Subject: [PATCH 61/86] KVM: arm64: gic: Set vgic_model before initing private IRQs Different GIC types require the private IRQs to be initialised differently. GICv5 is the culprit as it supports a different number of private IRQs, all of which are PPIs (there are no SGIs). Moreover, as GICv5 uses the top bits of the interrupt ID to encode the type, the intid also needs to be computed differently. Up until now, the GIC model has been set after initialising the private IRQs for a VCPU. Move this earlier to ensure that the GIC model is available when configuring the private IRQs. While we're at it, also move the setting of the in_kernel flag and implementation revision to keep them grouped together as before.
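To illustrate the ordering dependency with a minimal standalone sketch (the names and counts below are made up for illustration, they are not the kernel code): the per-vCPU private IRQ setup has to branch on the model, so the model must already be valid when it runs.

	enum vgic_model { VGIC_V3, VGIC_V5 };

	struct vgic {
		enum vgic_model model;
		unsigned int nr_private;
	};

	static void init_private_irqs(struct vgic *vgic)
	{
		/* A still-zero-initialised model would silently take
		 * the wrong branch here; the counts are placeholders. */
		vgic->nr_private = (vgic->model == VGIC_V5) ? 128 : 32;
	}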
Signed-off-by: Sascha Bischoff Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260128175919.3828384-7-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index dc9f9db31026..86c149537493 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -140,6 +140,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } + kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; + kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; + kvm_for_each_vcpu(i, vcpu, kvm) { ret = vgic_allocate_private_irqs_locked(vcpu, type); if (ret) @@ -156,10 +160,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } - kvm->arch.vgic.in_kernel = true; - kvm->arch.vgic.vgic_model = type; - kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; From 3227c3a89d65fe7482312b7b27038d9ebd86f210 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 28 Jan 2026 18:07:33 +0000 Subject: [PATCH 62/86] irqchip/gic-v5: Check if impl is virt capable Now that there is support for creating a GICv5-based guest with KVM, check that the hardware itself supports virtualisation, skipping the setting of struct gic_kvm_info if not. Note: If native GICv5 virt is not supported, then neither is FEAT_GCIE_LEGACY, so we can skip altogether. Signed-off-by: Sascha Bischoff Reviewed-by: Lorenzo Pieralisi Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260128175919.3828384-33-sascha.bischoff@arm.com [maz: cosmetic changes] Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v5-irs.c | 2 ++ drivers/irqchip/irq-gic-v5.c | 10 ++++++++++ include/linux/irqchip/arm-gic-v5.h | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index ce2732d649a3..eeeb40fb0eaa 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -743,6 +743,8 @@ static int __init gicv5_irs_init(struct device_node *node) * be consistent across IRSes by the architecture. */ if (list_empty(&irs_nodes)) { + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR0); + gicv5_global_data.virt_capable = FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); + idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); irs_setup_pri_bits(idr); diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c index 41ef286c4d78..3c86bbc05761 100644 --- a/drivers/irqchip/irq-gic-v5.c +++ b/drivers/irqchip/irq-gic-v5.c @@ -1064,6 +1064,16 @@ static struct gic_kvm_info gic_v5_kvm_info __initdata; static void __init gic_of_setup_kvm_info(struct device_node *node) { + /* + * If we don't have native GICv5 virtualisation support, then + * we also don't have FEAT_GCIE_LEGACY - the architecture + * forbids this combination.
+ */ + if (!gicv5_global_data.virt_capable) { + pr_info("GIC implementation is not virtualization capable\n"); + return; + } + gic_v5_kvm_info.type = GIC_V5; /* GIC Virtual CPU interface maintenance interrupt */ diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 68ddcdb1cec5..4cb71ce6e8ad 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -43,6 +43,7 @@ /* * IRS registers and tables structures */ +#define GICV5_IRS_IDR0 0x0000 #define GICV5_IRS_IDR1 0x0004 #define GICV5_IRS_IDR2 0x0008 #define GICV5_IRS_IDR5 0x0014 @@ -63,6 +64,8 @@ #define GICV5_IRS_IST_STATUSR 0x0194 #define GICV5_IRS_MAP_L2_ISTR 0x01c0 +#define GICV5_IRS_IDR0_VIRT BIT(6) + #define GICV5_IRS_IDR1_PRIORITY_BITS GENMASK(22, 20) #define GICV5_IRS_IDR1_IAFFID_BITS GENMASK(19, 16) @@ -278,6 +281,7 @@ struct gicv5_chip_data { u8 cpuif_pri_bits; u8 cpuif_id_bits; u8 irs_pri_bits; + bool virt_capable; struct { __le64 *l1ist_addr; u32 l2_size; From dcd79ed450934421158d81600f1be4c2e2af20bf Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 2 Feb 2026 08:57:19 +0000 Subject: [PATCH 63/86] KVM: arm64: Use standard seq_file iterator for idregs debugfs The current implementation uses `idreg_debugfs_iter` in `struct kvm_arch` to track the sequence position. This effectively makes the iterator shared across all open file descriptors for the VM. This approach has significant drawbacks: - It enforces mutual exclusion, preventing concurrent reads of the debugfs file (returning -EBUSY). - It relies on storing transient iterator state in the long-lived VM structure (`kvm_arch`). - The use of `u8` for the iterator index imposes an implicit limit of 255 registers. While not currently exceeded, this is fragile against future architectural growth. Switching to `loff_t` eliminates this overflow risk. Refactor the implementation to use the standard `seq_file` iterator. Instead of storing state in `kvm_arch`, rely on the `pos` argument passed to the `start` and `next` callbacks, which tracks the logical index specific to the file descriptor. This change enables concurrent access and eliminates the `idreg_debugfs_iter` field from `struct kvm_arch`. 
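For reference, the stateless shape being adopted looks roughly like the sketch below, where my_lookup() is a hypothetical stand-in for idregs_debug_find(); this is a sketch, not the actual diff that follows:

	static void *my_lookup(void *priv, loff_t pos);	/* stand-in */

	static void *demo_start(struct seq_file *s, loff_t *pos)
	{
		return my_lookup(s->private, *pos);
	}

	static void *demo_next(struct seq_file *s, void *v, loff_t *pos)
	{
		(*pos)++;
		return my_lookup(s->private, *pos);
	}

	static void demo_stop(struct seq_file *s, void *v)
	{
	}

All per-reader state lives in *pos, which the seq_file core maintains per open file, so concurrent readers cannot interfere with each other.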
Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260202085721.3954942-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 3 -- arch/arm64/kvm/sys_regs.c | 50 +++++-------------------------- 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac7f970c7883..643a199f6e9e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -373,9 +373,6 @@ struct kvm_arch { /* Maximum number of counters for the guest */ u8 nr_pmu_counters; - /* Iterator for idreg debugfs */ - u8 idreg_debugfs_iter; - /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; struct maple_tree smccc_filter; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c8fd7c6a12a1..ef6b76ecfb11 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4992,7 +4992,7 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, return false; } -static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) +static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, loff_t pos) { unsigned long i, idreg_idx = 0; @@ -5002,10 +5002,8 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) if (!is_vm_ftr_id_reg(reg_to_encoding(r))) continue; - if (idreg_idx == pos) + if (idreg_idx++ == pos) return r; - - idreg_idx++; } return NULL; @@ -5014,23 +5012,11 @@ static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) static void *idregs_debug_start(struct seq_file *s, loff_t *pos) { struct kvm *kvm = s->private; - u8 *iter; - mutex_lock(&kvm->arch.config_lock); + if (!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return NULL; - iter = &kvm->arch.idreg_debugfs_iter; - if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) && - *iter == (u8)~0) { - *iter = *pos; - if (!idregs_debug_find(kvm, *iter)) - iter = NULL; - } else { - iter = ERR_PTR(-EBUSY); - } - - mutex_unlock(&kvm->arch.config_lock); - - return iter; + return (void *)idregs_debug_find(kvm, *pos); } static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) @@ -5039,37 +5025,19 @@ static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) (*pos)++; - if (idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter + 1)) { - kvm->arch.idreg_debugfs_iter++; - - return &kvm->arch.idreg_debugfs_iter; - } - - return NULL; + return (void *)idregs_debug_find(kvm, *pos); } static void idregs_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - - if (IS_ERR(v)) - return; - - mutex_lock(&kvm->arch.config_lock); - - kvm->arch.idreg_debugfs_iter = ~0; - - mutex_unlock(&kvm->arch.config_lock); } static int idregs_debug_show(struct seq_file *s, void *v) { - const struct sys_reg_desc *desc; + const struct sys_reg_desc *desc = v; struct kvm *kvm = s->private; - desc = idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter); - - if (!desc->name) + if (!desc) return 0; seq_printf(s, "%20s:\t%016llx\n", @@ -5089,8 +5057,6 @@ DEFINE_SEQ_ATTRIBUTE(idregs_debug); void kvm_sys_regs_create_debugfs(struct kvm *kvm) { - kvm->arch.idreg_debugfs_iter = ~0; - debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm, &idregs_debug_fops); } From 5ab24969705a9adadbc1d3cff4c1c15df174eafb Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 2 Feb 2026 08:57:20 +0000 Subject: [PATCH 64/86] KVM: arm64: Reimplement vgic-debug XArray iteration The vgic-debug 
interface implementation uses XArray marks (`LPI_XA_MARK_DEBUG_ITER`) to "snapshot" LPIs at the start of iteration. This modifies global state for a read-only operation and complicates reference counting, leading to leaks if iteration is aborted or fails. Reimplement the iterator to use dynamic iteration logic: - Remove `lpi_idx` from `struct vgic_state_iter`. - Replace the XArray marking mechanism with dynamic iteration using `xa_find_after(..., XA_PRESENT)`. - Wrap XArray traversals in `rcu_read_lock()`/`rcu_read_unlock()` to ensure safety against concurrent modifications (e.g., LPI unmapping). - Handle potential races where an LPI is removed during iteration by gracefully skipping it in `show()`, rather than warning. - Remove the unused `LPI_XA_MARK_DEBUG_ITER` definition. This simplifies the lifecycle management of the iterator and prevents resource leaks associated with the marking mechanism, and paves the way for using a standard seq_file iterator. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260202085721.3954942-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-debug.c | 68 ++++++++++---------------------- include/kvm/arm_vgic.h | 1 - 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bb92853d1fd3..ec3d0c1fe703 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -25,11 +25,9 @@ struct vgic_state_iter { int nr_cpus; int nr_spis; - int nr_lpis; int dist_id; int vcpu_id; unsigned long intid; - int lpi_idx; }; static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) @@ -45,13 +43,15 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. 
*/ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && - iter->nr_lpis) { - if (iter->lpi_idx < iter->nr_lpis) - xa_find_after(&dist->lpi_xa, &iter->intid, - VGIC_LPI_MAX_INTID, - LPI_XA_MARK_DEBUG_ITER); - iter->lpi_idx++; + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid == VGIC_LPI_MAX_INTID + 1) + return; + + rcu_read_lock(); + if (!xa_find_after(&dist->lpi_xa, &iter->intid, + VGIC_LPI_MAX_INTID, XA_PRESENT)) + iter->intid = VGIC_LPI_MAX_INTID + 1; + rcu_read_unlock(); return; } @@ -61,44 +61,21 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) iter->intid = 0; } -static int iter_mark_lpis(struct kvm *kvm) +static int vgic_count_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; struct vgic_irq *irq; + unsigned long intid; int nr_lpis = 0; - xa_lock_irqsave(&dist->lpi_xa, flags); - - xa_for_each(&dist->lpi_xa, intid, irq) { - if (!vgic_try_get_irq_ref(irq)) - continue; - - __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + rcu_read_lock(); + xa_for_each(&dist->lpi_xa, intid, irq) nr_lpis++; - } - - xa_unlock_irqrestore(&dist->lpi_xa, flags); + rcu_read_unlock(); return nr_lpis; } -static void iter_unmark_lpis(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid, flags; - struct vgic_irq *irq; - - xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_lock_irqsave(&dist->lpi_xa, flags); - __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); - xa_unlock_irqrestore(&dist->lpi_xa, flags); - - /* vgic_put_irq() expects to be called outside of the xa_lock */ - vgic_put_irq(kvm, irq); - } -} - static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, loff_t pos) { @@ -108,8 +85,6 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, iter->nr_cpus = nr_cpus; iter->nr_spis = kvm->arch.vgic.nr_spis; - if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - iter->nr_lpis = iter_mark_lpis(kvm); /* Fast forward to the right position if needed */ while (pos--) @@ -121,7 +96,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); + iter->intid > VGIC_LPI_MAX_INTID; } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) @@ -178,7 +153,6 @@ static void vgic_debug_stop(struct seq_file *s, void *v) mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; - iter_unmark_lpis(kvm); kfree(iter); kvm->arch.vgic.iter = NULL; mutex_unlock(&kvm->arch.config_lock); @@ -188,13 +162,14 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, struct vgic_state_iter *iter) { bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + struct kvm *kvm = s->private; seq_printf(s, "Distributor\n"); seq_printf(s, "===========\n"); seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis); + seq_printf(s, "nr_lpis:\t%d\n", vgic_count_lpis(kvm)); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -291,16 +266,13 @@ static int vgic_debug_show(struct seq_file *s, void *v) if (iter->vcpu_id < iter->nr_cpus) vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); - /* - * Expect this to succeed, as iter_mark_lpis() takes a reference on - * every LPI to be visited. 
- */ if (iter->intid < VGIC_NR_PRIVATE_IRQS) irq = vgic_get_vcpu_irq(vcpu, iter->intid); else irq = vgic_get_irq(kvm, iter->intid); - if (WARN_ON_ONCE(!irq)) - return -EINVAL; + + if (!irq) + return 0; raw_spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b261fb3968d0..d32fafbd2907 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -300,7 +300,6 @@ struct vgic_dist { */ u64 propbaser; -#define LPI_XA_MARK_DEBUG_ITER XA_MARK_0 struct xarray lpi_xa; /* used by vgic-debug */ From fb21cb08566ebed91d5c876db5c013cc8af83b89 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 2 Feb 2026 08:57:21 +0000 Subject: [PATCH 65/86] KVM: arm64: Use standard seq_file iterator for vgic-debug debugfs The current implementation uses `vgic_state_iter` in `struct vgic_dist` to track the sequence position. This effectively makes the iterator shared across all open file descriptors for the VM. This approach has significant drawbacks: - It enforces mutual exclusion, preventing concurrent reads of the debugfs file (returning -EBUSY). - It relies on storing transient iterator state in the long-lived VM structure (`vgic_dist`). Refactor the implementation to use the standard `seq_file` iterator. Instead of storing state in `kvm_arch`, rely on the `pos` argument passed to the `start` and `next` callbacks, which tracks the logical index specific to the file descriptor. This change enables concurrent access and eliminates the `vgic_state_iter` field from `struct vgic_dist`. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260202085721.3954942-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-debug.c | 40 ++++++++++---------------------- include/kvm/arm_vgic.h | 3 --- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index ec3d0c1fe703..2c6776a1779b 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -104,58 +104,42 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; - if (iter) { - iter = ERR_PTR(-EBUSY); - goto out; - } - iter = kmalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) { - iter = ERR_PTR(-ENOMEM); - goto out; - } + if (!iter) + return ERR_PTR(-ENOMEM); iter_init(kvm, iter, *pos); - kvm->arch.vgic.iter = iter; - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; -out: - mutex_unlock(&kvm->arch.config_lock); + } + return iter; } static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) { struct kvm *kvm = s->private; - struct vgic_state_iter *iter = kvm->arch.vgic.iter; + struct vgic_state_iter *iter = v; ++*pos; iter_next(kvm, iter); - if (end_of_vgic(iter)) + if (end_of_vgic(iter)) { + kfree(iter); iter = NULL; + } return iter; } static void vgic_debug_stop(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; - struct vgic_state_iter *iter; + struct vgic_state_iter *iter = v; - /* - * If the seq file wasn't properly opened, there's nothing to clearn - * up. 
- */ - if (IS_ERR(v)) + if (IS_ERR_OR_NULL(v)) return; - mutex_lock(&kvm->arch.config_lock); - iter = kvm->arch.vgic.iter; kfree(iter); - kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d32fafbd2907..f2eafc65bbf4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -302,9 +302,6 @@ struct vgic_dist { struct xarray lpi_xa; - /* used by vgic-debug */ - struct vgic_state_iter *iter; - /* * GICv4 ITS per-VM data, containing the IRQ domain, the VPE * array, the property table pointer as well as allocation From 0c4762e26879acc101790269382f230f22fd6905 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 2 Feb 2026 15:22:53 +0000 Subject: [PATCH 66/86] KVM: arm64: nv: Avoid NV stage-2 code when NV is not supported The NV stage-2 manipulation functions kvm_nested_s2_unmap(), kvm_nested_s2_wp(), and others, are being called for any stage-2 manipulation regardless of whether nested virtualization is supported or enabled for the VM. For protected KVM (pKVM), `struct kvm_pgtable` uses the `pkvm_mappings` member of the union. This member aliases `ia_bits`, which is used by the non-protected NV code paths. Attempting to read `pgt->ia_bits` in these functions results in treating protected mapping pointers or state values as bit-shift amounts. This triggers a UBSAN shift-out-of-bounds error: UBSAN: shift-out-of-bounds in arch/arm64/kvm/nested.c:1127:34 shift exponent 174565952 is too large for 64-bit type 'unsigned long' Call trace: __ubsan_handle_shift_out_of_bounds+0x28c/0x2c0 kvm_nested_s2_unmap+0x228/0x248 kvm_arch_flush_shadow_memslot+0x98/0xc0 kvm_set_memslot+0x248/0xce0 Since pKVM and NV are mutually exclusive, prevent entry into these NV handling functions if the VM has not allocated any nested MMUs (i.e., `kvm->arch.nested_mmus_size` is 0). 
Fixes: 7270cc9157f47 ("KVM: arm64: nv: Handle VNCR_EL2 invalidation from MMU notifiers") Suggested-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260202152310.113467-1-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/nested.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index cdeeb8f09e72..d03e9b71bf6c 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1101,6 +1101,9 @@ void kvm_nested_s2_wp(struct kvm *kvm) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1117,6 +1120,9 @@ void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1133,6 +1139,9 @@ void kvm_nested_s2_flush(struct kvm *kvm) lockdep_assert_held_write(&kvm->mmu_lock); + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; @@ -1145,6 +1154,9 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; + if (!kvm->arch.nested_mmus_size) + return; + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; From 0a35bd285f43c26ccec33872fc6bb679069eaea8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:10 +0000 Subject: [PATCH 67/86] arm64: Convert SCTLR_EL2 to sysreg infrastructure Convert SCTLR_EL2 to the sysreg infrastructure, as per the 2025-12_rel revision of the Registers.json file. Note that we slightly deviate from the above, as we stick to the ARM ARM M.a definition of SCTLR_EL2[9], which is RES0, in order to avoid dragging the POE2 definitions... Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 7 --- arch/arm64/tools/sysreg | 69 +++++++++++++++++++++++++++ tools/arch/arm64/include/asm/sysreg.h | 6 --- 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 939f9c5bbae6..30f0409b1c80 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -504,7 +504,6 @@ #define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) #define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) -#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) #define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) @@ -837,12 +836,6 @@ #define SCTLR_ELx_A (BIT(1)) #define SCTLR_ELx_M (BIT(0)) -/* SCTLR_EL2 specific flags. 
*/ -#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ - (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ - (BIT(29))) - -#define SCTLR_EL2_BT (BIT(36)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE #else diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index a0f6249bd4f9..969a75615d61 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3749,6 +3749,75 @@ UnsignedEnum 2:0 F8S1 EndEnum EndSysreg +Sysreg SCTLR_EL2 3 4 1 0 0 +Field 63 TIDCP +Field 62 SPINTMASK +Field 61 NMI +Field 60 EnTP2 +Field 59 TCSO +Field 58 TCSO0 +Field 57 EPAN +Field 56 EnALS +Field 55 EnAS0 +Field 54 EnASR +Res0 53:50 +Field 49:46 TWEDEL +Field 45 TWEDEn +Field 44 DSSBS +Field 43 ATA +Field 42 ATA0 +Enum 41:40 TCF + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Enum 39:38 TCF0 + 0b00 NONE + 0b01 SYNC + 0b10 ASYNC + 0b11 ASYMM +EndEnum +Field 37 ITFSB +Field 36 BT +Field 35 BT0 +Field 34 EnFPM +Field 33 MSCEn +Field 32 CMOW +Field 31 EnIA +Field 30 EnIB +Field 29 LSMAOE +Field 28 nTLSMD +Field 27 EnDA +Field 26 UCI +Field 25 EE +Field 24 E0E +Field 23 SPAN +Field 22 EIS +Field 21 IESB +Field 20 TSCXT +Field 19 WXN +Field 18 nTWE +Res0 17 +Field 16 nTWI +Field 15 UCT +Field 14 DZE +Field 13 EnDB +Field 12 I +Field 11 EOS +Field 10 EnRCTX +Res0 9 +Field 8 SED +Field 7 ITD +Field 6 nAA +Field 5 CP15BEN +Field 4 SA0 +Field 3 SA +Field 2 C +Field 1 A +Field 0 M +EndSysreg + Sysreg HCR_EL2 3 4 1 1 0 Field 63:60 TWEDEL Field 59 TWEDEn diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 178b7322bf04..f75efe98e9df 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -847,12 +847,6 @@ #define SCTLR_ELx_A (BIT(1)) #define SCTLR_ELx_M (BIT(0)) -/* SCTLR_EL2 specific flags. */ -#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ - (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ - (BIT(29))) - -#define SCTLR_EL2_BT (BIT(36)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE #else From 4faf52106de73cdd1f34752afd60b256721e97b5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:11 +0000 Subject: [PATCH 68/86] KVM: arm64: Remove duplicate configuration for SCTLR_EL1.{EE,E0E} We already have specific constraints for SCTLR_EL1.{EE,E0E}, and making them depend on FEAT_AA64EL1 is just buggy. Fixes: 6bd4a274b026e ("KVM: arm64: Convert SCTLR_EL1 to config-driven sanitisation") Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 9c04f895d376..0bcdb3988573 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1140,8 +1140,6 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { SCTLR_EL1_TWEDEn, FEAT_TWED), NEEDS_FEAT(SCTLR_EL1_UCI | - SCTLR_EL1_EE | - SCTLR_EL1_E0E | SCTLR_EL1_WXN | SCTLR_EL1_nTWE | SCTLR_EL1_nTWI | From a3c92001812358c35c55c844c1e0d9de8caf13fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:12 +0000 Subject: [PATCH 69/86] KVM: arm64: Introduce standalone FGU computing primitive Computing the FGU bits is made oddly complicated, as we use the RES0 helper instead of using a specific abstraction. Introduce such an abstraction, which is going to make things significantly simpler in the future. 
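For context, the FGU ("fine-grained UNDEF") bits computed here are eventually folded into the FGT trap configuration so that guest accesses to features the VM does not have are trapped and reflected back as UNDEF. Schematically, and hedging on the exact call site, the fold has to honour trap-bit polarity via the register's mask/nmask pair:

	u64 fgu = kvm->arch.fgu[HFGRTR_GROUP];

	set_bits |= fgu & hfgrtr_masks.mask;	/* positive traps: set to trap */
	clr_bits |= fgu & hfgrtr_masks.nmask;	/* negative traps: clear to trap */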
Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 57 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 0bcdb3988573..2122599f7cbb 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1335,26 +1335,30 @@ static u64 compute_res0_bits(struct kvm *kvm, static u64 compute_reg_res0_bits(struct kvm *kvm, const struct reg_feat_map_desc *r, unsigned long require, unsigned long exclude) - { u64 res0; res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, require, exclude); - /* - * If computing FGUs, don't take RES0 or register existence - * into account -- we're not computing bits for the register - * itself. - */ - if (!(exclude & NEVER_FGU)) { - res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); - res0 |= ~reg_feat_map_bits(&r->feat_map); - } + res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); + res0 |= ~reg_feat_map_bits(&r->feat_map); return res0; } +static u64 compute_fgu_bits(struct kvm *kvm, const struct reg_feat_map_desc *r) +{ + /* + * If computing FGUs, we collect the unsupported feature bits as + * RES0 bits, but don't take the actual RES0 bits or register + * existence into account -- we're not computing bits for the + * register itself. + */ + return compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + 0, NEVER_FGU); +} + static u64 compute_reg_fixed_bits(struct kvm *kvm, const struct reg_feat_map_desc *r, u64 *fixed_bits, unsigned long require, @@ -1370,40 +1374,29 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) switch (fgt) { case HFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgrtr_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hfgwtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgrtr_desc); + val |= compute_fgu_bits(kvm, &hfgwtr_desc); break; case HFGITR_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgitr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgitr_desc); break; case HDFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hdfgrtr_desc); + val |= compute_fgu_bits(kvm, &hdfgwtr_desc); break; case HAFGRTR_GROUP: - val |= compute_reg_res0_bits(kvm, &hafgrtr_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hafgrtr_desc); break; case HFGRTR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgrtr2_desc); + val |= compute_fgu_bits(kvm, &hfgwtr2_desc); break; case HFGITR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hfgitr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hfgitr2_desc); break; case HDFGRTR2_GROUP: - val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc, - 0, NEVER_FGU); - val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc, - 0, NEVER_FGU); + val |= compute_fgu_bits(kvm, &hdfgrtr2_desc); + val |= compute_fgu_bits(kvm, &hdfgwtr2_desc); break; default: BUG(); From 0879478913dd671b0aed1e3960c4b35fb8546ab4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:13 +0000 Subject: [PATCH 70/86] KVM: arm64: Introduce data structure tracking both RES0 and RES1 bits We have so far mostly tracked RES0 bits, but only made a few attempts at being just as 
strict for RES1 bits (probably because they are both rarer and harder to handle). Start scratching the surface by introducing a data structure tracking RES0 and RES1 bits at the same time. Note that contrary to the usual idiom, this structure is mostly passed around by value -- the ABI handles it nicely, and the resulting code is much nicer. Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-5-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 23 +++-- arch/arm64/kvm/config.c | 148 ++++++++++++++++-------------- arch/arm64/kvm/nested.c | 129 +++++++++++++------------- 3 files changed, 161 insertions(+), 139 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b552a1e03848..799f494a1349 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -626,13 +626,24 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! */ }; -struct kvm_sysreg_masks { - struct { - u64 res0; - u64 res1; - } mask[NR_SYS_REGS - __SANITISED_REG_START__]; +struct resx { + u64 res0; + u64 res1; }; +struct kvm_sysreg_masks { + struct resx mask[NR_SYS_REGS - __SANITISED_REG_START__]; +}; + +static inline void __kvm_set_sysreg_resx(struct kvm_arch *arch, + enum vcpu_sysreg sr, struct resx resx) +{ + arch->sysreg_masks->mask[sr - __SANITISED_REG_START__] = resx; +} + +#define kvm_set_sysreg_resx(k, sr, resx) \ + __kvm_set_sysreg_resx(&(k)->arch, (sr), (resx)) + struct fgt_masks { const char *str; u64 mask; @@ -1607,7 +1618,7 @@ static inline bool kvm_arch_has_irq_bypass(void) } void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); -void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); +struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg); void check_feature_map(void); void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 2122599f7cbb..2214c06902f8 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1290,14 +1290,14 @@ static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map } } -static u64 __compute_fixed_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - u64 *fixed_bits, - unsigned long require, - unsigned long exclude) +static struct resx __compute_fixed_bits(struct kvm *kvm, + const struct reg_bits_to_feat_map *map, + int map_size, + u64 *fixed_bits, + unsigned long require, + unsigned long exclude) { - u64 val = 0; + struct resx resx = {}; for (int i = 0; i < map_size; i++) { bool match; @@ -1316,53 +1316,62 @@ static u64 __compute_fixed_bits(struct kvm *kvm, match = idreg_feat_match(kvm, &map[i]); if (!match || (map[i].flags & FIXED_VALUE)) - val |= reg_feat_map_bits(&map[i]); + resx.res0 |= reg_feat_map_bits(&map[i]); } - return val; + return resx; } -static u64 compute_res0_bits(struct kvm *kvm, - const struct reg_bits_to_feat_map *map, - int map_size, - unsigned long require, - unsigned long exclude) +static struct resx compute_resx_bits(struct kvm *kvm, + const struct reg_bits_to_feat_map *map, + int map_size, + unsigned long require, + unsigned long exclude) { return __compute_fixed_bits(kvm, map, map_size, NULL, require, exclude | FIXED_VALUE); } -static u64 compute_reg_res0_bits(struct kvm *kvm, - const struct reg_feat_map_desc *r, - unsigned long require, unsigned long exclude) +static struct resx compute_reg_resx_bits(struct kvm *kvm, + const struct 
reg_feat_map_desc *r, + unsigned long require, + unsigned long exclude) { - u64 res0; + struct resx resx, tmp; - res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, require, exclude); - res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); - res0 |= ~reg_feat_map_bits(&r->feat_map); + tmp = compute_resx_bits(kvm, &r->feat_map, 1, require, exclude); - return res0; + resx.res0 |= tmp.res0; + resx.res0 |= ~reg_feat_map_bits(&r->feat_map); + resx.res1 |= tmp.res1; + + return resx; } static u64 compute_fgu_bits(struct kvm *kvm, const struct reg_feat_map_desc *r) { + struct resx resx; + /* * If computing FGUs, we collect the unsupported feature bits as - * RES0 bits, but don't take the actual RES0 bits or register + * RESx bits, but don't take the actual RESx bits or register * existence into account -- we're not computing bits for the * register itself. */ - return compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, + resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, 0, NEVER_FGU); + + return resx.res0 | resx.res1; } -static u64 compute_reg_fixed_bits(struct kvm *kvm, - const struct reg_feat_map_desc *r, - u64 *fixed_bits, unsigned long require, - unsigned long exclude) +static struct resx compute_reg_fixed_bits(struct kvm *kvm, + const struct reg_feat_map_desc *r, + u64 *fixed_bits, + unsigned long require, + unsigned long exclude) { return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, fixed_bits, require | FIXED_VALUE, exclude); @@ -1405,91 +1414,94 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) kvm->arch.fgu[fgt] = val; } -void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1) +struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg) { u64 fixed = 0, mask; + struct resx resx; switch (reg) { case HFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0); - *res1 = HFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgrtr_desc, 0, 0); + resx.res1 |= HFGRTR_EL2_RES1; break; case HFGWTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0); - *res1 = HFGWTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgwtr_desc, 0, 0); + resx.res1 |= HFGWTR_EL2_RES1; break; case HFGITR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0); - *res1 = HFGITR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgitr_desc, 0, 0); + resx.res1 |= HFGITR_EL2_RES1; break; case HDFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0); - *res1 = HDFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgrtr_desc, 0, 0); + resx.res1 |= HDFGRTR_EL2_RES1; break; case HDFGWTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0); - *res1 = HDFGWTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgwtr_desc, 0, 0); + resx.res1 |= HDFGWTR_EL2_RES1; break; case HAFGRTR_EL2: - *res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0); - *res1 = HAFGRTR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hafgrtr_desc, 0, 0); + resx.res1 |= HAFGRTR_EL2_RES1; break; case HFGRTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0); - *res1 = HFGRTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgrtr2_desc, 0, 0); + resx.res1 |= HFGRTR2_EL2_RES1; break; case HFGWTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0); - *res1 = HFGWTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgwtr2_desc, 0, 0); + resx.res1 |= HFGWTR2_EL2_RES1; break; case HFGITR2_EL2: - *res0 = 
compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0); - *res1 = HFGITR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hfgitr2_desc, 0, 0); + resx.res1 |= HFGITR2_EL2_RES1; break; case HDFGRTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0); - *res1 = HDFGRTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgrtr2_desc, 0, 0); + resx.res1 |= HDFGRTR2_EL2_RES1; break; case HDFGWTR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0); - *res1 = HDFGWTR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hdfgwtr2_desc, 0, 0); + resx.res1 |= HDFGWTR2_EL2_RES1; break; case HCRX_EL2: - *res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0); - *res1 = __HCRX_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &hcrx_desc, 0, 0); + resx.res1 |= __HCRX_EL2_RES1; break; case HCR_EL2: - mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0); - *res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0); - *res0 |= (mask & ~fixed); - *res1 = HCR_EL2_RES1 | (mask & fixed); + mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0).res0; + resx = compute_reg_resx_bits(kvm, &hcr_desc, 0, 0); + resx.res0 |= (mask & ~fixed); + resx.res1 |= HCR_EL2_RES1 | (mask & fixed); break; case SCTLR2_EL1: case SCTLR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0); - *res1 = SCTLR2_EL1_RES1; + resx = compute_reg_resx_bits(kvm, &sctlr2_desc, 0, 0); + resx.res1 |= SCTLR2_EL1_RES1; break; case TCR2_EL2: - *res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0); - *res1 = TCR2_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &tcr2_el2_desc, 0, 0); + resx.res1 |= TCR2_EL2_RES1; break; case SCTLR_EL1: - *res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 0); - *res1 = SCTLR_EL1_RES1; + resx = compute_reg_resx_bits(kvm, &sctlr_el1_desc, 0, 0); + resx.res1 |= SCTLR_EL1_RES1; break; case MDCR_EL2: - *res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0); - *res1 = MDCR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &mdcr_el2_desc, 0, 0); + resx.res1 |= MDCR_EL2_RES1; break; case VTCR_EL2: - *res0 = compute_reg_res0_bits(kvm, &vtcr_el2_desc, 0, 0); - *res1 = VTCR_EL2_RES1; + resx = compute_reg_resx_bits(kvm, &vtcr_el2_desc, 0, 0); + resx.res1 |= VTCR_EL2_RES1; break; default: WARN_ON_ONCE(1); - *res0 = *res1 = 0; + resx = (typeof(resx)){}; break; } + + return resx; } static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 486eba72bb02..c5a45bc62153 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1683,22 +1683,19 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, return v; } -static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, struct resx resx) { - int i = sr - __SANITISED_REG_START__; - BUILD_BUG_ON(!__builtin_constant_p(sr)); BUILD_BUG_ON(sr < __SANITISED_REG_START__); BUILD_BUG_ON(sr >= NR_SYS_REGS); - kvm->arch.sysreg_masks->mask[i].res0 = res0; - kvm->arch.sysreg_masks->mask[i].res1 = res1; + kvm_set_sysreg_resx(kvm, sr, resx); } int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - u64 res0, res1; + struct resx resx; lockdep_assert_held(&kvm->arch.config_lock); @@ -1711,110 +1708,112 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) return -ENOMEM; /* VTTBR_EL2 */ - res0 = res1 = 0; + resx = (typeof(resx)){}; if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16)) - res0 |= GENMASK(63, 56); + resx.res0 |= GENMASK(63, 56); if 
(!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, CnP, IMP)) - res0 |= VTTBR_CNP_BIT; - set_sysreg_masks(kvm, VTTBR_EL2, res0, res1); + resx.res0 |= VTTBR_CNP_BIT; + set_sysreg_masks(kvm, VTTBR_EL2, resx); /* VTCR_EL2 */ - get_reg_fixed_bits(kvm, VTCR_EL2, &res0, &res1); - set_sysreg_masks(kvm, VTCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, VTCR_EL2); + set_sysreg_masks(kvm, VTCR_EL2, resx); /* VMPIDR_EL2 */ - res0 = GENMASK(63, 40) | GENMASK(30, 24); - res1 = BIT(31); - set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1); + resx.res0 = GENMASK(63, 40) | GENMASK(30, 24); + resx.res1 = BIT(31); + set_sysreg_masks(kvm, VMPIDR_EL2, resx); /* HCR_EL2 */ - get_reg_fixed_bits(kvm, HCR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HCR_EL2); + set_sysreg_masks(kvm, HCR_EL2, resx); /* HCRX_EL2 */ - get_reg_fixed_bits(kvm, HCRX_EL2, &res0, &res1); - set_sysreg_masks(kvm, HCRX_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HCRX_EL2); + set_sysreg_masks(kvm, HCRX_EL2, resx); /* HFG[RW]TR_EL2 */ - get_reg_fixed_bits(kvm, HFGRTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGRTR_EL2, res0, res1); - get_reg_fixed_bits(kvm, HFGWTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGWTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGRTR_EL2); + set_sysreg_masks(kvm, HFGRTR_EL2, resx); + resx = get_reg_fixed_bits(kvm, HFGWTR_EL2); + set_sysreg_masks(kvm, HFGWTR_EL2, resx); /* HDFG[RW]TR_EL2 */ - get_reg_fixed_bits(kvm, HDFGRTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGRTR_EL2, res0, res1); - get_reg_fixed_bits(kvm, HDFGWTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGWTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HDFGRTR_EL2); + set_sysreg_masks(kvm, HDFGRTR_EL2, resx); + resx = get_reg_fixed_bits(kvm, HDFGWTR_EL2); + set_sysreg_masks(kvm, HDFGWTR_EL2, resx); /* HFGITR_EL2 */ - get_reg_fixed_bits(kvm, HFGITR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGITR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGITR_EL2); + set_sysreg_masks(kvm, HFGITR_EL2, resx); /* HAFGRTR_EL2 - not a lot to see here */ - get_reg_fixed_bits(kvm, HAFGRTR_EL2, &res0, &res1); - set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HAFGRTR_EL2); + set_sysreg_masks(kvm, HAFGRTR_EL2, resx); /* HFG[RW]TR2_EL2 */ - get_reg_fixed_bits(kvm, HFGRTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGRTR2_EL2, res0, res1); - get_reg_fixed_bits(kvm, HFGWTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGWTR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGRTR2_EL2); + set_sysreg_masks(kvm, HFGRTR2_EL2, resx); + resx = get_reg_fixed_bits(kvm, HFGWTR2_EL2); + set_sysreg_masks(kvm, HFGWTR2_EL2, resx); /* HDFG[RW]TR2_EL2 */ - get_reg_fixed_bits(kvm, HDFGRTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGRTR2_EL2, res0, res1); - get_reg_fixed_bits(kvm, HDFGWTR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HDFGWTR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HDFGRTR2_EL2); + set_sysreg_masks(kvm, HDFGRTR2_EL2, resx); + resx = get_reg_fixed_bits(kvm, HDFGWTR2_EL2); + set_sysreg_masks(kvm, HDFGWTR2_EL2, resx); /* HFGITR2_EL2 */ - get_reg_fixed_bits(kvm, HFGITR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, HFGITR2_EL2); + set_sysreg_masks(kvm, HFGITR2_EL2, resx); /* TCR2_EL2 */ - get_reg_fixed_bits(kvm, TCR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, TCR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, TCR2_EL2); + set_sysreg_masks(kvm, TCR2_EL2, resx); /* SCTLR_EL1 */ - 
get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1); - set_sysreg_masks(kvm, SCTLR_EL1, res0, res1); + resx = get_reg_fixed_bits(kvm, SCTLR_EL1); + set_sysreg_masks(kvm, SCTLR_EL1, resx); /* SCTLR2_ELx */ - get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1); - set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1); - get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1); - set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, SCTLR2_EL1); + set_sysreg_masks(kvm, SCTLR2_EL1, resx); + resx = get_reg_fixed_bits(kvm, SCTLR2_EL2); + set_sysreg_masks(kvm, SCTLR2_EL2, resx); /* MDCR_EL2 */ - get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1); - set_sysreg_masks(kvm, MDCR_EL2, res0, res1); + resx = get_reg_fixed_bits(kvm, MDCR_EL2); + set_sysreg_masks(kvm, MDCR_EL2, resx); /* CNTHCTL_EL2 */ - res0 = GENMASK(63, 20); - res1 = 0; + resx.res0 = GENMASK(63, 20); + resx.res1 = 0; if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RME, IMP)) - res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK; + resx.res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK; if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, CNTPOFF)) { - res0 |= CNTHCTL_ECV; + resx.res0 |= CNTHCTL_ECV; if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, IMP)) - res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT | - CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT); + resx.res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT | + CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT); } if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) - res0 |= GENMASK(11, 8); - set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1); + resx.res0 |= GENMASK(11, 8); + set_sysreg_masks(kvm, CNTHCTL_EL2, resx); /* ICH_HCR_EL2 */ - res0 = ICH_HCR_EL2_RES0; - res1 = ICH_HCR_EL2_RES1; + resx.res0 = ICH_HCR_EL2_RES0; + resx.res1 = ICH_HCR_EL2_RES1; if (!(kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_TDS)) - res0 |= ICH_HCR_EL2_TDIR; + resx.res0 |= ICH_HCR_EL2_TDIR; /* No GICv4 is presented to the guest */ - res0 |= ICH_HCR_EL2_DVIM | ICH_HCR_EL2_vSGIEOICount; - set_sysreg_masks(kvm, ICH_HCR_EL2, res0, res1); + resx.res0 |= ICH_HCR_EL2_DVIM | ICH_HCR_EL2_vSGIEOICount; + set_sysreg_masks(kvm, ICH_HCR_EL2, resx); /* VNCR_EL2 */ - set_sysreg_masks(kvm, VNCR_EL2, VNCR_EL2_RES0, VNCR_EL2_RES1); + resx.res0 = VNCR_EL2_RES0; + resx.res1 = VNCR_EL2_RES1; + set_sysreg_masks(kvm, VNCR_EL2, resx); out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) From f9d58956423844237e18a758dc0f1b2cf6480042 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:14 +0000 Subject: [PATCH 71/86] KVM: arm64: Extend unified RESx handling to runtime sanitisation Add a new helper to retrieve the RESx values for a given system register, and use it for the runtime sanitisation. This results in slightly better code generation for a fairly hot path in the hypervisor, and additionally covers all sanitised registers in all conditions, not just the VNCR-based ones. 
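Sanitisation itself then collapses to a pair of mask operations; the kvm_vcpu_apply_reg_masks() hunk below amounts to:

	struct resx rx = kvm_get_sysreg_resx(vcpu->kvm, sr);

	v &= ~rx.res0;	/* RES0 bits must read as zero */
	v |= rx.res1;	/* RES1 bits must read as one */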
Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-6-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++ arch/arm64/kvm/emulate-nested.c | 10 +--------- arch/arm64/kvm/nested.c | 13 ++++--------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 799f494a1349..20ebc1610ac8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -635,6 +635,21 @@ struct kvm_sysreg_masks { struct resx mask[NR_SYS_REGS - __SANITISED_REG_START__]; }; +static inline struct resx __kvm_get_sysreg_resx(struct kvm_arch *arch, + enum vcpu_sysreg sr) +{ + struct kvm_sysreg_masks *masks; + + masks = arch->sysreg_masks; + if (likely(masks && + sr >= __SANITISED_REG_START__ && sr < NR_SYS_REGS)) + return masks->mask[sr - __SANITISED_REG_START__]; + + return (struct resx){}; +} + +#define kvm_get_sysreg_resx(k, sr) __kvm_get_sysreg_resx(&(k)->arch, (sr)) + static inline void __kvm_set_sysreg_resx(struct kvm_arch *arch, enum vcpu_sysreg sr, struct resx resx) { diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 774cfbf5b43b..43334cd2db9e 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2427,15 +2427,7 @@ static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu, static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr) { - struct kvm_sysreg_masks *masks; - - /* Only handle the VNCR-backed regs for now */ - if (sr < __VNCR_START__) - return 0; - - masks = kvm->arch.sysreg_masks; - - return masks->mask[sr - __SANITISED_REG_START__].res0; + return kvm_get_sysreg_resx(kvm, sr).res0; } static bool check_fgt_bit(struct kvm_vcpu *vcpu, enum vcpu_sysreg sr, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index c5a45bc62153..75a23f1c56d1 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1669,16 +1669,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr, u64 v) { - struct kvm_sysreg_masks *masks; + struct resx resx; - masks = vcpu->kvm->arch.sysreg_masks; - - if (masks) { - sr -= __SANITISED_REG_START__; - - v &= ~masks->mask[sr].res0; - v |= masks->mask[sr].res1; - } + resx = kvm_get_sysreg_resx(vcpu->kvm, sr); + v &= ~resx.res0; + v |= resx.res1; return v; } From bbea27636e660df907ebf0d36e3dfca5c77cfbc0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:15 +0000 Subject: [PATCH 72/86] KVM: arm64: Inherit RESx bits from FGT register descriptors The FGT registers have their computed RESx bits stashed in specific descriptors, which we can easily use when computing the masks used for the guest. This removes a bit of boilerplate code. 
Reviewed-by: Joey Gouly Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://patch.msgid.link/20260202184329.2724080-7-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 2214c06902f8..9ad7eb5f4b98 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1342,6 +1342,11 @@ static struct resx compute_reg_resx_bits(struct kvm *kvm, resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, require, exclude); + if (r->feat_map.flags & MASKS_POINTER) { + resx.res0 |= r->feat_map.masks->res0; + resx.res1 |= r->feat_map.masks->res1; + } + tmp = compute_resx_bits(kvm, &r->feat_map, 1, require, exclude); resx.res0 |= tmp.res0; @@ -1422,47 +1427,36 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg) switch (reg) { case HFGRTR_EL2: resx = compute_reg_resx_bits(kvm, &hfgrtr_desc, 0, 0); - resx.res1 |= HFGRTR_EL2_RES1; break; case HFGWTR_EL2: resx = compute_reg_resx_bits(kvm, &hfgwtr_desc, 0, 0); - resx.res1 |= HFGWTR_EL2_RES1; break; case HFGITR_EL2: resx = compute_reg_resx_bits(kvm, &hfgitr_desc, 0, 0); - resx.res1 |= HFGITR_EL2_RES1; break; case HDFGRTR_EL2: resx = compute_reg_resx_bits(kvm, &hdfgrtr_desc, 0, 0); - resx.res1 |= HDFGRTR_EL2_RES1; break; case HDFGWTR_EL2: resx = compute_reg_resx_bits(kvm, &hdfgwtr_desc, 0, 0); - resx.res1 |= HDFGWTR_EL2_RES1; break; case HAFGRTR_EL2: resx = compute_reg_resx_bits(kvm, &hafgrtr_desc, 0, 0); - resx.res1 |= HAFGRTR_EL2_RES1; break; case HFGRTR2_EL2: resx = compute_reg_resx_bits(kvm, &hfgrtr2_desc, 0, 0); - resx.res1 |= HFGRTR2_EL2_RES1; break; case HFGWTR2_EL2: resx = compute_reg_resx_bits(kvm, &hfgwtr2_desc, 0, 0); - resx.res1 |= HFGWTR2_EL2_RES1; break; case HFGITR2_EL2: resx = compute_reg_resx_bits(kvm, &hfgitr2_desc, 0, 0); - resx.res1 |= HFGITR2_EL2_RES1; break; case HDFGRTR2_EL2: resx = compute_reg_resx_bits(kvm, &hdfgrtr2_desc, 0, 0); - resx.res1 |= HDFGRTR2_EL2_RES1; break; case HDFGWTR2_EL2: resx = compute_reg_resx_bits(kvm, &hdfgwtr2_desc, 0, 0); - resx.res1 |= HDFGWTR2_EL2_RES1; break; case HCRX_EL2: resx = compute_reg_resx_bits(kvm, &hcrx_desc, 0, 0); From 459fc4e77e1ac932e47cb4a6d1a01b3be79fd41c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Feb 2026 18:43:16 +0000 Subject: [PATCH 73/86] KVM: arm64: Allow RES1 bits to be inferred from configuration So far, when a bit field is tied to an unsupported feature, we set it as RES0. This is almost correct, but there are a few exceptions where the bits become RES1. Add an AS_RES1 qualifier that instructs the RESx computing code to simply do that.
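A concrete case is SCTLR_EL1.SPAN, converted in the next patch: the bit is a normal feature bit with FEAT_PAN, but the architecture makes it RES1 when PAN is absent, which is now expressible directly in the feature map:

	NEEDS_FEAT_FLAG(SCTLR_EL1_SPAN, AS_RES1, FEAT_PAN),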
Reviewed-by: Joey Gouly
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-8-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 9ad7eb5f4b98..72c6dd7656ba 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -24,6 +24,7 @@ struct reg_bits_to_feat_map {
 #define CALL_FUNC	BIT(1)	/* Needs to evaluate tons of crap */
 #define FIXED_VALUE	BIT(2)	/* RAZ/WI or RAO/WI in KVM */
 #define MASKS_POINTER	BIT(3)	/* Pointer to fgt_masks struct instead of bits */
+#define AS_RES1		BIT(4)	/* RES1 when not supported */
 
 	unsigned long flags;
 
@@ -1315,8 +1316,12 @@ static struct resx __compute_fixed_bits(struct kvm *kvm,
 		else
 			match = idreg_feat_match(kvm, &map[i]);
 
-		if (!match || (map[i].flags & FIXED_VALUE))
-			resx.res0 |= reg_feat_map_bits(&map[i]);
+		if (!match || (map[i].flags & FIXED_VALUE)) {
+			if (map[i].flags & AS_RES1)
+				resx.res1 |= reg_feat_map_bits(&map[i]);
+			else
+				resx.res0 |= reg_feat_map_bits(&map[i]);
+		}
 	}
 
 	return resx;

From c27b8b7aabefb8ef226c3dfbc86ef7df6a9958c2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:17 +0000
Subject: [PATCH 74/86] KVM: arm64: Correctly handle SCTLR_EL1 RES1 bits for
 unsupported features

A bunch of SCTLR_EL1 bits must be set to RES1 when the controlling
feature is not present. Add the AS_RES1 qualifier where needed.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-9-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 72c6dd7656ba..25ef1873a6c3 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -1085,27 +1085,28 @@ static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2,
 					tcr2_el2_feat_map, FEAT_TCR2);
 
 static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
-	NEEDS_FEAT(SCTLR_EL1_CP15BEN |
-		   SCTLR_EL1_ITD |
-		   SCTLR_EL1_SED,
-		   FEAT_AA32EL0),
+	NEEDS_FEAT(SCTLR_EL1_CP15BEN, FEAT_AA32EL0),
+	NEEDS_FEAT_FLAG(SCTLR_EL1_ITD |
+			SCTLR_EL1_SED,
+			AS_RES1, FEAT_AA32EL0),
 	NEEDS_FEAT(SCTLR_EL1_BT0 |
 		   SCTLR_EL1_BT1,
 		   FEAT_BTI),
 	NEEDS_FEAT(SCTLR_EL1_CMOW, FEAT_CMOW),
-	NEEDS_FEAT(SCTLR_EL1_TSCXT, feat_csv2_2_csv2_1p2),
-	NEEDS_FEAT(SCTLR_EL1_EIS |
-		   SCTLR_EL1_EOS,
-		   FEAT_ExS),
+	NEEDS_FEAT_FLAG(SCTLR_EL1_TSCXT,
+			AS_RES1, feat_csv2_2_csv2_1p2),
+	NEEDS_FEAT_FLAG(SCTLR_EL1_EIS |
+			SCTLR_EL1_EOS,
+			AS_RES1, FEAT_ExS),
 	NEEDS_FEAT(SCTLR_EL1_EnFPM, FEAT_FPMR),
 	NEEDS_FEAT(SCTLR_EL1_IESB, FEAT_IESB),
 	NEEDS_FEAT(SCTLR_EL1_EnALS, FEAT_LS64),
 	NEEDS_FEAT(SCTLR_EL1_EnAS0, FEAT_LS64_ACCDATA),
 	NEEDS_FEAT(SCTLR_EL1_EnASR, FEAT_LS64_V),
 	NEEDS_FEAT(SCTLR_EL1_nAA, FEAT_LSE2),
-	NEEDS_FEAT(SCTLR_EL1_LSMAOE |
-		   SCTLR_EL1_nTLSMD,
-		   FEAT_LSMAOC),
+	NEEDS_FEAT_FLAG(SCTLR_EL1_LSMAOE |
+			SCTLR_EL1_nTLSMD,
+			AS_RES1, FEAT_LSMAOC),
 	NEEDS_FEAT(SCTLR_EL1_EE, FEAT_MixedEnd),
 	NEEDS_FEAT(SCTLR_EL1_E0E, feat_mixedendel0),
 	NEEDS_FEAT(SCTLR_EL1_MSCEn, FEAT_MOPS),
@@ -1121,7 +1122,8 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
 	NEEDS_FEAT(SCTLR_EL1_NMI |
 		   SCTLR_EL1_SPINTMASK,
 		   FEAT_NMI),
-	NEEDS_FEAT(SCTLR_EL1_SPAN, FEAT_PAN),
+	NEEDS_FEAT_FLAG(SCTLR_EL1_SPAN,
+			AS_RES1, FEAT_PAN),
 	NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3),
 	NEEDS_FEAT(SCTLR_EL1_EnDA |
 		   SCTLR_EL1_EnDB |

From fb86207bdc5bf4d5743eab78d6babbecda6c2609 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:18 +0000
Subject: [PATCH 75/86] KVM: arm64: Convert HCR_EL2.RW to AS_RES1

Now that we have the AS_RES1 constraint, it becomes trivial to express
the HCR_EL2.RW behaviour.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-10-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 25ef1873a6c3..70aa5587d99b 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -389,19 +389,6 @@ static bool feat_vmid16(struct kvm *kvm)
 	return kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16);
 }
 
-static bool compute_hcr_rw(struct kvm *kvm, u64 *bits)
-{
-	/* This is purely academic: AArch32 and NV are mutually exclusive */
-	if (bits) {
-		if (kvm_has_feat(kvm, FEAT_AA32EL1))
-			*bits &= ~HCR_EL2_RW;
-		else
-			*bits |= HCR_EL2_RW;
-	}
-
-	return true;
-}
-
 static bool compute_hcr_e2h(struct kvm *kvm, u64 *bits)
 {
 	if (bits) {
@@ -967,7 +954,7 @@ static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2,
 
 static const struct reg_bits_to_feat_map hcr_feat_map[] = {
 	NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0),
-	NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw),
+	NEEDS_FEAT_FLAG(HCR_EL2_RW, AS_RES1, FEAT_AA32EL1),
 	NEEDS_FEAT(HCR_EL2_HCD, not_feat_aa64el3),
 	NEEDS_FEAT(HCR_EL2_AMO |
 		   HCR_EL2_BSU |

From 8d94458263bb2d44d8ba461327a1e18c05cfc453 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:19 +0000
Subject: [PATCH 76/86] KVM: arm64: Simplify FIXED_VALUE handling

The FIXED_VALUE qualifier (mostly used for HCR_EL2) is pointlessly
complicated, as it tries to piggy-back on the previous RES0 handling
while being done in a different phase, on different data.

Instead, make it an integral part of the RESx computation, and allow
it to directly set RESx bits. This is much easier to understand.

It also paves the way for some additional changes that will allow the
full removal of the FIXED_VALUE handling.
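To make the res0/res1 pair concrete for readers who are new to this
code: a sanitised register value is obtained by clearing the RES0 bits
and setting the RES1 bits. A minimal illustrative sketch, using kernel
types (sanitise_reg() is a made-up name for illustration, not an
actual KVM helper):

	struct resx {
		u64	res0;	/* bits that must read as 0 */
		u64	res1;	/* bits that must read as 1 */
	};

	static u64 sanitise_reg(u64 val, struct resx rx)
	{
		/* A well-formed pair never overlaps: (res0 & res1) == 0 */
		val &= ~rx.res0;	/* RES0 bits forced to 0 */
		val |= rx.res1;		/* RES1 bits forced to 1 */
		return val;
	}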
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-11-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 66 ++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 44 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 70aa5587d99b..d3467bac40fc 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -37,7 +37,7 @@ struct reg_bits_to_feat_map {
 			s8 lo_lim;
 		};
 		bool (*match)(struct kvm *);
-		bool (*fval)(struct kvm *, u64 *);
+		bool (*fval)(struct kvm *, struct resx *);
 	};
 };
 
@@ -389,14 +389,12 @@ static bool feat_vmid16(struct kvm *kvm)
 	return kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16);
 }
 
-static bool compute_hcr_e2h(struct kvm *kvm, u64 *bits)
+static bool compute_hcr_e2h(struct kvm *kvm, struct resx *bits)
 {
-	if (bits) {
-		if (kvm_has_feat(kvm, FEAT_E2H0))
-			*bits &= ~HCR_EL2_E2H;
-		else
-			*bits |= HCR_EL2_E2H;
-	}
+	if (kvm_has_feat(kvm, FEAT_E2H0))
+		bits->res0 |= HCR_EL2_E2H;
+	else
+		bits->res1 |= HCR_EL2_E2H;
 
 	return true;
 }
@@ -1280,12 +1278,11 @@ static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map
 	}
 }
 
-static struct resx __compute_fixed_bits(struct kvm *kvm,
-					const struct reg_bits_to_feat_map *map,
-					int map_size,
-					u64 *fixed_bits,
-					unsigned long require,
-					unsigned long exclude)
+static struct resx compute_resx_bits(struct kvm *kvm,
+				     const struct reg_bits_to_feat_map *map,
+				     int map_size,
+				     unsigned long require,
+				     unsigned long exclude)
 {
 	struct resx resx = {};
 
@@ -1298,14 +1295,18 @@ static struct resx __compute_fixed_bits(struct kvm *kvm,
 		if (map[i].flags & exclude)
 			continue;
 
-		if (map[i].flags & CALL_FUNC)
-			match = (map[i].flags & FIXED_VALUE) ?
-				map[i].fval(kvm, fixed_bits) :
-				map[i].match(kvm);
-		else
+		switch (map[i].flags & (CALL_FUNC | FIXED_VALUE)) {
+		case CALL_FUNC | FIXED_VALUE:
+			map[i].fval(kvm, &resx);
+			continue;
+		case CALL_FUNC:
+			match = map[i].match(kvm);
+			break;
+		default:
 			match = idreg_feat_match(kvm, &map[i]);
+		}
 
-		if (!match || (map[i].flags & FIXED_VALUE)) {
+		if (!match) {
 			if (map[i].flags & AS_RES1)
 				resx.res1 |= reg_feat_map_bits(&map[i]);
 			else
@@ -1316,16 +1317,6 @@ static struct resx __compute_fixed_bits(struct kvm *kvm,
 	return resx;
 }
 
-static struct resx compute_resx_bits(struct kvm *kvm,
-				     const struct reg_bits_to_feat_map *map,
-				     int map_size,
-				     unsigned long require,
-				     unsigned long exclude)
-{
-	return __compute_fixed_bits(kvm, map, map_size, NULL,
-				    require, exclude | FIXED_VALUE);
-}
-
 static struct resx compute_reg_resx_bits(struct kvm *kvm,
 					 const struct reg_feat_map_desc *r,
 					 unsigned long require,
@@ -1366,16 +1357,6 @@ static u64 compute_fgu_bits(struct kvm *kvm, const struct reg_feat_map_desc *r)
 	return resx.res0 | resx.res1;
 }
 
-static struct resx compute_reg_fixed_bits(struct kvm *kvm,
-					  const struct reg_feat_map_desc *r,
-					  u64 *fixed_bits,
-					  unsigned long require,
-					  unsigned long exclude)
-{
-	return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz,
-				    fixed_bits, require | FIXED_VALUE, exclude);
-}
-
 void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
 {
 	u64 val = 0;
@@ -1415,7 +1396,6 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
 
 struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
 {
-	u64 fixed = 0, mask;
 	struct resx resx;
 
 	switch (reg) {
@@ -1457,10 +1437,8 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
 		resx.res1 |= __HCRX_EL2_RES1;
 		break;
 	case HCR_EL2:
-		mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0).res0;
 		resx = compute_reg_resx_bits(kvm, &hcr_desc, 0, 0);
-		resx.res0 |= (mask & ~fixed);
-		resx.res1 |= HCR_EL2_RES1 | (mask & fixed);
+		resx.res1 |= HCR_EL2_RES1;
 		break;
 	case SCTLR2_EL1:
 	case SCTLR2_EL2:

From ad90512f12fef5506d1f72cdfbd720eb701eab8c Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:20 +0000
Subject: [PATCH 77/86] KVM: arm64: Add REQUIRES_E2H1 constraint as
 configuration flags

A bunch of EL2 configurations are very similar to their EL1
counterparts, with the added constraint that HCR_EL2.E2H is 1. For
us, this means HCR_EL2.E2H being RES1, which is something we can
statically evaluate.

Add a REQUIRES_E2H1 constraint, which allows us to express conditions
in a much simpler way (without extra code).

Existing occurrences are converted before we add a lot more.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-12-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index d3467bac40fc..84c577672e97 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -25,6 +25,7 @@ struct reg_bits_to_feat_map {
 #define FIXED_VALUE	BIT(2)	/* RAZ/WI or RAO/WI in KVM */
 #define MASKS_POINTER	BIT(3)	/* Pointer to fgt_masks struct instead of bits */
 #define AS_RES1		BIT(4)	/* RES1 when not supported */
+#define REQUIRES_E2H1	BIT(5)	/* Add HCR_EL2.E2H RES1 as a pre-condition */
 
 	unsigned long flags;
 
@@ -311,21 +312,6 @@ static bool feat_trbe_mpam(struct kvm *kvm)
 		(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM));
 }
 
-static bool feat_asid2_e2h1(struct kvm *kvm)
-{
-	return kvm_has_feat(kvm, FEAT_ASID2) && !kvm_has_feat(kvm, FEAT_E2H0);
-}
-
-static bool feat_d128_e2h1(struct kvm *kvm)
-{
-	return kvm_has_feat(kvm, FEAT_D128) && !kvm_has_feat(kvm, FEAT_E2H0);
-}
-
-static bool feat_mec_e2h1(struct kvm *kvm)
-{
-	return kvm_has_feat(kvm, FEAT_MEC) && !kvm_has_feat(kvm, FEAT_E2H0);
-}
-
 static bool feat_ebep_pmuv3_ss(struct kvm *kvm)
 {
 	return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, FEAT_PMUv3_SS);
@@ -1045,15 +1031,15 @@ static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1,
 					sctlr2_feat_map, FEAT_SCTLR2);
 
 static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
-	NEEDS_FEAT(TCR2_EL2_FNG1 |
-		   TCR2_EL2_FNG0 |
-		   TCR2_EL2_A2,
-		   feat_asid2_e2h1),
-	NEEDS_FEAT(TCR2_EL2_DisCH1 |
-		   TCR2_EL2_DisCH0 |
-		   TCR2_EL2_D128,
-		   feat_d128_e2h1),
-	NEEDS_FEAT(TCR2_EL2_AMEC1, feat_mec_e2h1),
+	NEEDS_FEAT_FLAG(TCR2_EL2_FNG1 |
+			TCR2_EL2_FNG0 |
+			TCR2_EL2_A2,
+			REQUIRES_E2H1, FEAT_ASID2),
+	NEEDS_FEAT_FLAG(TCR2_EL2_DisCH1 |
+			TCR2_EL2_DisCH0 |
+			TCR2_EL2_D128,
+			REQUIRES_E2H1, FEAT_D128),
+	NEEDS_FEAT_FLAG(TCR2_EL2_AMEC1, REQUIRES_E2H1, FEAT_MEC),
 	NEEDS_FEAT(TCR2_EL2_AMEC0, FEAT_MEC),
 	NEEDS_FEAT(TCR2_EL2_HAFT, FEAT_HAFT),
 	NEEDS_FEAT(TCR2_EL2_PTTWI |
@@ -1284,6 +1270,7 @@ static struct resx compute_resx_bits(struct kvm *kvm,
 				     unsigned long require,
 				     unsigned long exclude)
 {
+	bool e2h0 = kvm_has_feat(kvm, FEAT_E2H0);
 	struct resx resx = {};
 
 	for (int i = 0; i < map_size; i++) {
@@ -1306,6 +1293,9 @@ static struct resx compute_resx_bits(struct kvm *kvm,
 			match = idreg_feat_match(kvm, &map[i]);
 		}
 
+		if (map[i].flags & REQUIRES_E2H1)
+			match &= !e2h0;
+
 		if (!match) {
 			if (map[i].flags & AS_RES1)
 				resx.res1 |= reg_feat_map_bits(&map[i]);

From d406fcb2030e3efe2c5a7f043028cb3727f522d8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:21 +0000
Subject: [PATCH 78/86] KVM: arm64: Add RES1_WHEN_E2Hx constraints as
 configuration flags

"Thanks" to VHE, SCTLR_EL2 radically changes shape depending on the
value of HCR_EL2.E2H, as a lot of the bits that didn't have much
meaning with E2H=0 start impacting EL0 with E2H=1.

This has a direct impact on the RESx behaviour of these bits, and we
need a way to express them.

For this purpose, introduce two new constraints that, when the
controlling feature is not present, force the field to RES1 depending
on the value of E2H. Note that RES0 is still implicit.

This allows diverging RESx values depending on the value of E2H,
something that is required by a bunch of SCTLR_EL2 bits.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-13-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 84c577672e97..7e8e42c1cee4 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -26,6 +26,8 @@ struct reg_bits_to_feat_map {
 #define MASKS_POINTER	BIT(3)	/* Pointer to fgt_masks struct instead of bits */
 #define AS_RES1		BIT(4)	/* RES1 when not supported */
 #define REQUIRES_E2H1	BIT(5)	/* Add HCR_EL2.E2H RES1 as a pre-condition */
+#define RES1_WHEN_E2H0	BIT(6)	/* RES1 when E2H=0 and not supported */
+#define RES1_WHEN_E2H1	BIT(7)	/* RES1 when E2H=1 and not supported */
 
 	unsigned long flags;
 
@@ -1297,10 +1299,14 @@ static struct resx compute_resx_bits(struct kvm *kvm,
 			match &= !e2h0;
 
 		if (!match) {
-			if (map[i].flags & AS_RES1)
-				resx.res1 |= reg_feat_map_bits(&map[i]);
+			u64 bits = reg_feat_map_bits(&map[i]);
+
+			if ((map[i].flags & AS_RES1) ||
+			    (e2h0 && (map[i].flags & RES1_WHEN_E2H0)) ||
+			    (!e2h0 && (map[i].flags & RES1_WHEN_E2H1)))
+				resx.res1 |= bits;
 			else
-				resx.res0 |= reg_feat_map_bits(&map[i]);
+				resx.res0 |= bits;
 		}
 	}

From d2f629aa75bef1c346f17ca195271582dafc6f3b Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:22 +0000
Subject: [PATCH 79/86] KVM: arm64: Move RESx into individual register
 descriptors

Instead of hacking the RES1 bits at runtime, move them into the
register descriptors. This makes it significantly nicer.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-14-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 48 +++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 7e8e42c1cee4..474d5c8038c2 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -28,6 +28,7 @@ struct reg_bits_to_feat_map {
 #define REQUIRES_E2H1	BIT(5)	/* Add HCR_EL2.E2H RES1 as a pre-condition */
 #define RES1_WHEN_E2H0	BIT(6)	/* RES1 when E2H=0 and not supported */
 #define RES1_WHEN_E2H1	BIT(7)	/* RES1 when E2H=1 and not supported */
+#define FORCE_RESx	BIT(8)	/* Unconditional RESx */
 
 	unsigned long flags;
 
@@ -87,6 +88,12 @@ struct reg_feat_map_desc {
 		.match = (fun),						\
 	}
 
+#define __NEEDS_FEAT_0(m, f, w, ...)					\
+	{								\
+		.w = (m),						\
+		.flags = (f),						\
+	}
+
 #define __NEEDS_FEAT_FLAG(m, f, w, ...)					\
 	CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, w, __VA_ARGS__)
 
@@ -105,10 +112,14 @@ struct reg_feat_map_desc {
  */
 #define NEEDS_FEAT(m, ...)	NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__)
 
+/* Declare fixed RESx bits */
+#define FORCE_RES0(m)		NEEDS_FEAT_FLAG(m, FORCE_RESx)
+#define FORCE_RES1(m)		NEEDS_FEAT_FLAG(m, FORCE_RESx | AS_RES1)
+
 /*
- * Declare the dependency between a non-FGT register, a set of
- * feature, and the set of individual bits it contains. This generates
- * a struct reg_feat_map_desc.
+ * Declare the dependency between a non-FGT register, a set of features,
+ * and the set of individual bits it contains. This generates a struct
+ * reg_feat_map_desc.
  */
 #define DECLARE_FEAT_MAP(n, r, m, f)					\
 	struct reg_feat_map_desc n = {					\
@@ -1007,6 +1018,8 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
 		   HCR_EL2_TWEDEn,
 		   FEAT_TWED),
 	NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
+	FORCE_RES0(HCR_EL2_RES0),
+	FORCE_RES1(HCR_EL2_RES1),
 };
 
 static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2,
@@ -1027,6 +1040,8 @@ static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
 		   SCTLR2_EL1_CPTM |
 		   SCTLR2_EL1_CPTM0,
 		   FEAT_CPA2),
+	FORCE_RES0(SCTLR2_EL1_RES0),
+	FORCE_RES1(SCTLR2_EL1_RES1),
 };
 
 static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1,
@@ -1052,6 +1067,8 @@ static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
 			TCR2_EL2_E0POE,
 			FEAT_S1POE),
 	NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE),
+	FORCE_RES0(TCR2_EL2_RES0),
+	FORCE_RES1(TCR2_EL2_RES1),
 };
 
 static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2,
@@ -1129,6 +1146,8 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
 		   SCTLR_EL1_A |
 		   SCTLR_EL1_M,
 		   FEAT_AA64EL1),
+	FORCE_RES0(SCTLR_EL1_RES0),
+	FORCE_RES1(SCTLR_EL1_RES1),
 };
 
 static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1,
@@ -1163,6 +1182,8 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
 		   MDCR_EL2_TDE |
 		   MDCR_EL2_TDRA,
 		   FEAT_AA64EL1),
+	FORCE_RES0(MDCR_EL2_RES0),
+	FORCE_RES1(MDCR_EL2_RES1),
 };
 
 static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2,
@@ -1201,6 +1222,8 @@ static const struct reg_bits_to_feat_map vtcr_el2_feat_map[] = {
 		   VTCR_EL2_SL0 |
 		   VTCR_EL2_T0SZ,
 		   FEAT_AA64EL1),
+	FORCE_RES0(VTCR_EL2_RES0),
+	FORCE_RES1(VTCR_EL2_RES1),
 };
 
 static const DECLARE_FEAT_MAP(vtcr_el2_desc, VTCR_EL2,
@@ -1211,8 +1234,14 @@ static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
 {
 	u64 mask = 0;
 
+	/*
+	 * Don't account for FORCE_RESx that are architectural, and
+	 * therefore part of the resx parameter. Other FORCE_RESx bits
+	 * are implementation choices, and therefore accounted for.
+	 */
 	for (int i = 0; i < map_size; i++)
-		mask |= map[i].bits;
+		if (!((map[i].flags & FORCE_RESx) && (map[i].bits & resx)))
+			mask |= map[i].bits;
 
 	if (mask != ~resx)
 		kvm_err("Undefined %s behaviour, bits %016llx\n",
@@ -1284,13 +1313,16 @@ static struct resx compute_resx_bits(struct kvm *kvm,
 		if (map[i].flags & exclude)
 			continue;
 
-		switch (map[i].flags & (CALL_FUNC | FIXED_VALUE)) {
+		switch (map[i].flags & (FORCE_RESx | CALL_FUNC | FIXED_VALUE)) {
 		case CALL_FUNC | FIXED_VALUE:
 			map[i].fval(kvm, &resx);
 			continue;
 		case CALL_FUNC:
 			match = map[i].match(kvm);
 			break;
+		case FORCE_RESx:
+			match = false;
+			break;
 		default:
 			match = idreg_feat_match(kvm, &map[i]);
 		}
@@ -1434,28 +1466,22 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
 		break;
 	case HCR_EL2:
 		resx = compute_reg_resx_bits(kvm, &hcr_desc, 0, 0);
-		resx.res1 |= HCR_EL2_RES1;
 		break;
 	case SCTLR2_EL1:
 	case SCTLR2_EL2:
 		resx = compute_reg_resx_bits(kvm, &sctlr2_desc, 0, 0);
-		resx.res1 |= SCTLR2_EL1_RES1;
 		break;
 	case TCR2_EL2:
 		resx = compute_reg_resx_bits(kvm, &tcr2_el2_desc, 0, 0);
-		resx.res1 |= TCR2_EL2_RES1;
 		break;
 	case SCTLR_EL1:
 		resx = compute_reg_resx_bits(kvm, &sctlr_el1_desc, 0, 0);
-		resx.res1 |= SCTLR_EL1_RES1;
 		break;
 	case MDCR_EL2:
 		resx = compute_reg_resx_bits(kvm, &mdcr_el2_desc, 0, 0);
-		resx.res1 |= MDCR_EL2_RES1;
 		break;
 	case VTCR_EL2:
 		resx = compute_reg_resx_bits(kvm, &vtcr_el2_desc, 0, 0);
-		resx.res1 |= VTCR_EL2_RES1;
 		break;
 	default:
 		WARN_ON_ONCE(1);

From f01e3429cf0e4b1ab20c9d51ebfa0d8514d8fe4d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:23 +0000
Subject: [PATCH 80/86] KVM: arm64: Simplify handling of HCR_EL2.E2H RESx

Now that we can link the RESx behaviour with the value of HCR_EL2.E2H,
we can trivially express the tautological constraint that makes E2H a
reserved value at all times.

Fun, isn't it?

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-15-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 474d5c8038c2..ae72f3b8e50b 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -388,16 +388,6 @@ static bool feat_vmid16(struct kvm *kvm)
 	return kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16);
 }
 
-static bool compute_hcr_e2h(struct kvm *kvm, struct resx *bits)
-{
-	if (kvm_has_feat(kvm, FEAT_E2H0))
-		bits->res0 |= HCR_EL2_E2H;
-	else
-		bits->res1 |= HCR_EL2_E2H;
-
-	return true;
-}
-
 static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = {
 	NEEDS_FEAT(HFGRTR_EL2_nAMAIR2_EL1 |
 		   HFGRTR_EL2_nMAIR2_EL1,
@@ -1017,7 +1007,7 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
 	NEEDS_FEAT(HCR_EL2_TWEDEL |
 		   HCR_EL2_TWEDEn,
 		   FEAT_TWED),
-	NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
+	NEEDS_FEAT_FLAG(HCR_EL2_E2H, RES1_WHEN_E2H1 | FORCE_RESx),
 	FORCE_RES0(HCR_EL2_RES0),
 	FORCE_RES1(HCR_EL2_RES1),
 };

From ab1f377b4c930e8d117cc461bd64d5866fc6aab8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:24 +0000
Subject: [PATCH 81/86] KVM: arm64: Get rid of FIXED_VALUE altogether

We have now killed every occurrence of FIXED_VALUE, and we can
therefore drop the whole infrastructure.

Good riddance.
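For reference, the qualifier vocabulary that survives this series can
be illustrated with a hypothetical map (the bit and feature names are
real, but this exact combination is only an example of the declaration
style, not code from this patch):

	static const struct reg_bits_to_feat_map example_feat_map[] = {
		/* RES0 when FEAT_PAN3 is absent (the default) */
		NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3),
		/* RES1 when FEAT_PAN is absent */
		NEEDS_FEAT_FLAG(SCTLR_EL1_SPAN, AS_RES1, FEAT_PAN),
		/* Architecturally reserved, unconditionally */
		FORCE_RES0(SCTLR_EL1_RES0),
		FORCE_RES1(SCTLR_EL1_RES1),
	};

The E2H-conditional qualifiers (REQUIRES_E2H1, RES1_WHEN_E2H0/1)
compose the same way, as the HCR_EL2.E2H entry above shows.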
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-16-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index ae72f3b8e50b..274ae049c4b3 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -22,13 +22,12 @@ struct reg_bits_to_feat_map {
 
 #define NEVER_FGU	BIT(0)	/* Can trap, but never UNDEF */
 #define CALL_FUNC	BIT(1)	/* Needs to evaluate tons of crap */
-#define FIXED_VALUE	BIT(2)	/* RAZ/WI or RAO/WI in KVM */
+#define FORCE_RESx	BIT(2)	/* Unconditional RESx */
 #define MASKS_POINTER	BIT(3)	/* Pointer to fgt_masks struct instead of bits */
 #define AS_RES1		BIT(4)	/* RES1 when not supported */
 #define REQUIRES_E2H1	BIT(5)	/* Add HCR_EL2.E2H RES1 as a pre-condition */
 #define RES1_WHEN_E2H0	BIT(6)	/* RES1 when E2H=0 and not supported */
 #define RES1_WHEN_E2H1	BIT(7)	/* RES1 when E2H=1 and not supported */
-#define FORCE_RESx	BIT(8)	/* Unconditional RESx */
 
 	unsigned long flags;
 
@@ -41,7 +40,6 @@ struct reg_bits_to_feat_map {
 			s8 lo_lim;
 		};
 		bool (*match)(struct kvm *);
-		bool (*fval)(struct kvm *, struct resx *);
 	};
 };
 
@@ -74,13 +72,6 @@ struct reg_feat_map_desc {
 		.lo_lim = id ##_## fld ##_## lim			\
 	}
 
-#define __NEEDS_FEAT_2(m, f, w, fun, dummy)				\
-	{								\
-		.w = (m),						\
-		.flags = (f) | CALL_FUNC,				\
-		.fval = (fun),						\
-	}
-
 #define __NEEDS_FEAT_1(m, f, w, fun)					\
 	{								\
 		.w = (m),						\
@@ -100,9 +91,6 @@ struct reg_feat_map_desc {
 #define NEEDS_FEAT_FLAG(m, f, ...)					\
 	__NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__)
 
-#define NEEDS_FEAT_FIXED(m, ...)					\
-	__NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0)
-
 #define NEEDS_FEAT_MASKS(p, ...)					\
 	__NEEDS_FEAT_FLAG(p, MASKS_POINTER, masks, __VA_ARGS__)
 
@@ -1303,19 +1291,12 @@ static struct resx compute_resx_bits(struct kvm *kvm,
 		if (map[i].flags & exclude)
 			continue;
 
-		switch (map[i].flags & (FORCE_RESx | CALL_FUNC | FIXED_VALUE)) {
-		case CALL_FUNC | FIXED_VALUE:
-			map[i].fval(kvm, &resx);
-			continue;
-		case CALL_FUNC:
-			match = map[i].match(kvm);
-			break;
-		case FORCE_RESx:
+		if (map[i].flags & FORCE_RESx)
 			match = false;
-			break;
-		default:
+		else if (map[i].flags & CALL_FUNC)
+			match = map[i].match(kvm);
+		else
 			match = idreg_feat_match(kvm, &map[i]);
-		}
 
 		if (map[i].flags & REQUIRES_E2H1)
 			match &= !e2h0;

From d784cfe697abdfd53f332d39d0cffcf03cbcafaa Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:25 +0000
Subject: [PATCH 82/86] KVM: arm64: Simplify handling of full register invalid
 constraint

Now that we embed the RESx bits in the register description, it
becomes easier to deal with registers that are simply not valid, as
their existence is not satisfied by the configuration (SCTLR2_ELx
without FEAT_SCTLR2, for example).

Such registers essentially become RES0 for any bit that wasn't
already advertised as RESx.
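A short worked illustration of that weed-out (hypothetical bit values,
not taken from any real register):

	struct resx rx = {
		.res0 = 0,
		.res1 = BIT(4),	/* e.g. an AS_RES1 bit of a missing feature */
	};

	/* The register itself is invalid: everything folds to RES0... */
	rx.res0 |= ~0ULL;
	/* ...and any overlap is resolved in favour of RES0 */
	rx.res1 &= ~rx.res0;	/* rx.res1 is now 0 */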
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-17-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 274ae049c4b3..b37b40744db9 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -1321,7 +1321,7 @@ static struct resx compute_reg_resx_bits(struct kvm *kvm,
 					 unsigned long require,
 					 unsigned long exclude)
 {
-	struct resx resx, tmp;
+	struct resx resx;
 
 	resx = compute_resx_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz,
 				 require, exclude);
@@ -1331,11 +1331,14 @@ static struct resx compute_reg_resx_bits(struct kvm *kvm,
 		resx.res1 |= r->feat_map.masks->res1;
 	}
 
-	tmp = compute_resx_bits(kvm, &r->feat_map, 1, require, exclude);
-
-	resx.res0 |= tmp.res0;
-	resx.res0 |= ~reg_feat_map_bits(&r->feat_map);
-	resx.res1 |= tmp.res1;
+	/*
+	 * If the register itself was not valid, all the non-RESx bits are
+	 * now considered RES0 (this matches the behaviour of registers such
+	 * as SCTLR2 and TCR2). Weed out any potential (though unlikely)
+	 * overlap with RES1 bits coming from the previous computation.
+	 */
+	resx.res0 |= compute_resx_bits(kvm, &r->feat_map, 1, require, exclude).res0;
+	resx.res1 &= ~resx.res0;
 
 	return resx;
 }

From d65bf6e317e7bb13612bd94e01c5a11b6fc67e9d Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:26 +0000
Subject: [PATCH 83/86] KVM: arm64: Remove all traces of FEAT_TME

FEAT_TME has been dropped from the architecture. Retrospectively.

I'm sure someone is crying somewhere, but most of us won't.

Clean-up time.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-18-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c                         |  7 -------
 arch/arm64/kvm/nested.c                         |  5 -----
 arch/arm64/tools/sysreg                         | 12 +++---------
 tools/perf/Documentation/perf-arm-spe.txt       |  1 -
 tools/testing/selftests/kvm/arm64/set_id_regs.c |  1 -
 5 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index b37b40744db9..c1b76a76a5e4 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -187,7 +187,6 @@ struct reg_feat_map_desc {
 #define FEAT_RME	ID_AA64PFR0_EL1, RME, IMP
 #define FEAT_MPAM	ID_AA64PFR0_EL1, MPAM, 1
 #define FEAT_S2FWB	ID_AA64MMFR2_EL1, FWB, IMP
-#define FEAT_TME	ID_AA64ISAR0_EL1, TME, IMP
#define FEAT_TWED	ID_AA64MMFR1_EL1, TWED, IMP
 #define FEAT_E2H0	ID_AA64MMFR4_EL1, E2H0, IMP
 #define FEAT_SRMASK	ID_AA64MMFR4_EL1, SRMASK, IMP
@@ -991,7 +990,6 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
 	NEEDS_FEAT(HCR_EL2_FIEN, feat_rasv1p1),
 	NEEDS_FEAT(HCR_EL2_GPF, FEAT_RME),
 	NEEDS_FEAT(HCR_EL2_FWB, FEAT_S2FWB),
-	NEEDS_FEAT(HCR_EL2_TME, FEAT_TME),
 	NEEDS_FEAT(HCR_EL2_TWEDEL |
 		   HCR_EL2_TWEDEn,
 		   FEAT_TWED),
@@ -1102,11 +1100,6 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
 	NEEDS_FEAT(SCTLR_EL1_EnRCTX, FEAT_SPECRES),
 	NEEDS_FEAT(SCTLR_EL1_DSSBS, FEAT_SSBS),
 	NEEDS_FEAT(SCTLR_EL1_TIDCP, FEAT_TIDCP1),
-	NEEDS_FEAT(SCTLR_EL1_TME0 |
-		   SCTLR_EL1_TME |
-		   SCTLR_EL1_TMT0 |
-		   SCTLR_EL1_TMT,
-		   FEAT_TME),
 	NEEDS_FEAT(SCTLR_EL1_TWEDEL |
 		   SCTLR_EL1_TWEDEn,
 		   FEAT_TWED),

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 75a23f1c56d1..96e899dbd919 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1505,11 +1505,6 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
 	u64 orig_val = val;
 
 	switch (reg) {
-	case SYS_ID_AA64ISAR0_EL1:
-		/* Support everything but TME */
-		val &= ~ID_AA64ISAR0_EL1_TME;
-		break;
-
 	case SYS_ID_AA64ISAR1_EL1:
 		/* Support everything but LS64 and Spec Invalidation */
 		val &= ~(ID_AA64ISAR1_EL1_LS64 |

diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 969a75615d61..650d7d477087 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1856,10 +1856,7 @@ UnsignedEnum	31:28	RDM
 	0b0000	NI
 	0b0001	IMP
 EndEnum
-UnsignedEnum	27:24	TME
-	0b0000	NI
-	0b0001	IMP
-EndEnum
+Res0	27:24
 UnsignedEnum	23:20	ATOMIC
 	0b0000	NI
 	0b0010	IMP
@@ -2432,10 +2429,7 @@ Field	57	EPAN
 Field	56	EnALS
 Field	55	EnAS0
 Field	54	EnASR
-Field	53	TME
-Field	52	TME0
-Field	51	TMT
-Field	50	TMT0
+Res0	53:50
 Field	49:46	TWEDEL
 Field	45	TWEDEn
 Field	44	DSSBS
@@ -3840,7 +3834,7 @@ Field	43	NV1
 Field	42	NV
 Field	41	API
 Field	40	APK
-Field	39	TME
+Res0	39
 Field	38	MIOCNCE
 Field	37	TEA
 Field	36	TERR

diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index 8b02e5b983fa..201a82bec0de 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -176,7 +176,6 @@ and inv_event_filter are:
   bit 10    - Remote access (FEAT_SPEv1p4)
   bit 11    - Misaligned access (FEAT_SPEv1p1)
   bit 12-15 - IMPLEMENTATION DEFINED events (when implemented)
-  bit 16    - Transaction (FEAT_TME)
   bit 17    - Partial or empty SME or SVE predicate (FEAT_SPEv1p1)
   bit 18    - Empty SME or SVE predicate (FEAT_SPEv1p1)
   bit 19    - L2D access (FEAT_SPEv1p4)

diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index c4815d365816..73de5be58bab 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -91,7 +91,6 @@ static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
-	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),

From fb40cb15e8ad1e7511966a953de0f409aaae4398 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:27 +0000
Subject: [PATCH 84/86] KVM: arm64: Remove all traces of HCR_EL2.MIOCNCE

MIOCNCE had the potential to eat your data, and was also never
implemented by anyone.

It's been retrospectively removed from the architecture, and we're
happy to follow that lead.
Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-19-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/config.c | 1 -
 arch/arm64/tools/sysreg | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index c1b76a76a5e4..8640f9c9b2e0 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -938,7 +938,6 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
 		   HCR_EL2_FMO |
 		   HCR_EL2_ID |
 		   HCR_EL2_IMO |
-		   HCR_EL2_MIOCNCE |
 		   HCR_EL2_PTW |
 		   HCR_EL2_SWIO |
 		   HCR_EL2_TACR |

diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 650d7d477087..724e6ad966c2 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -3834,8 +3834,7 @@ Field	43	NV1
 Field	42	NV
 Field	41	API
 Field	40	APK
-Res0	39
-Field	38	MIOCNCE
+Res0	39:38
 Field	37	TEA
 Field	36	TERR
 Field	35	TLOR

From e8ef27900c6a8c0727dcf62d4112d08c5046d33e Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:28 +0000
Subject: [PATCH 85/86] KVM: arm64: Add sanitisation to SCTLR_EL2

Sanitise SCTLR_EL2 the usual way. The most important aspect of this
is that we benefit from SCTLR_EL2.SPAN being RES1 when HCR_EL2.E2H==0.

Reviewed-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-20-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/kvm/config.c           | 82 +++++++++++++++++++++++++++++++
 arch/arm64/kvm/nested.c           |  4 ++
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 20ebc1610ac8..771ef1b61f9a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -495,7 +495,6 @@ enum vcpu_sysreg {
 	DBGVCR32_EL2,	/* Debug Vector Catch Register */
 
 	/* EL2 registers */
-	SCTLR_EL2,	/* System Control Register (EL2) */
 	ACTLR_EL2,	/* Auxiliary Control Register (EL2) */
 	CPTR_EL2,	/* Architectural Feature Trap Register (EL2) */
 	HACR_EL2,	/* Hypervisor Auxiliary Control Register */
@@ -526,6 +525,7 @@ enum vcpu_sysreg {
 
 	/* Anything from this can be RES0/RES1 sanitised */
 	MARKER(__SANITISED_REG_START__),
+	SCTLR_EL2,	/* System Control Register (EL2) */
 	TCR2_EL2,	/* Extended Translation Control Register (EL2) */
 	SCTLR2_EL2,	/* System Control Register 2 (EL2) */
 	MDCR_EL2,	/* Monitor Debug Configuration Register (EL2) */

diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 8640f9c9b2e0..d9f553cbf9df 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -1123,6 +1123,84 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
 static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1,
 					sctlr_el1_feat_map, FEAT_AA64EL1);
 
+static const struct reg_bits_to_feat_map sctlr_el2_feat_map[] = {
+	NEEDS_FEAT_FLAG(SCTLR_EL2_CP15BEN,
+			RES1_WHEN_E2H0 | REQUIRES_E2H1,
+			FEAT_AA32EL0),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_ITD |
+			SCTLR_EL2_SED,
+			RES1_WHEN_E2H1 | REQUIRES_E2H1,
+			FEAT_AA32EL0),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_BT0, REQUIRES_E2H1, FEAT_BTI),
+	NEEDS_FEAT(SCTLR_EL2_BT, FEAT_BTI),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_CMOW, REQUIRES_E2H1, FEAT_CMOW),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_TSCXT,
+			RES1_WHEN_E2H1 | REQUIRES_E2H1,
+			feat_csv2_2_csv2_1p2),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EIS |
+			SCTLR_EL2_EOS,
+			AS_RES1, FEAT_ExS),
+	NEEDS_FEAT(SCTLR_EL2_EnFPM, FEAT_FPMR),
+	NEEDS_FEAT(SCTLR_EL2_IESB, FEAT_IESB),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EnALS, REQUIRES_E2H1, FEAT_LS64),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EnAS0, REQUIRES_E2H1, FEAT_LS64_ACCDATA),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EnASR, REQUIRES_E2H1, FEAT_LS64_V),
+	NEEDS_FEAT(SCTLR_EL2_nAA, FEAT_LSE2),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_LSMAOE |
+			SCTLR_EL2_nTLSMD,
+			AS_RES1 | REQUIRES_E2H1, FEAT_LSMAOC),
+	NEEDS_FEAT(SCTLR_EL2_EE, FEAT_MixedEnd),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_E0E, REQUIRES_E2H1, feat_mixedendel0),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_MSCEn, REQUIRES_E2H1, FEAT_MOPS),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_ATA0 |
+			SCTLR_EL2_TCF0,
+			REQUIRES_E2H1, FEAT_MTE2),
+	NEEDS_FEAT(SCTLR_EL2_ATA |
+		   SCTLR_EL2_TCF,
+		   FEAT_MTE2),
+	NEEDS_FEAT(SCTLR_EL2_ITFSB, feat_mte_async),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_TCSO0, REQUIRES_E2H1, FEAT_MTE_STORE_ONLY),
+	NEEDS_FEAT(SCTLR_EL2_TCSO,
+		   FEAT_MTE_STORE_ONLY),
+	NEEDS_FEAT(SCTLR_EL2_NMI |
+		   SCTLR_EL2_SPINTMASK,
+		   FEAT_NMI),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_SPAN, AS_RES1 | REQUIRES_E2H1, FEAT_PAN),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EPAN, REQUIRES_E2H1, FEAT_PAN3),
+	NEEDS_FEAT(SCTLR_EL2_EnDA |
+		   SCTLR_EL2_EnDB |
+		   SCTLR_EL2_EnIA |
+		   SCTLR_EL2_EnIB,
+		   feat_pauth),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_EnTP2, REQUIRES_E2H1, FEAT_SME),
+	NEEDS_FEAT(SCTLR_EL2_EnRCTX, FEAT_SPECRES),
+	NEEDS_FEAT(SCTLR_EL2_DSSBS, FEAT_SSBS),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_TIDCP, REQUIRES_E2H1, FEAT_TIDCP1),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_TWEDEL |
+			SCTLR_EL2_TWEDEn,
+			REQUIRES_E2H1, FEAT_TWED),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_nTWE |
+			SCTLR_EL2_nTWI,
+			AS_RES1 | REQUIRES_E2H1, FEAT_AA64EL2),
+	NEEDS_FEAT_FLAG(SCTLR_EL2_UCI |
+			SCTLR_EL2_UCT |
+			SCTLR_EL2_DZE |
+			SCTLR_EL2_SA0,
+			REQUIRES_E2H1, FEAT_AA64EL2),
+	NEEDS_FEAT(SCTLR_EL2_WXN |
+		   SCTLR_EL2_I |
+		   SCTLR_EL2_SA |
+		   SCTLR_EL2_C |
+		   SCTLR_EL2_A |
+		   SCTLR_EL2_M,
+		   FEAT_AA64EL2),
+	FORCE_RES0(SCTLR_EL2_RES0),
+	FORCE_RES1(SCTLR_EL2_RES1),
+};
+
+static const DECLARE_FEAT_MAP(sctlr_el2_desc, SCTLR_EL2,
+					sctlr_el2_feat_map, FEAT_AA64EL2);
+
 static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
 	NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9),
 	NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock),
@@ -1247,6 +1325,7 @@ void __init check_feature_map(void)
 	check_reg_desc(&sctlr2_desc);
 	check_reg_desc(&tcr2_el2_desc);
 	check_reg_desc(&sctlr_el1_desc);
+	check_reg_desc(&sctlr_el2_desc);
 	check_reg_desc(&mdcr_el2_desc);
 	check_reg_desc(&vtcr_el2_desc);
 }
@@ -1443,6 +1522,9 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
 	case SCTLR_EL1:
 		resx = compute_reg_resx_bits(kvm, &sctlr_el1_desc, 0, 0);
 		break;
+	case SCTLR_EL2:
+		resx = compute_reg_resx_bits(kvm, &sctlr_el2_desc, 0, 0);
+		break;
 	case MDCR_EL2:
 		resx = compute_reg_resx_bits(kvm, &mdcr_el2_desc, 0, 0);
 		break;

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 96e899dbd919..ed710228484f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1766,6 +1766,10 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
 	resx = get_reg_fixed_bits(kvm, SCTLR_EL1);
 	set_sysreg_masks(kvm, SCTLR_EL1, resx);
 
+	/* SCTLR_EL2 */
+	resx = get_reg_fixed_bits(kvm, SCTLR_EL2);
+	set_sysreg_masks(kvm, SCTLR_EL2, resx);
+
 	/* SCTLR2_ELx */
 	resx = get_reg_fixed_bits(kvm, SCTLR2_EL1);
 	set_sysreg_masks(kvm, SCTLR2_EL1, resx);

From edba407843340c4b66134fce6c54a007c1ac83a2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 2 Feb 2026 18:43:29 +0000
Subject: [PATCH 86/86] KVM: arm64: Add debugfs file dumping computed RESx
 values

Computing RESx values is hard. Verifying that they are correct is
harder.

Add a debugfs file called "resx" that will dump all the RESx values
for a given VM. I found it useful; maybe you will too.
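For illustration, the output looks like this (register names are real,
but the path components and RESx values below are invented):

  $ cat /sys/kernel/debug/kvm/<pid>-<fd>/resx
             SCTLR_EL2:  RES0:ffffefe0e0000000  RES1:0000000000800800
              TCR2_EL2:  RES0:fffffffffff80000  RES1:0000000000000000
              MDCR_EL2:  RES0:fffff04000000000  RES1:0000000000000000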
Co-developed-by: Fuad Tabba
Signed-off-by: Fuad Tabba
Tested-by: Fuad Tabba
Link: https://patch.msgid.link/20260202184329.2724080-21-maz@kernel.org
Signed-off-by: Marc Zyngier
---
 arch/arm64/kvm/sys_regs.c | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 88a57ca36d96..d33c39ea8fad 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -5090,12 +5090,80 @@ static const struct seq_operations idregs_debug_sops = {
 
 DEFINE_SEQ_ATTRIBUTE(idregs_debug);
 
+static const struct sys_reg_desc *sr_resx_find(struct kvm *kvm, loff_t pos)
+{
+	unsigned long i, sr_idx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
+		const struct sys_reg_desc *r = &sys_reg_descs[i];
+
+		if (r->reg < __SANITISED_REG_START__)
+			continue;
+
+		if (sr_idx++ == pos)
+			return r;
+	}
+
+	return NULL;
+}
+
+static void *sr_resx_start(struct seq_file *s, loff_t *pos)
+{
+	struct kvm *kvm = s->private;
+
+	if (!kvm->arch.sysreg_masks)
+		return NULL;
+
+	return (void *)sr_resx_find(kvm, *pos);
+}
+
+static void *sr_resx_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct kvm *kvm = s->private;
+
+	(*pos)++;
+
+	return (void *)sr_resx_find(kvm, *pos);
+}
+
+static void sr_resx_stop(struct seq_file *s, void *v)
+{
+}
+
+static int sr_resx_show(struct seq_file *s, void *v)
+{
+	const struct sys_reg_desc *desc = v;
+	struct kvm *kvm = s->private;
+	struct resx resx;
+
+	if (!desc)
+		return 0;
+
+	resx = kvm_get_sysreg_resx(kvm, desc->reg);
+
+	seq_printf(s, "%20s:\tRES0:%016llx\tRES1:%016llx\n",
+		   desc->name, resx.res0, resx.res1);
+
+	return 0;
+}
+
+static const struct seq_operations sr_resx_sops = {
+	.start	= sr_resx_start,
+	.next	= sr_resx_next,
+	.stop	= sr_resx_stop,
+	.show	= sr_resx_show,
+};
+
+DEFINE_SEQ_ATTRIBUTE(sr_resx);
+
 void kvm_sys_regs_create_debugfs(struct kvm *kvm)
 {
 	kvm->arch.idreg_debugfs_iter = ~0;
 
 	debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm,
 			    &idregs_debug_fops);
+
+	debugfs_create_file("resx", 0444, kvm->debugfs_dentry, kvm,
+			    &sr_resx_fops);
 }
 
 static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg)