From c103c2dfe4975da31b67a0fcb95761359f30992d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:15 +0000 Subject: [PATCH 1/4] KVM: arm64: Remove dead code resetting HCR_EL2 for pKVM The pKVM lifecycle does not support tearing down the hypervisor and returning to the hyp stub once initialized. The transition to protected mode is one-way. Consequently, the code path in hyp-init.S responsible for resetting EL2 registers (triggered by kexec or hibernation) is unreachable in protected mode. Remove the dead code handling HCR_EL2 reset for ARM64_KVM_PROTECTED_MODE. No functional change intended. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index aada42522e7b..0d42eedc7167 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -260,11 +260,6 @@ reset: msr sctlr_el2, x5 isb -alternative_if ARM64_KVM_PROTECTED_MODE - mov_q x5, HCR_HOST_NVHE_FLAGS - msr_hcr_el2 x5 -alternative_else_nop_endif - /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 From f35abcbb8a084db4c24b66ccc8db0405c08e2f61 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:16 +0000 Subject: [PATCH 2/4] KVM: arm64: Trap MTE access and discovery when MTE is disabled If MTE is not supported by the hardware, or is disabled in the kernel configuration (`CONFIG_ARM64_MTE=n`) or command line (`arm64.nomte`), the kernel stops advertising MTE to userspace and avoids using MTE instructions. However, this is a software-level disable only. When MTE hardware is present and enabled by EL3 firmware, leaving `HCR_EL2.ATA` set allows the host to execute MTE instructions (STG, LDG, etc.) and access allocation tags in physical memory. Prevent this by clearing `HCR_EL2.ATA` when MTE is disabled. Remove it from the `HCR_HOST_NVHE_FLAGS` default, and conditionally set it in `cpu_prepare_hyp_mode()` only when `system_supports_mte()` returns true. This causes MTE instructions to trap to EL2 when `HCR_EL2.ATA` is cleared. Additionally, set `HCR_EL2.TID5` when MTE is disabled. This traps reads of `GMID_EL1` (Multiple tag transfer ID register) to EL2, preventing the discovery of MTE parameters (such as tag block size) when the feature is suppressed. Early boot code in `head.S` temporarily keeps `HCR_ATA` set to avoid special-casing initialization paths. This is safe because this code executes before untrusted code runs and will clear `HCR_ATA` if MTE is disabled. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kernel/head.S | 2 +- arch/arm64/kvm/arm.c | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e500600e4b9b..752e3e1604e8 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -101,7 +101,7 @@ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ca04b338cb0d..87a822e5c4ca 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -299,7 +299,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) isb 0: - init_el2_hcr HCR_HOST_NVHE_FLAGS + init_el2_hcr HCR_HOST_NVHE_FLAGS | HCR_ATA init_el2_state /* Hypervisor stub */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4f80da0c0d1d..aeac113e5e74 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2044,6 +2044,12 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; else params->hcr_el2 = HCR_HOST_NVHE_FLAGS; + + if (system_supports_mte()) + params->hcr_el2 |= HCR_ATA; + else + params->hcr_el2 |= HCR_TID5; + if (cpus_have_final_cap(ARM64_KVM_HVHE)) params->hcr_el2 |= HCR_E2H; params->vttbr = params->vtcr = 0; From 5ee8ad69da07d0e2cffa0ce2f2339c9ad2d587f2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:17 +0000 Subject: [PATCH 3/4] KVM: arm64: Inject UNDEF when accessing MTE sysregs with MTE disabled When MTE hardware is present but disabled via software (`arm64.nomte` or `CONFIG_ARM64_MTE=n`), the kernel clears `HCR_EL2.ATA` and sets `HCR_EL2.TID5`, to prevent the use of MTE instructions. Additionally, accesses to certain MTE system registers trap to EL2 with exception class ESR_ELx_EC_SYS64. To emulate hardware without MTE (where such accesses would cause an Undefined Instruction exception), inject UNDEF into the host. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 67 ++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a7c689152f68..faed1b38e6cc 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -687,6 +687,69 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) kvm_skip_host_instr(); } +/* + * Inject an Undefined Instruction exception into the host. + * + * This is open-coded to allow control over PSTATE construction without + * complicating the generic exception entry helpers. + */ +static void inject_undef64(void) +{ + u64 spsr_mask, vbar, sctlr, old_spsr, new_spsr, esr, offset; + + spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT; + + vbar = read_sysreg_el1(SYS_VBAR); + sctlr = read_sysreg_el1(SYS_SCTLR); + old_spsr = read_sysreg_el2(SYS_SPSR); + + new_spsr = old_spsr & spsr_mask; + new_spsr |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT; + new_spsr |= PSR_MODE_EL1h; + + if (!(sctlr & SCTLR_EL1_SPAN)) + new_spsr |= PSR_PAN_BIT; + + if (sctlr & SCTLR_ELx_DSSBS) + new_spsr |= PSR_SSBS_BIT; + + if (system_supports_mte()) + new_spsr |= PSR_TCO_BIT; + + esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) | ESR_ELx_IL; + offset = CURRENT_EL_SP_ELx_VECTOR + except_type_sync; + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el1(old_spsr, SYS_SPSR); + write_sysreg_el2(vbar + offset, SYS_ELR); + write_sysreg_el2(new_spsr, SYS_SPSR); +} + +static bool handle_host_mte(u64 esr) +{ + switch (esr_sys64_to_sysreg(esr)) { + case SYS_RGSR_EL1: + case SYS_GCR_EL1: + case SYS_TFSR_EL1: + case SYS_TFSRE0_EL1: + /* If we're here for any reason other than MTE, it's a bug. */ + if (read_sysreg(HCR_EL2) & HCR_ATA) + return false; + break; + case SYS_GMID_EL1: + /* If we're here for any reason other than MTE, it's a bug. */ + if (!(read_sysreg(HCR_EL2) & HCR_TID5)) + return false; + break; + default: + return false; + } + + inject_undef64(); + return true; +} + void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); @@ -702,6 +765,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); break; + case ESR_ELx_EC_SYS64: + if (handle_host_mte(esr)) + break; + fallthrough; default: BUG(); } From 230b080623fec2e1302df2afe2cf2dcb34f9c89b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Thu, 22 Jan 2026 11:22:18 +0000 Subject: [PATCH 4/4] KVM: arm64: Use kvm_has_mte() in pKVM trap initialization When initializing HCR traps in protected mode, use kvm_has_mte() to check for MTE support rather than kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP). kvm_has_mte() provides a more comprehensive check: - kvm_has_feat() only checks if MTE is in the guest's ID register view (i.e., what we advertise to the guest) - kvm_has_mte() checks both system_supports_mte() AND whether KVM_ARCH_FLAG_MTE_ENABLED is set for this VM instance Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260122112218.531948-5-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8911338961c5..9b933424e70a 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -82,7 +82,7 @@ static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) val &= ~(HCR_AMVOFFEN); - if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) { + if (!kvm_has_mte(kvm)) { val |= HCR_TID5; val &= ~(HCR_DCT | HCR_ATA); }