Merge branch kvm-arm64/pkvm-features-6.20 into kvmarm-master/next

* kvm-arm64/pkvm-features-6.20:
  : .
  : pKVM guest feature trapping fixes, courtesy of Fuad Tabba.
  : .
  KVM: arm64: Prevent host from managing timer offsets for protected VMs
  KVM: arm64: Check whether a VM IOCTL is allowed in pKVM
  KVM: arm64: Track KVM IOCTLs and their associated KVM caps
  KVM: arm64: Do not allow KVM_CAP_ARM_MTE for any guest in pKVM
  KVM: arm64: Include VM type when checking VM capabilities in pKVM
  KVM: arm64: Introduce helper to calculate fault IPA offset
  KVM: arm64: Fix MTE flag initialization for protected VMs
  KVM: arm64: Fix Trace Buffer trap polarity for protected VMs
  KVM: arm64: Fix Trace Buffer trapping for protected VMs

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2026-01-23 10:04:47 +00:00
commit c983b3e276
9 changed files with 108 additions and 24 deletions

View file

@ -316,6 +316,8 @@
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
#define FAR_TO_FIPA_OFFSET(far) ((far) & GENMASK_ULL(11, 0))
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \

View file

@ -1656,4 +1656,6 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
p; \
})
long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext);
#endif /* __ARM64_KVM_HOST_H__ */

View file

@ -9,6 +9,7 @@
#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_host.h>
#include <asm/kvm_pgtable.h>
/* Maximum number of VMs that can co-exist under pKVM. */
@ -23,10 +24,12 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm);
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
/*
* This functions as an allow-list of protected VM capabilities.
* Features not explicitly allowed by this function are denied.
* Check whether the specific capability is allowed in pKVM.
*
* Certain features are allowed only for non-protected VMs in pKVM, which is why
* this takes the VM (kvm) as a parameter.
*/
static inline bool kvm_pvm_ext_allowed(long ext)
static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext)
{
switch (ext) {
case KVM_CAP_IRQCHIP:
@ -42,11 +45,32 @@ static inline bool kvm_pvm_ext_allowed(long ext)
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
return true;
default:
case KVM_CAP_ARM_MTE:
return false;
default:
return !kvm || !kvm_vm_is_protected(kvm);
}
}
/*
 * Decide whether a KVM VM ioctl may proceed under pKVM.
 *
 * The decision is delegated to the capability allow-list: the ioctl is
 * mapped to its associated KVM_CAP_* value, which is then checked with
 * kvm_pkvm_ext_allowed(). Some capabilities are permitted only for
 * non-protected VMs, hence the kvm parameter.
 */
static inline bool kvm_pkvm_ioctl_allowed(struct kvm *kvm, unsigned int ioctl)
{
	long ext;

	/* Any ioctl reaching this point must have a capability mapping. */
	if (WARN_ON_ONCE(kvm_get_cap_for_kvm_ioctl(ioctl, &ext) < 0))
		return false;

	return kvm_pkvm_ext_allowed(kvm, ext);
}
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);

View file

@ -1056,10 +1056,14 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
ctxt->timer_id = timerid;
if (timerid == TIMER_VTIMER)
ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
else
ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
if (!kvm_vm_is_protected(vcpu->kvm)) {
if (timerid == TIMER_VTIMER)
ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
else
ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
} else {
ctxt->offset.vm_offset = NULL;
}
hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
@ -1083,7 +1087,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
timer_context_init(vcpu, i);
/* Synchronize offsets across timers of a VM if not already provided */
if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
if (!vcpu_is_protected(vcpu) &&
!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
timer_set_offset(vcpu_ptimer(vcpu), 0);
}
@ -1687,6 +1692,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
if (offset->reserved)
return -EINVAL;
if (kvm_vm_is_protected(kvm))
return -EINVAL;
mutex_lock(&kvm->lock);
if (!kvm_trylock_all_vcpus(kvm)) {

View file

@ -58,6 +58,51 @@ enum kvm_wfx_trap_policy {
static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;
static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;
/*
 * Tracks KVM IOCTLs and their associated KVM capabilities.
 *
 * Each entry pairs a VM ioctl number with the KVM_CAP_* extension that
 * gates it, so ioctl filtering can reuse the capability allow-list
 * (see kvm_get_cap_for_kvm_ioctl()).
 */
struct kvm_ioctl_cap_map {
unsigned int ioctl; /* KVM VM ioctl number (e.g. KVM_CREATE_IRQCHIP) */
long ext; /* KVM_CAP_* extension gating this ioctl */
};
/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */
#define KVM_CAP_ARM_BASIC KVM_CAP_NR_VCPUS
/*
 * Sorted by ioctl to allow for potential binary search,
 * though linear scan is sufficient for this size.
 *
 * Any VM ioctl absent from this table has no matching capability:
 * kvm_get_cap_for_kvm_ioctl() returns -EINVAL for it.
 */
static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = {
{ KVM_CREATE_IRQCHIP, KVM_CAP_IRQCHIP },
{ KVM_ARM_SET_DEVICE_ADDR, KVM_CAP_ARM_SET_DEVICE_ADDR },
{ KVM_ARM_MTE_COPY_TAGS, KVM_CAP_ARM_MTE },
{ KVM_SET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
{ KVM_GET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
{ KVM_HAS_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
{ KVM_ARM_SET_COUNTER_OFFSET, KVM_CAP_COUNTER_OFFSET },
{ KVM_ARM_GET_REG_WRITABLE_MASKS, KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES },
{ KVM_ARM_PREFERRED_TARGET, KVM_CAP_ARM_BASIC },
};
/*
* Set *ext to the capability.
* Return 0 if found, or -EINVAL if no IOCTL matches.
*/
long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext)
{
int i;
for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) {
if (vm_ioctl_caps[i].ioctl == ioctl) {
*ext = vm_ioctl_caps[i].ext;
return 0;
}
}
return -EINVAL;
}
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
@ -87,7 +132,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (cap->flags)
return -EINVAL;
if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap))
if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap))
return -EINVAL;
switch (cap->cap) {
@ -303,7 +348,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext))
if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext))
return 0;
switch (ext) {
@ -1894,6 +1939,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl))
return -EINVAL;
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
int ret;

View file

@ -117,8 +117,8 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
val |= MDCR_EL2_TTRF;
if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
val |= MDCR_EL2_E2TB_MASK;
if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
val &= ~MDCR_EL2_E2TB_MASK;
/* Trap Debug Communications Channel registers */
if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
@ -339,9 +339,6 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* Preserve the vgic model so that GICv3 emulation works */
hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model;
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
hyp_vm->kvm.arch.flags = host_arch_flags;
@ -356,20 +353,23 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
return;
}
if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_MTE))
kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED);
bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PMU_V3))
set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_ADDRESS))
set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_GENERIC))
set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_SVE)) {
set_bit(KVM_ARM_VCPU_SVE, allowed_features);
kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
}

View file

@ -44,7 +44,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
/* Build the full address */
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
/* If not for GICV, move on */
if (fault_ipa < vgic->vgic_cpu_base ||

View file

@ -296,7 +296,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
unsigned long addr, esr;
addr = kvm_vcpu_get_fault_ipa(vcpu);
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
addr |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
__kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr);

View file

@ -2079,7 +2079,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Falls between the IPA range and the PARange? */
if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
return kvm_inject_sea(vcpu, is_iabt, fault_ipa);
}
@ -2185,7 +2185,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* faulting VA. This is always 12 bits, irrespective
* of the page size.
*/
ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
ret = io_mem_abort(vcpu, ipa);
goto out_unlock;
}