Merge branch 'kvm-arm64/vgic-lr-overflow' into kvmarm/next

* kvm-arm64/vgic-lr-overflow: (50 commits)
  : Support for VGIC LR overflows, courtesy of Marc Zyngier
  :
  : Address deficiencies in KVM's GIC emulation when a vCPU has more active
  : IRQs than can be represented in the VGIC list registers. Sort the AP
  : list to prioritize inactive and pending IRQs, potentially spilling
  : active IRQs outside of the LRs.
  :
  : Handle deactivation of IRQs outside of the LRs for both EOImode=0/1,
  : which involves special consideration for SPIs being deactivated from a
  : different vCPU than the one that acked them.
  KVM: arm64: Convert ICH_HCR_EL2_TDIR cap to EARLY_LOCAL_CPU_FEATURE
  KVM: arm64: selftests: vgic_irq: Add timer deactivation test
  KVM: arm64: selftests: vgic_irq: Add Group-0 enable test
  KVM: arm64: selftests: vgic_irq: Add asymmetric SPI deactivation test
  KVM: arm64: selftests: vgic_irq: Perform EOImode==1 deactivation in ack order
  KVM: arm64: selftests: vgic_irq: Remove LR-bound limitation
  KVM: arm64: selftests: vgic_irq: Exclude timer-controlled interrupts
  KVM: arm64: selftests: vgic_irq: Change configuration before enabling interrupt
  KVM: arm64: selftests: vgic_irq: Fix GUEST_ASSERT_IAR_EMPTY() helper
  KVM: arm64: selftests: gic_v3: Disable Group-0 interrupts by default
  KVM: arm64: selftests: gic_v3: Add irq group setting helper
  KVM: arm64: GICv2: Always trap GICV_DIR register
  KVM: arm64: GICv2: Handle deactivation via GICV_DIR traps
  KVM: arm64: GICv2: Handle LR overflow when EOImode==0
  KVM: arm64: GICv3: Force exit to sync ICH_HCR_EL2.En
  KVM: arm64: GICv3: nv: Plug L1 LR sync into deactivation primitive
  KVM: arm64: GICv3: nv: Resync LRs/VMCR/HCR early for better MI emulation
  KVM: arm64: GICv3: Avoid broadcast kick on CPUs lacking TDIR
  KVM: arm64: GICv3: Handle in-LR deactivation when possible
  KVM: arm64: GICv3: Add SPI tracking to handle asymmetric deactivation
  ...

Signed-off-by: Oliver Upton <oupton@kernel.org>
commit 938309b028
Oliver Upton, 2025-12-01 00:47:32 -08:00
34 changed files with 1360 additions and 427 deletions
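
As a reading aid for the shortlog above, here is a minimal, self-contained C
model of the two core ideas of the series: sorting the AP list so that
deliverable, pending interrupts reach the LRs first, and replaying EOIcount to
retire active interrupts that never made it into the LRs. Everything here
(model_irq, model_irq_cmp, model_replay_eoicount, the field names) is invented
for illustration and does not exist in the kernel; the real logic lives in the
vgic_irq_cmp() and vgic_v{2,3}_fold_lr_state() hunks below, which additionally
handle locking, group enables, GICv2 SGI sources and the HW bit.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct model_irq {
	unsigned int intid;
	unsigned char priority;	/* lower value is more urgent */
	bool enabled, pending, active;
	bool deliverable;	/* stands in for vgic_target_oracle() == vcpu */
};

/* Negative: a sorts before b; positive: b sorts before a. */
static int model_irq_cmp(const void *pa, const void *pb)
{
	const struct model_irq *a = pa, *b = pb;
	int ret;

	/* Undeliverable interrupts go last */
	ret = (int)b->deliverable - (int)a->deliverable;
	if (ret)
		return ret;

	/* Pending-and-not-active beats active: pending must reach the LRs */
	ret = (int)(b->enabled && b->pending && !b->active) -
	      (int)(a->enabled && a->pending && !a->active);
	if (ret)
		return ret;

	/* Otherwise sort by priority, lower number first */
	return (int)a->priority - (int)b->priority;
}

/*
 * EOImode==0 replay: a non-zero EOIcount says how many deactivations hit
 * interrupts that were not resident in the LRs. EOImode==0 guarantees
 * deactivation in reverse activation order, so retiring one active entry
 * per tick, in sorted order, reproduces the guest's view.
 */
static void model_replay_eoicount(struct model_irq *irqs, size_t n,
				  unsigned int eoicount)
{
	for (size_t i = 0; i < n && eoicount; i++) {
		if (!irqs[i].deliverable || !irqs[i].active)
			continue;
		irqs[i].active = false;
		eoicount--;
	}
}

int main(void)
{
	struct model_irq ap_list[] = {
		{ .intid = 34, .priority = 0xa0, .enabled = true,
		  .active = true, .deliverable = true },
		{ .intid = 27, .priority = 0x80, .enabled = true,
		  .pending = true, .deliverable = true },
		{ .intid = 40, .priority = 0x60, .enabled = true,
		  .pending = true, .deliverable = false },
	};

	qsort(ap_list, 3, sizeof(ap_list[0]), model_irq_cmp);
	/* INTID 27 (pending) now sorts ahead of 34 (active); 40 is last */
	model_replay_eoicount(ap_list, 3, 1);	/* retires INTID 34 */
	printf("head: %u, INTID 34 active: %d\n",
	       ap_list[0].intid, (int)ap_list[1].active);
	return 0;
}

The pending-before-active ordering is what allows active interrupts to spill
out of the LRs in the first place: anything active but not resident can later
be retired either through this EOIcount walk (EOImode==0) or through a trapped
DIR access (EOImode==1).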


@ -79,7 +79,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,


@ -54,6 +54,7 @@
#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
#define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)


@ -77,12 +77,13 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
u64 __gic_v3_get_lr(unsigned int lr);
void __gic_v3_set_lr(u64 val, int lr);
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);


@ -40,8 +40,13 @@
*/
#define HVC_FINALISE_EL2 3
/*
* HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value
*/
#define HVC_GET_ICH_VTR_EL2 4
/* Max number of HYP stub hypercalls */
#define HVC_STUB_HCALL_NR 4
#define HVC_STUB_HCALL_NR 5
/* Error returned when an invalid stub number is passed into x0 */
#define HVC_STUB_ERR 0xbadca11


@ -2303,6 +2303,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
}
#endif
static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
int scope)
{
static const struct midr_range has_vgic_v3[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
{},
};
struct arm_smccc_res res = {};
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
!is_midr_in_range_list(has_vgic_v3))
return false;
if (!is_hyp_mode_available())
return false;
if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
return true;
if (is_kernel_in_hyp_mode())
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
else
arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
if (res.a0 == HVC_STUB_ERR)
return false;
return res.a1 & ICH_VTR_EL2_TDS;
}
#ifdef CONFIG_ARM64_BTI
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
{
@ -2814,6 +2857,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_gic_prio_relaxed_sync,
},
#endif
{
/*
* Depends on having GICv3
*/
.desc = "ICV_DIR_EL1 trapping",
.capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
.matches = can_trap_icv_dir_el1,
},
#ifdef CONFIG_ARM64_E0PD
{
.desc = "E0PD",


@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
cmp x0, #HVC_GET_ICH_VTR_EL2
b.ne 2f
mrs_s x1, SYS_ICH_VTR_EL2
b 9f
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2


@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
KVM_NVHE_ALIAS(alt_cb_patch_nops);
KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);


@ -664,8 +664,7 @@ nommu:
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (is_protected_kvm_enabled()) {
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
}
@ -1047,6 +1046,10 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
/* Process interrupts deactivated through a trap */
if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu))
kvm_vgic_process_async_update(vcpu);
if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
kvm_update_stolen_time(vcpu);


@ -157,6 +157,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
@ -464,11 +465,11 @@ static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
__vgic_v3_init_lrs();
}
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
}
static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
@ -616,7 +617,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_reserve_vm),
HANDLE_FUNC(__pkvm_unreserve_vm),


@ -337,6 +337,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* CTR_EL0 is always under host control, even for protected VMs. */
hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0;
/* Preserve the vgic model so that GICv3 emulation works */
hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model;
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);


@ -444,6 +444,8 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Scalable Vector Registers are restricted. */
HOST_HANDLED(SYS_ICC_PMR_EL1),
RAZ_WI(SYS_ERRIDR_EL1),
RAZ_WI(SYS_ERRSELR_EL1),
RAZ_WI(SYS_ERXFR_EL1),
@ -457,9 +459,12 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Limited Ordering Regions Registers are restricted. */
HOST_HANDLED(SYS_ICC_DIR_EL1),
HOST_HANDLED(SYS_ICC_RPR_EL1),
HOST_HANDLED(SYS_ICC_SGI1R_EL1),
HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
HOST_HANDLED(SYS_ICC_SGI0R_EL1),
HOST_HANDLED(SYS_ICC_CTLR_EL1),
{ SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
HOST_HANDLED(SYS_CCSIDR_EL1),


@ -63,6 +63,10 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
return -1;
}
/* Handle deactivation as a normal exit */
if ((fault_ipa - vgic->vgic_cpu_base) >= GIC_CPU_DEACTIVATE)
return 0;
rd = kvm_vcpu_dabt_get_rd(vcpu);
addr = kvm_vgic_global_state.vcpu_hyp_va;
addr += fault_ipa - vgic->vgic_cpu_base;


@ -14,6 +14,8 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include "../../vgic/vgic.h"
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
@ -58,7 +60,7 @@ u64 __gic_v3_get_lr(unsigned int lr)
unreachable();
}
static void __gic_v3_set_lr(u64 val, int lr)
void __gic_v3_set_lr(u64 val, int lr)
{
switch (lr & 0xf) {
case 0:
@ -196,6 +198,11 @@ static u32 __vgic_v3_read_ap1rn(int n)
return val;
}
static u64 compute_ich_hcr(struct vgic_v3_cpu_if *cpu_if)
{
return cpu_if->vgic_hcr | vgic_ich_hcr_trap_bits();
}
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
@ -212,14 +219,12 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
}
}
if (used_lrs || cpu_if->its_vpe.its_vm) {
if (used_lrs) {
int i;
u32 elrsr;
elrsr = read_gicreg(ICH_ELRSR_EL2);
write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++) {
if (elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
@ -229,6 +234,23 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
__gic_v3_set_lr(0, i);
}
}
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
if (cpu_if->vgic_hcr & ICH_HCR_EL2_LRENPIE) {
u64 val = read_gicreg(ICH_HCR_EL2);
cpu_if->vgic_hcr &= ~ICH_HCR_EL2_EOIcount;
cpu_if->vgic_hcr |= val & ICH_HCR_EL2_EOIcount;
}
write_gicreg(0, ICH_HCR_EL2);
/*
* Hack alert: On NV, this results in a trap so that the above write
* actually takes effect... No synchronisation is necessary, as we
* only care about the effects when this traps.
*/
read_gicreg(ICH_MISR_EL2);
}
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
@ -236,12 +258,10 @@ void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
u64 used_lrs = cpu_if->used_lrs;
int i;
if (used_lrs || cpu_if->its_vpe.its_vm) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(compute_ich_hcr(cpu_if), ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
}
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
/*
* Ensure that writes to the LRs, and on non-VHE systems ensure that
@ -307,24 +327,20 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
* injected. Note that this also applies if we don't expect
* any system register access (no vgic at all).
* If we need to trap system registers, we must write ICH_HCR_EL2
* anyway, even if no interrupts are being injected. Note that this
* also applies if we don't expect any system register access (no
* vgic at all). In any case, no need to provide MI configuration.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(vgic_ich_hcr_trap_bits() | ICH_HCR_EL2_En, ICH_HCR_EL2);
}
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
if (!cpu_if->vgic_sre) {
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
/* Only restore SRE if the host implements the GICv2 interface */
if (static_branch_unlikely(&vgic_v3_has_v2_compat)) {
val = read_gicreg(ICC_SRE_EL2);
@ -346,7 +362,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@ -507,13 +523,6 @@ static void __vgic_v3_write_vmcr(u32 vmcr)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
__vgic_v3_save_aprs(cpu_if);
if (cpu_if->vgic_sre)
cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
}
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
__vgic_v3_compat_mode_enable();
@ -790,7 +799,7 @@ static void __vgic_v3_bump_eoicount(void)
write_gicreg(hcr, ICH_HCR_EL2);
}
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@ -798,19 +807,25 @@ static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
/* EOImode == 0, nothing to be done here */
if (!(vmcr & ICH_VMCR_EOIM_MASK))
return;
return 1;
/* No deactivate to be performed on an LPI */
if (vid >= VGIC_MIN_LPI)
return;
return 1;
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
if (lr == -1) {
__vgic_v3_bump_eoicount();
return;
if (lr != -1) {
__vgic_v3_clear_active_lr(lr, lr_val);
return 1;
}
__vgic_v3_clear_active_lr(lr, lr_val);
return 0;
}
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
if (!___vgic_v3_write_dir(vcpu, vmcr, rt))
__vgic_v3_bump_eoicount();
}
static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
@ -1245,6 +1260,21 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
case SYS_ICC_DIR_EL1:
if (unlikely(is_read))
return 0;
/*
* Full exit if required to handle overflow deactivation,
* unless we can emulate it in the LRs (likely the majority
* of the cases).
*/
if (vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr & ICH_HCR_EL2_TDIR) {
int ret;
ret = ___vgic_v3_write_dir(vcpu, __vgic_v3_read_vmcr(),
kvm_vcpu_sys_get_rt(vcpu));
if (ret)
__kvm_skip_instr(vcpu);
return ret;
}
fn = __vgic_v3_write_dir;
break;
case SYS_ICC_RPR_EL1:


@ -666,6 +666,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return true;
}
static bool access_gic_dir(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_gicv3(vcpu->kvm))
return undef_access(vcpu, p, r);
if (!p->is_write)
return undef_access(vcpu, p, r);
vgic_v3_deactivate(vcpu, p->regval);
return true;
}
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@ -3370,7 +3385,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
@ -4495,7 +4510,7 @@ static const struct sys_reg_desc cp15_regs[] = {
{ CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
{ CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },


@ -188,6 +188,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
dist->active_spis = (atomic_t)ATOMIC_INIT(0);
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!dist->spis)
return -ENOMEM;
@ -353,12 +354,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
return ret;
}
static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_enable(vcpu);
vgic_v2_reset(vcpu);
else
vgic_v3_enable(vcpu);
vgic_v3_reset(vcpu);
}
/*
@ -405,7 +406,7 @@ int vgic_init(struct kvm *kvm)
}
kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
kvm_vgic_vcpu_reset(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
if (ret)


@ -359,6 +359,16 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vgic_set_vmcr(vcpu, &vmcr);
}
static void vgic_mmio_write_dir(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_deactivate(vcpu, val);
else
vgic_v3_deactivate(vcpu, val);
}
static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
@ -482,6 +492,10 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = {
REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_CPU_DEACTIVATE,
vgic_mmio_read_raz, vgic_mmio_write_dir,
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi,
4, VGIC_ACCESS_32bit),
};
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
@ -494,6 +508,16 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
return SZ_4K;
}
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev)
{
dev->regions = vgic_v2_cpu_registers;
dev->nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
return KVM_VGIC_V2_CPU_SIZE;
}
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
{
const struct vgic_register_region *region;


@ -213,6 +213,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u32 val);
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev);
unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);


@ -9,6 +9,7 @@
#include <kvm/arm_vgic.h>
#include <asm/kvm_mmu.h>
#include "vgic-mmio.h"
#include "vgic.h"
static inline void vgic_v2_write_lr(int lr, u32 val)
@ -26,11 +27,24 @@ void vgic_v2_init_lrs(void)
vgic_v2_write_lr(i, 0);
}
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
cpuif->vgic_hcr |= GICH_HCR_UIE;
cpuif->vgic_hcr = GICH_HCR_EN;
if (irqs_pending_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_NPIE;
if (irqs_active_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_LRENPIE;
if (irqs_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_UIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP0_MASK) ?
GICH_HCR_VGrp0DIE : GICH_HCR_VGrp0EIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP1_MASK) ?
GICH_HCR_VGrp1DIE : GICH_HCR_VGrp1EIE;
}
static bool lr_signals_eoi_mi(u32 lr_val)
@ -39,43 +53,23 @@ static bool lr_signals_eoi_mi(u32 lr_val)
!(lr_val & GICH_LR_HW);
}
/*
* transfer the content of the LRs back into the corresponding ap_list:
* - active bit is transferred as is
* - pending bit is
* - transferred as is in case of edge sensitive IRQs
* - set to the line-level (resample time) for level sensitive IRQs
*/
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
static void vgic_v2_fold_lr(struct kvm_vcpu *vcpu, u32 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
int lr;
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
bool deactivated;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
/* Extract the source vCPU id from the LR */
cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val) & 7;
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
/* Notify fds when the guest EOI'ed a level-triggered SPI */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
bool deactivated;
/* Extract the source vCPU id from the LR */
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
cpuid &= 7;
/* Notify fds when the guest EOI'ed a level-triggered SPI */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
irq = vgic_get_vcpu_irq(vcpu, intid);
raw_spin_lock(&irq->irq_lock);
irq = vgic_get_vcpu_irq(vcpu, intid);
scoped_guard(raw_spinlock, &irq->irq_lock) {
/* Always preserve the active bit, note deactivation */
deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@ -101,29 +95,139 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
irq->on_lr = false;
}
vgic_put_irq(vcpu->kvm, irq);
}
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
/*
* transfer the content of the LRs back into the corresponding ap_list:
* - active bit is transferred as is
* - pending bit is
* - transferred as is in case of edge sensitive IRQs
* - set to the line-level (resample time) for level sensitive IRQs
*/
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr);
struct vgic_irq *irq;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
for (int lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++)
vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]);
/* See the GICv3 equivalent for the EOIcount handling rationale */
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
u32 lr;
if (!eoicount) {
break;
} else {
guard(raw_spinlock)(&irq->irq_lock);
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
irq->active))
continue;
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
}
if (lr & GICH_LR_HW)
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
vgic_v2_fold_lr(vcpu, lr);
eoicount--;
}
cpuif->used_lrs = 0;
}
/*
* Populates the particular LR with the state of a given IRQ:
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
* - for a level sensitive IRQ the pending state value is unchanged;
* it is dictated directly by the input level
*
* If @irq describes an SGI with multiple sources, we choose the
* lowest-numbered source VCPU and clear that bit in the source bitmap.
*
* The irq_lock must be held by the caller.
*/
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
struct kvm_vcpu *target_vcpu = NULL;
bool mmio = false;
struct vgic_irq *irq;
unsigned long flags;
u64 lr = 0;
u8 cpuid;
/* Snapshot CPUID, and remove it from the INTID */
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
val &= ~GENMASK_ULL(12, 10);
/* We only deal with DIR when EOIMode==1 */
if (!(cpuif->vgic_vmcr & GICH_VMCR_EOI_MODE_MASK))
return;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
irq = vgic_get_vcpu_irq(vcpu, val);
if (WARN_ON_ONCE(!irq))
goto out;
/* See the corresponding v3 code for the rationale */
scoped_guard(raw_spinlock, &irq->irq_lock) {
target_vcpu = irq->vcpu;
/* Not on any ap_list? */
if (!target_vcpu)
goto put;
/*
* Urgh. We're deactivating something that we cannot
* observe yet... Big hammer time.
*/
if (irq->on_lr) {
mmio = true;
goto put;
}
/* SGI: check that the cpuid matches */
if (val < VGIC_NR_SGIS && irq->active_source != cpuid) {
target_vcpu = NULL;
goto put;
}
/* (with a Dalek voice) DEACTIVATE!!!! */
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
}
if (lr & GICH_LR_HW)
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
vgic_v2_fold_lr(vcpu, lr);
put:
vgic_put_irq(vcpu->kvm, irq);
out:
local_irq_restore(flags);
if (mmio)
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
/* Force the ap_list to be pruned */
if (target_vcpu)
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
u32 val = irq->intid;
bool allow_pending = true;
WARN_ON(irq->on_lr);
if (irq->active) {
val |= GICH_LR_ACTIVE_BIT;
if (vgic_irq_is_sgi(irq->intid))
@ -163,22 +267,52 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (allow_pending && irq_is_pending(irq)) {
val |= GICH_LR_PENDING_BIT;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return 0;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
if (irq->source & ~BIT(src - 1))
val |= GICH_LR_EOI;
}
}
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
return val;
}
/*
* Populates the particular LR with the state of a given IRQ:
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
* - for a level sensitive IRQ the pending state value is unchanged;
* it is dictated directly by the input level
*
* If @irq describes an SGI with multiple sources, we choose the
* lowest-numbered source VCPU and clear that bit in the source bitmap.
*
* The irq_lock must be held by the caller.
*/
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 val = vgic_v2_compute_lr(vcpu, irq);
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
if (val & GICH_LR_PENDING_BIT) {
if (irq->config == VGIC_CONFIG_EDGE)
irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
irq->source &= ~BIT(src - 1);
if (irq->source)
irq->pending_latch = true;
val |= GICH_LR_EOI;
}
}
}
@ -194,7 +328,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
irq->on_lr = true;
}
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
@ -257,7 +391,7 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}
void vgic_v2_enable(struct kvm_vcpu *vcpu)
void vgic_v2_reset(struct kvm_vcpu *vcpu)
{
/*
* By forcing VMCR to zero, the GIC will restore the binary
@ -265,9 +399,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
/* Get the show on the road... */
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
}
/* check for overlapping regions and for regions crossing the end of memory */
@ -289,6 +420,7 @@ static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
int vgic_v2_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned int len;
int ret = 0;
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
@ -312,10 +444,20 @@ int vgic_v2_map_resources(struct kvm *kvm)
return ret;
}
len = vgic_v2_init_cpuif_iodev(&dist->cpuif_iodev);
dist->cpuif_iodev.base_addr = dist->vgic_cpu_base;
dist->cpuif_iodev.iodev_type = IODEV_CPUIF;
dist->cpuif_iodev.redist_vcpu = NULL;
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist->vgic_cpu_base,
len, &dist->cpuif_iodev.dev);
if (ret)
return ret;
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
kvm_vgic_global_state.vcpu_base,
KVM_VGIC_V2_CPU_SIZE, true);
KVM_VGIC_V2_CPU_SIZE - SZ_4K, true);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
return ret;
@ -385,6 +527,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.can_emulate_gicv2 = true;
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.gicc_base = info->gicc_base;
kvm_vgic_global_state.type = VGIC_V2;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
@ -423,16 +566,26 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
void vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
if (!base)
return;
if (used_lrs) {
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
if (used_lrs)
save_lrs(vcpu, base);
writel_relaxed(0, base + GICH_HCR);
if (cpu_if->vgic_hcr & GICH_HCR_LRENPIE) {
u32 val = readl_relaxed(base + GICH_HCR);
cpu_if->vgic_hcr &= ~GICH_HCR_EOICOUNT;
cpu_if->vgic_hcr |= val & GICH_HCR_EOICOUNT;
}
writel_relaxed(0, base + GICH_HCR);
}
void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
@ -445,13 +598,10 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
if (!base)
return;
if (used_lrs) {
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
for (i = 0; i < used_lrs; i++) {
writel_relaxed(cpu_if->vgic_lr[i],
base + GICH_LR0 + (i * 4));
}
}
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
for (i = 0; i < used_lrs; i++)
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
}
void vgic_v2_load(struct kvm_vcpu *vcpu)
@ -468,6 +618,5 @@ void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}


@ -70,13 +70,14 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
* - on L2 put: perform the inverse transformation, so that the result of L2
* running becomes visible to L1 in the VNCR-accessible registers.
*
* - there is nothing to do on L2 entry, as everything will have happened
* on load. However, this is the point where we detect that an interrupt
* targeting L1 and prepare the grand switcheroo.
* - there is nothing to do on L2 entry apart from enabling the vgic, as
* everything will have happened on load. However, this is the point where
* we detect an interrupt targeting L1 and prepare the grand
* switcheroo.
*
* - on L2 exit: emulate the HW bit, and deactivate corresponding the L1
* interrupt. The L0 active state will be cleared by the HW if the L1
* interrupt was itself backed by a HW interrupt.
* - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate
* the corresponding L1 interrupt. The L0 active state will be cleared by
* the HW if the L1 interrupt was itself backed by a HW interrupt.
*
* Maintenance Interrupt (MI) management:
*
@ -93,8 +94,10 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
*
* - because most of the ICH_*_EL2 registers live in the VNCR page, the
* quality of emulation is poor: L1 can set up the vgic so that an MI would
* immediately fire, and not observe anything until the next exit. Trying
* to read ICH_MISR_EL2 would do the trick, for example.
* immediately fire, and not observe anything until the next exit.
* Similarly, a pending MI is not immediately disabled by clearing
* ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for
* example.
*
* System register emulation:
*
@ -265,16 +268,37 @@ static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
}
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu)
{
u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2);
}
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
int i;
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
struct vgic_irq *irq;
u64 val, host_lr, lr;
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
/* Propagate the new LR state */
lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
val = lr & ~ICH_LR_STATE;
val |= host_lr & ICH_LR_STATE;
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
/*
* Deactivation of a HW interrupt: the LR must have the HW
* bit set, have been in a non-invalid state before the run,
* and now be in an invalid state. If any of that doesn't
* hold, we're done with this LR.
*/
if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) &&
!(host_lr & ICH_LR_STATE)))
continue;
/*
@ -282,35 +306,27 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
* need to emulate the HW effect between the guest hypervisor
* and the nested guest.
*/
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
continue;
lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
if (!(lr & ICH_LR_STATE))
irq->active = false;
vgic_put_irq(vcpu->kvm, irq);
vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
}
/* We need these to be synchronised to generate the MI */
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2));
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount);
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount);
write_sysreg_s(0, SYS_ICH_HCR_EL2);
isb();
vgic_v3_nested_update_mi(vcpu);
}
static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
struct vgic_v3_cpu_if *s_cpu_if)
{
struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val = 0;
int i;
/*
* If we're on a system with a broken vgic that requires
* trapping, propagate the trapping requirements.
*
* Ah, the smell of rotten fruits...
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap))
val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
s_cpu_if->vgic_sre = host_if->vgic_sre;
@ -334,7 +350,8 @@ void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
__vgic_v3_restore_vmcr_aprs(cpu_if);
__vgic_v3_activate_traps(cpu_if);
__vgic_v3_restore_state(cpu_if);
for (int i = 0; i < cpu_if->used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
/*
* Propagate the number of used LRs for the benefit of the HYP
@ -347,36 +364,19 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
u64 val;
int i;
__vgic_v3_save_vmcr_aprs(s_cpu_if);
__vgic_v3_deactivate_traps(s_cpu_if);
__vgic_v3_save_state(s_cpu_if);
/*
* Translate the shadow state HW fields back to the virtual ones
* before copying the shadow struct back to the nested one.
*/
val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
val &= ~ICH_HCR_EL2_EOIcount_MASK;
val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
__vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
__vgic_v3_save_aprs(s_cpu_if);
for (i = 0; i < 4; i++) {
__vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
__vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
}
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
for (i = 0; i < s_cpu_if->used_lrs; i++)
__gic_v3_set_lr(0, i);
val &= ~ICH_LR_STATE;
val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
}
__vgic_v3_deactivate_traps(s_cpu_if);
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}


@ -12,6 +12,7 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_asm.h>
#include "vgic-mmio.h"
#include "vgic.h"
static bool group0_trap;
@ -20,11 +21,48 @@ static bool common_trap;
static bool dir_trap;
static bool gicv4_enable;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
if (!irqchip_in_kernel(vcpu->kvm))
return;
cpuif->vgic_hcr = ICH_HCR_EL2_En;
if (irqs_pending_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
if (irqs_active_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
if (irqs_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
if (!als->nr_sgi)
cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ?
ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ?
ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;
/*
* Dealing with EOImode=1 is a massive source of headache. Not
* only do we need to track that we have active interrupts
* outside of the LRs and force DIR to be trapped, we also
* need to deal with SPIs that can be deactivated on another
* CPU.
*
* On systems that do not implement TDIR, force the bit in the
* shadow state anyway to avoid IPI-ing on these poor sods.
*
* Note that we set the trap irrespective of EOIMode, as that
* can change behind our back without any warning...
*/
if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
irqs_active_outside_lrs(als) ||
atomic_read(&vcpu->kvm->arch.vgic.active_spis))
cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
static bool lr_signals_eoi_mi(u64 lr_val)
@ -33,84 +71,238 @@ static bool lr_signals_eoi_mi(u64 lr_val)
!(lr_val & ICH_LR_HW);
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
int lr;
struct vgic_irq *irq;
bool is_v2_sgi = false;
bool deactivated;
u32 intid;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
intid = val & ICH_LR_VIRTUAL_ID_MASK;
} else {
intid = val & GICH_LR_VIRTUALID;
is_v2_sgi = vgic_irq_is_sgi(intid);
}
cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE;
irq = vgic_get_vcpu_irq(vcpu, intid);
if (!irq) /* An LPI could have been unmapped. */
return;
for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
u32 intid, cpuid;
struct vgic_irq *irq;
bool is_v2_sgi = false;
bool deactivated;
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
if (model == KVM_DEV_TYPE_ARM_VGIC_V3) {
intid = val & ICH_LR_VIRTUAL_ID_MASK;
} else {
intid = val & GICH_LR_VIRTUALID;
is_v2_sgi = vgic_irq_is_sgi(intid);
}
/* Notify fds when the guest EOI'ed a level-triggered IRQ */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
irq = vgic_get_vcpu_irq(vcpu, intid);
if (!irq) /* An LPI could have been unmapped. */
continue;
raw_spin_lock(&irq->irq_lock);
/* Always preserve the active bit, note deactivation */
scoped_guard(raw_spinlock, &irq->irq_lock) {
/* Always preserve the active bit for !LPIs, note deactivation */
if (irq->intid >= VGIC_MIN_LPI)
val &= ~ICH_LR_ACTIVE_BIT;
deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
if (irq->active && is_v2_sgi)
irq->active_source = cpuid;
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & ICH_LR_PENDING_BIT)) {
(val & ICH_LR_PENDING_BIT))
irq->pending_latch = true;
if (is_v2_sgi)
irq->source |= (1 << cpuid);
}
/*
* Clear soft pending state when level irqs have been acked.
*/
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
irq->pending_latch = false;
if (is_v2_sgi) {
u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);
if (irq->active)
irq->active_source = cpuid;
if (val & ICH_LR_PENDING_BIT)
irq->source |= BIT(cpuid);
}
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
irq->on_lr = false;
}
/* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */
if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) {
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis);
}
vgic_put_irq(vcpu->kvm, irq);
}
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
static void vgic_v3_deactivate_phys(u32 intid)
{
if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
else
gic_write_dir(intid);
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
struct vgic_irq *irq;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
for (int lr = 0; lr < cpuif->used_lrs; lr++)
vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);
/*
* EOIMode=0: use EOIcount to emulate deactivation. We are
* guaranteed to deactivate in reverse order of the activation, so
* just pick one active interrupt after the other in the ap_list,
* and replay the deactivation as if the CPU was doing it. We also
* rely on priority drop to have taken place, and the list to be
* sorted by priority.
*/
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
u64 lr;
/*
* I would have loved to write this using a scoped_guard(),
* but using 'continue' here is a total train wreck.
*/
if (!eoicount) {
break;
} else {
guard(raw_spinlock)(&irq->irq_lock);
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
irq->active))
continue;
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
}
if (lr & ICH_LR_HW)
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
vgic_v3_fold_lr(vcpu, lr);
eoicount--;
}
cpuif->used_lrs = 0;
}
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
struct kvm_vcpu *target_vcpu = NULL;
bool mmio = false, is_v2_sgi;
struct vgic_irq *irq;
unsigned long flags;
u64 lr = 0;
u8 cpuid;
/* Snapshot CPUID, and remove it from the INTID */
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
val &= ~GENMASK_ULL(12, 10);
is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
val < VGIC_NR_SGIS);
/*
* We only deal with DIR when EOIMode==1, and only for SGI,
* PPI or SPI.
*/
if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) ||
val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
return;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
irq = vgic_get_vcpu_irq(vcpu, val);
if (WARN_ON_ONCE(!irq))
goto out;
/*
* EOIMode=1: we must rely on traps to handle deactivate of
* overflowing interrupts, as there is no ordering guarantee and
* EOIcount isn't being incremented. Priority drop will have taken
* place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs.
*
* Three possibilities:
*
* - The irq is not queued on any CPU, and there is nothing to
* do,
*
* - Or the irq is in an LR, meaning that its state is not
* directly observable. Treat it bluntly by making it as if
* this was a write to GICD_ICACTIVER, which will force an
* exit on all vcpus. If it hurts, don't do that.
*
* - Or the irq is active, but not in an LR, and we can
* directly deactivate it by building a pseudo-LR, folding it,
* and queueing a request to prune the resulting ap_list.
*
* Special care must be taken to match the source CPUID when
* deactivating a GICv2 SGI.
*/
scoped_guard(raw_spinlock, &irq->irq_lock) {
target_vcpu = irq->vcpu;
/* Not on any ap_list? */
if (!target_vcpu)
goto put;
/*
* Urgh. We're deactivating something that we cannot
* observe yet... Big hammer time.
*/
if (irq->on_lr) {
mmio = true;
goto put;
}
/* GICv2 SGI: check that the cpuid matches */
if (is_v2_sgi && irq->active_source != cpuid) {
target_vcpu = NULL;
goto put;
}
/* (with a Dalek voice) DEACTIVATE!!!! */
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
}
if (lr & ICH_LR_HW)
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
vgic_v3_fold_lr(vcpu, lr);
put:
vgic_put_irq(vcpu->kvm, irq);
out:
local_irq_restore(flags);
if (mmio)
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
/* Force the ap_list to be pruned */
if (target_vcpu)
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}
/* Requires the irq to be locked already */
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
bool allow_pending = true, is_v2_sgi;
WARN_ON(irq->on_lr);
is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2);
@ -150,6 +342,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (allow_pending && irq_is_pending(irq)) {
val |= ICH_LR_PENDING_BIT;
if (is_v2_sgi) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return 0;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
if (irq->source & ~BIT(src - 1))
val |= ICH_LR_EOI;
}
}
if (irq->group)
val |= ICH_LR_GROUP;
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
return val;
}
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = vgic_v3_compute_lr(vcpu, irq);
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
if (val & ICH_LR_PENDING_BIT) {
if (irq->config == VGIC_CONFIG_EDGE)
irq->pending_latch = false;
@ -157,16 +378,9 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
irq->source &= ~BIT(src - 1);
if (irq->source)
irq->pending_latch = true;
val |= ICH_LR_EOI;
}
}
}
@ -179,12 +393,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
irq->line_level = false;
if (irq->group)
val |= ICH_LR_GROUP;
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
irq->on_lr = true;
}
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
@ -258,7 +467,7 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \
GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))
void vgic_v3_enable(struct kvm_vcpu *vcpu)
void vgic_v3_reset(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
@ -288,9 +497,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.ich_vtr_el2);
vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
kvm_vgic_global_state.ich_vtr_el2) + 1;
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EL2_En;
}
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
@ -301,20 +507,9 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
return;
/* Hide GICv3 sysreg if necessary */
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC);
return;
}
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@ -635,8 +830,53 @@ static const struct midr_range broken_seis[] = {
static bool vgic_v3_broken_seis(void)
{
return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) &&
is_midr_in_range_list(broken_seis));
return (is_kernel_in_hyp_mode() &&
is_midr_in_range_list(broken_seis) &&
(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
}
void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr,
int nr_inst)
{
u32 insn, oinsn, rd;
u64 hcr = 0;
if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
if (vgic_v3_broken_seis()) {
/* We know that these machines have ICH_HCR_EL2.TDIR */
group0_trap = true;
group1_trap = true;
dir_trap = true;
}
if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
common_trap = true;
if (group0_trap)
hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
hcr |= ICH_HCR_EL2_TDIR;
/* Compute target register */
oinsn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
/* movz rd, #(val & 0xffff) */
insn = aarch64_insn_gen_movewide(rd,
(u16)hcr,
0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr = cpu_to_le32(insn);
}
/**
@ -650,6 +890,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
{
u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
bool has_v2;
u64 traps;
int ret;
has_v2 = ich_vtr_el2 >> 63;
@ -708,29 +949,18 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (has_v2)
static_branch_enable(&vgic_v3_has_v2_compat);
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
if (vgic_v3_broken_seis()) {
kvm_info("GICv3 with broken locally generated SEI\n");
kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_EL2_TDS)
dir_trap = true;
else
common_trap = true;
}
if (group0_trap || group1_trap || common_trap | dir_trap) {
traps = vgic_ich_hcr_trap_bits();
if (traps) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
group0_trap ? "G0" : "",
group1_trap ? "G1" : "",
common_trap ? "C" : "",
dir_trap ? "D" : "");
(traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
(traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
(traps & ICH_HCR_EL2_TC) ? "C" : "",
(traps & ICH_HCR_EL2_TDIR) ? "D" : "");
static_branch_enable(&vgic_v3_cpuif_trap);
}
@ -770,7 +1000,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
}
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
if (has_vhe())


@ -163,6 +163,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
struct irq_desc *desc;
unsigned long flags;
bool pending;
int ret;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
@ -173,9 +174,11 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
irq->hw = false;
ret = irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
&irq->pending_latch);
&pending);
WARN_ON(ret);
irq->pending_latch = pending;
desc = irq_to_desc(irq->host_irq);
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
unlock:


@ -237,7 +237,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
*
* Requires the IRQ lock to be held.
*/
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
lockdep_assert_held(&irq->irq_lock);
@ -265,17 +265,20 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
return NULL;
}
struct vgic_sort_info {
struct kvm_vcpu *vcpu;
struct vgic_vmcr vmcr;
};
/*
* The order of items in the ap_lists defines how we'll pack things in LRs as
* well, the first items in the list being the first things populated in the
* LRs.
*
* A hard rule is that active interrupts can never be pushed out of the LRs
* (and therefore take priority) since we cannot reliably trap on deactivation
* of IRQs and therefore they have to be present in the LRs.
*
* Pending, non-active interrupts must be placed at the head of the list.
* Otherwise things should be sorted by the priority field and the GIC
* hardware support will take care of preemption of priority groups etc.
* Interrupts that are not deliverable should be at the end of the list.
*
* Return negative if "a" sorts before "b", 0 to preserve order, and positive
* to sort "b" before "a".
@ -285,6 +288,8 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
{
struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
struct vgic_sort_info *info = priv;
struct kvm_vcpu *vcpu = info->vcpu;
bool penda, pendb;
int ret;
@ -298,21 +303,32 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
raw_spin_lock(&irqa->irq_lock);
raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
if (irqa->active || irqb->active) {
ret = (int)irqb->active - (int)irqa->active;
/* Undeliverable interrupts should be last */
ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu);
if (ret)
goto out;
}
penda = irqa->enabled && irq_is_pending(irqa);
pendb = irqb->enabled && irq_is_pending(irqb);
if (!penda || !pendb) {
ret = (int)pendb - (int)penda;
/* Same thing for interrupts targeting a disabled group */
ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
ret -= (int)(irqa->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
if (ret)
goto out;
}
/* Both pending and enabled, sort by priority */
ret = irqa->priority - irqb->priority;
penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active;
pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active;
ret = (int)pendb - (int)penda;
if (ret)
goto out;
/* Both pending and enabled, sort by priority (lower number first) */
ret = (int)irqa->priority - (int)irqb->priority;
if (ret)
goto out;
/* Finally, HW bit active interrupts have priority over non-HW ones */
ret = (int)irqb->hw - (int)irqa->hw;
out:
raw_spin_unlock(&irqb->irq_lock);
raw_spin_unlock(&irqa->irq_lock);
@ -323,10 +339,12 @@ out:
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_sort_info info = { .vcpu = vcpu, };
lockdep_assert_held(&vgic_cpu->ap_list_lock);
list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
vgic_get_vmcr(vcpu, &info.vmcr);
list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}
/*
@ -349,6 +367,20 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne
return false;
}
static bool vgic_model_needs_bcst_kick(struct kvm *kvm)
{
/*
* A GICv3 (or GICv3-like) system exposing a GICv3 to the guest
* needs a broadcast kick to set TDIR globally.
*
* For systems that do not have TDIR (ARM's own v8.0 CPUs), the
* shadow TDIR bit is always set, and so is the register's TC bit,
* so no need to kick the CPUs.
*/
return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
}
/*
* Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
* Do the queuing if necessary, taking the right locks in the right order.
@ -361,6 +393,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags) __releases(&irq->irq_lock)
{
struct kvm_vcpu *vcpu;
bool bcast;
lockdep_assert_held(&irq->irq_lock);
@ -435,11 +468,20 @@ retry:
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
irq->vcpu = vcpu;
/* A new SPI may result in deactivation trapping on all vcpus */
bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) &&
vgic_valid_spi(vcpu->kvm, irq->intid) &&
atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0);
raw_spin_unlock(&irq->irq_lock);
raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
if (!bcast) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
} else {
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING);
}
return true;
}
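The active_spis counter taken above presumably has a matching decrement once the SPI is finally deactivated, so that DIR trapping can be relaxed when no SPIs remain in flight; a minimal sketch of that counterpart (placement assumed, not taken verbatim from the series):

	/* Hypothetical counterpart in the SPI deactivation path */
	if (vgic_valid_spi(kvm, irq->intid))
		atomic_dec(&kvm->arch.vgic.active_spis);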
@ -791,98 +833,148 @@ static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
vgic_v3_clear_lr(vcpu, lr);
}
static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_set_underflow(vcpu);
else
vgic_v3_set_underflow(vcpu);
}
/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
bool *multi_sgi)
static void summarize_ap_list(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
int count = 0;
*multi_sgi = false;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
*als = (typeof(*als)){};
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
int w;
guard(raw_spinlock)(&irq->irq_lock);
raw_spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
w = vgic_irq_get_lr_count(irq);
raw_spin_unlock(&irq->irq_lock);
if (unlikely(vgic_target_oracle(irq) != vcpu))
continue;
count += w;
*multi_sgi |= (w > 1);
if (!irq->active)
als->nr_pend++;
else
als->nr_act++;
if (irq->intid < VGIC_NR_SGIS)
als->nr_sgi++;
}
return count;
}
/* Requires the VCPU's ap_list_lock to be held. */
/*
* Dealing with LR overflow is close to black magic -- dress accordingly.
*
* We have to present an almost infinite number of interrupts through a very
* limited number of registers. Therefore crucial decisions must be made to
* ensure we feed the most relevant interrupts into the LRs, and yet have
* some facilities to let the guest interact with those that are not there.
*
* All considerations below are in the context of interrupts targeting a
* single vcpu with non-idle state (either pending, active, or both),
* colloquially called the ap_list:
*
* - Pending interrupts must have priority over active interrupts. This also
* excludes pending+active interrupts, which are treated as active. This
* ensures that a guest can perform priority drops on any number of
* interrupts, and yet be presented with the next pending one.
*
* - Deactivation of interrupts outside of the LRs must be tracked using
* the EOIcount-driven maintenance interrupt, and sometimes by trapping
* trapping the DIR register.
*
* - For EOImode=0, a non-zero EOIcount means walking the ap_list past the
* point that made it into the LRs, and deactivating interrupts that would
* have made it onto the LRs if we had the space.
*
* - The MI-generation bits must be used to try and force an exit when the
* guest has made enough changes to the LRs that we want to reevaluate the
* situation:
*
* - if the total number of pending interrupts exceeds the number of
* LRs, NPIE must be set in order to exit once no pending interrupts
* are present in the LRs, allowing us to populate the next batch.
*
* - if there are active interrupts outside of the LRs, then LRENPIE
* must be set so that we exit on deactivation of one of these, and
* work out which one is to be deactivated. Note that this is not
* enough to deal with EOImode=1, see below.
*
* - if the overall number of interrupts exceeds the number of LRs,
* then UIE must be set to allow refilling of the LRs once the
* majority of them have been processed.
*
* - as usual, MI triggers are only an optimisation, since we cannot
* rely on the MI being delivered in a timely manner...
*
* - EOImode=1 creates some additional problems:
*
* - deactivation can happen in any order, and we cannot rely on
* EOImode=0's coupling of priority-drop and deactivation which
* imposes strict reverse Ack order. This means that DIR must
* trap if we have active interrupts outside of the LRs.
*
* - deactivation of SPIs can occur on any CPU, while the SPI is only
* present in the ap_list of the CPU that actually ack-ed it. In that
* case, EOIcount doesn't provide enough information, and we must
* resort to trapping DIR even if we don't overflow the LRs. Bonus
* points for not trapping DIR when no SPIs are pending or active in
* the whole VM.
*
* - LPIs do not suffer the same problem as SPIs on deactivation, as we
* have to essentially discard the active state, see below.
*
* - Virtual LPIs have an active state (surprise!), which gets removed on
* priority drop (EOI). However, EOIcount doesn't get bumped when the LPI
* is not present in the LR (surprise again!). Special care must therefore
* be taken to remove the active state from any activated LPI when exiting
* from the guest. This is in a way no different from what happens on the
* physical side. We still rely on the running priority to have been
* removed from the APRs, irrespective of the LPI being present in the LRs
* or not.
*
* - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as
* they are not managed in SW and don't have a true active state. So only
* set vSGIEOICount when no SGIs are in the ap_list.
*
* - GICv2 SGIs with multiple sources are injected one source at a time, as
* if they were made pending sequentially. This may mean that we don't
* always present the HPPI if other interrupts with lower priority are
* pending in the LRs. Big deal.
*/
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct ap_list_summary als;
struct vgic_irq *irq;
int count;
bool multi_sgi;
u8 prio = 0xff;
int i = 0;
int count = 0;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
count = compute_ap_list_depth(vcpu, &multi_sgi);
if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
summarize_ap_list(vcpu, &als);
if (irqs_outside_lrs(&als))
vgic_sort_ap_list(vcpu);
count = 0;
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
raw_spin_lock(&irq->irq_lock);
scoped_guard(raw_spinlock, &irq->irq_lock) {
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
}
}
/*
* If we have multi-SGIs in the pipeline, we need to
* guarantee that they are all seen before any IRQ of
* lower priority. In that case, we need to filter out
* these interrupts by exiting early. This is easy as
* the AP list has been sorted already.
*/
if (multi_sgi && irq->priority > prio) {
raw_spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr)
break;
}
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
if (irq->source)
prio = irq->priority;
}
raw_spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr) {
if (!list_is_last(&irq->ap_list,
&vgic_cpu->ap_list_head))
vgic_set_underflow(vcpu);
break;
}
}
/* Nuke remaining LRs */
for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++)
vgic_clear_lr(vcpu, i);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
else
vgic_v2_configure_hcr(vcpu, &als);
} else {
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
vgic_v3_configure_hcr(vcpu, &als);
}
}
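To make the MI triggers described above concrete, here is a minimal sketch of how the per-GIC configure_hcr step could derive them from the ap_list summary; everything below (the vgic_hcr shadow field, the exact ICH_HCR_EL2 bit names, and the irqs_*_outside_lrs() helpers from vgic.h) is illustrative, not the actual implementation:

	static void example_configure_hcr(struct kvm_vcpu *vcpu,
					  struct ap_list_summary *als)
	{
		u64 hcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr;

		hcr &= ~(ICH_HCR_EL2_NPIE | ICH_HCR_EL2_UIE |
			 ICH_HCR_EL2_LRENPIE | ICH_HCR_EL2_vSGIEOICount);

		/* Exit once no pending IRQs are left in the LRs */
		if (irqs_pending_outside_lrs(als))
			hcr |= ICH_HCR_EL2_NPIE;

		/* Exit on deactivation of an out-of-LR active IRQ */
		if (irqs_active_outside_lrs(als))
			hcr |= ICH_HCR_EL2_LRENPIE;

		/* Exit when most LRs are empty so they can be refilled */
		if (irqs_outside_lrs(als))
			hcr |= ICH_HCR_EL2_UIE;

		/* Directly injected vSGIs must not bump EOIcount */
		if (!als->nr_sgi)
			hcr |= ICH_HCR_EL2_vSGIEOICount;

		vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = hcr;
	}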
static inline bool can_access_vgic_from_kernel(void)
@ -906,8 +998,6 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
int used_lrs;
/* If nesting, emulate the HW effect from L0 to L1 */
if (vgic_state_is_nested(vcpu)) {
vgic_v3_sync_nested(vcpu);
@ -917,23 +1007,24 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (vcpu_has_nv(vcpu))
vgic_v3_nested_update_mi(vcpu);
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
if (can_access_vgic_from_kernel())
vgic_save_state(vcpu);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
else
used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
if (used_lrs)
vgic_fold_lr_state(vcpu);
vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
/* Sync interrupts that were deactivated through a DIR trap */
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
{
unsigned long flags;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
vgic_prune_ap_list(vcpu);
local_irq_restore(flags);
}
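This is presumably driven by the new KVM_REQ_VGIC_PROCESS_UPDATE request, serviced from the vCPU run loop before reentering the guest; a minimal sketch of the wiring (placement assumed):

	if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu))
		kvm_vgic_process_async_update(vcpu);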
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
@ -958,8 +1049,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* abort the entry procedure and inject the exception at the
* beginning of the run loop.
*
* - Otherwise, do exactly *NOTHING*. The guest state is
* already loaded, and we can carry on with running it.
* - Otherwise, do exactly *NOTHING* apart from enabling the virtual
* CPU interface. The guest state is already loaded, and we can
* carry on with running it.
*
* If we have NV, but are not in a nested state, compute the
* maintenance interrupt state, as it may fire.
@ -968,35 +1060,17 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (kvm_vgic_vcpu_pending_irq(vcpu))
kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
vgic_v3_flush_nested(vcpu);
return;
}
if (vcpu_has_nv(vcpu))
vgic_v3_nested_update_mi(vcpu);
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
* taking any lock. There is a potential race with someone injecting
* interrupts to the VCPU, but it is a benign race as the VCPU will
* either observe the new interrupt before or after doing this check,
* and introducing additional synchronization mechanism doesn't change
* this.
*
* Note that we still need to go through the whole thing if anything
* can be directly injected (GICv4).
*/
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
!vgic_supports_direct_irqs(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
vgic_flush_lr_state(vcpu);
raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
}
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);

View file

@ -164,6 +164,22 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa,
return ret;
}
void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
static inline u64 vgic_ich_hcr_trap_bits(void)
{
u64 hcr;
/* All the traps are in the bottom 16 bits */
asm volatile(ALTERNATIVE_CB("movz %0, #0\n",
ARM64_ALWAYS_SYSTEM,
kvm_compute_ich_hcr_trap_bits)
: "=r" (hcr));
return hcr;
}
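The callback patches the movz immediate with the trap bits computed for the system at boot. A minimal sketch of what such a callback could look like, following the standard arm64 alternative-callback shape (the feature test and bit selection are assumptions, not the actual code):

	void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
					   __le32 *origptr, __le32 *updptr,
					   int nr_inst)
	{
		u32 insn, rd;
		u64 bits = 0;

		BUG_ON(nr_inst != 1);

		/* Assumed: shadow TDIR via TC/TDIR when the CPU lacks TDIR */
		if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
			bits |= ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR;

		/* Preserve Rd from the original movz, splat the immediate */
		insn = le32_to_cpu(origptr[0]);
		rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
		insn = aarch64_insn_gen_movewide(rd, bits, 0,
						 AARCH64_INSN_VARIANT_64BIT,
						 AARCH64_INSN_MOVEWIDE_ZERO);
		updptr[0] = cpu_to_le32(insn);
	}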
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@ -220,6 +236,21 @@ struct its_ite {
u32 event_id;
};
struct ap_list_summary {
unsigned int nr_pend; /* purely pending, not active */
unsigned int nr_act; /* active, or active+pending */
unsigned int nr_sgi; /* any SGI */
};
#define irqs_outside_lrs(s) \
(((s)->nr_pend + (s)->nr_act) > kvm_vgic_global_state.nr_lr)
#define irqs_pending_outside_lrs(s) \
((s)->nr_pend > kvm_vgic_global_state.nr_lr)
#define irqs_active_outside_lrs(s) \
((s)->nr_act && irqs_outside_lrs(s))
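A worked example of these predicates, assuming kvm_vgic_global_state.nr_lr == 4 and an ap_list holding 5 purely pending plus 2 active interrupts:

	irqs_outside_lrs():         (5 + 2) > 4 -> true
	irqs_pending_outside_lrs():  5 > 4      -> true
	irqs_active_outside_lrs():   2 && true  -> true

so the flush path sorts the ap_list and arms both the pending-refill and out-of-LR deactivation machinery.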
int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
struct vgic_reg_attr *reg_attr);
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@ -230,6 +261,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid);
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq);
bool vgic_get_phys_line_level(struct vgic_irq *irq);
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
@ -245,8 +277,9 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
@ -254,7 +287,7 @@ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v2_enable(struct kvm_vcpu *vcpu);
void vgic_v2_reset(struct kvm_vcpu *vcpu);
int vgic_v2_probe(const struct gic_kvm_info *info);
int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
@ -286,10 +319,11 @@ static inline void vgic_get_irq_ref(struct vgic_irq *irq)
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val);
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);
void vgic_v3_reset(struct kvm_vcpu *vcpu);
int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
@ -412,6 +446,7 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu);
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
void vgic_v3_put_nested(struct kvm_vcpu *vcpu);

View file

@ -40,6 +40,7 @@ HAS_GICV5_CPUIF
HAS_GICV5_LEGACY
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_ICH_HCR_EL2_TDIR
HAS_HCR_NV1
HAS_HCX
HAS_LDAPR

View file

@ -411,12 +411,15 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
if (is_kernel_in_hyp_mode() &&
(read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
u64 val;
generic_handle_domain_irq(aic_irqc->hw_domain,
AIC_FIQ_HWIRQ(AIC_VGIC_MI));
if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
read_sysreg_s(SYS_ICH_MISR_EL2))) {
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
(val = read_sysreg_s(SYS_ICH_MISR_EL2)))) {
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM (MISR=%llx), disabling.\n",
val);
sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
}
}

View file

@ -1459,6 +1459,8 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
if (ret)
return;
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
if (static_branch_likely(&supports_deactivate_key))
vgic_set_kvm_info(&gic_v2_kvm_info);
}
@ -1620,6 +1622,7 @@ static void __init gic_acpi_setup_kvm_info(void)
return;
gic_v2_kvm_info.maint_irq = irq;
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
vgic_set_kvm_info(&gic_v2_kvm_info);
}

View file

@ -59,6 +59,9 @@ struct vgic_global {
/* virtual control interface mapping, HYP VA */
void __iomem *vctrl_hyp;
/* Physical CPU interface, kernel VA */
void __iomem *gicc_base;
/* Number of implemented list registers */
int nr_lr;
@ -120,6 +123,7 @@ struct irq_ops {
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
u32 intid; /* Guest visible INTID */
struct rcu_head rcu;
struct list_head ap_list;
@ -134,17 +138,18 @@ struct vgic_irq {
* affinity reg (v3).
*/
u32 intid; /* Guest visible INTID */
bool line_level; /* Level only */
bool pending_latch; /* The pending latch state used to calculate
* the pending state for both level
* and edge triggered IRQs. */
bool active;
bool pending_release; /* Used for LPIs only, unreferenced IRQ
bool pending_release:1; /* Used for LPIs only, unreferenced IRQ
* pending a release */
bool enabled;
bool hw; /* Tied to HW IRQ */
bool pending_latch:1; /* The pending latch state used to calculate
* the pending state for both level
* and edge triggered IRQs. */
enum vgic_irq_config config:1; /* Level or edge */
bool line_level:1; /* Level only */
bool enabled:1;
bool active:1;
bool hw:1; /* Tied to HW IRQ */
bool on_lr:1; /* Present in a CPU LR */
refcount_t refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
@ -156,7 +161,6 @@ struct vgic_irq {
u8 active_source; /* GICv2 SGIs only */
u8 priority;
u8 group; /* 0 == group 0, 1 == group 1 */
enum vgic_irq_config config; /* Level or edge */
struct irq_ops *ops;
@ -259,6 +263,9 @@ struct vgic_dist {
/* The GIC maintenance IRQ for nested hypervisors. */
u32 mi_intid;
/* Track the number of in-flight active SPIs */
atomic_t active_spis;
/* base addresses in guest physical address space: */
gpa_t vgic_dist_base; /* distributor */
union {
@ -280,6 +287,7 @@ struct vgic_dist {
struct vgic_irq *spis;
struct vgic_io_device dist_iodev;
struct vgic_io_device cpuif_iodev;
bool has_its;
bool table_write_in_progress;
@ -417,6 +425,7 @@ bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);

View file

@ -86,7 +86,13 @@
#define GICH_HCR_EN (1 << 0)
#define GICH_HCR_UIE (1 << 1)
#define GICH_HCR_LRENPIE (1 << 2)
#define GICH_HCR_NPIE (1 << 3)
#define GICH_HCR_VGrp0EIE (1 << 4)
#define GICH_HCR_VGrp0DIE (1 << 5)
#define GICH_HCR_VGrp1EIE (1 << 6)
#define GICH_HCR_VGrp1DIE (1 << 7)
#define GICH_HCR_EOICOUNT GENMASK(31, 27)
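EOICOUNT accumulates priority drops that found no matching LR; on exit the v2 sync path can read it back to learn how many out-of-LR interrupts to deactivate. A minimal sketch of the extraction (hypothetical use, field access assumed):

	u32 hcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_HCR);
	u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, hcr);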
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)

View file

@ -24,6 +24,8 @@ struct gic_kvm_info {
enum gic_type type;
/* Virtual CPU interface */
struct resource vcpu;
/* GICv2 GICC VA */
void __iomem *gicc_base;
/* Interrupt number */
unsigned int maint_irq;
/* No interrupt mask, no need to use the above field */

View file

@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
uint32_t shared_data;
};
/*
@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
uint32_t first_intid, int num,
kvm_inject_cmd cmd)
uint32_t first_intid, int num,
const unsigned long *exclude,
kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
if (exclude && test_bit(i, exclude))
continue;
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
if (exclude && test_bit(intid - first_intid, exclude))
continue;
gic_set_eoi(intid);
if (args->eoi_split)
gic_set_dir(intid);
}
if (args->eoi_split) {
for (i = 0; i < num; i++) {
intid = i + first_intid;
if (exclude && test_bit(i, exclude))
continue;
if (args->eoi_split)
gic_set_dir(intid);
}
}
local_irq_enable();
for (i = 0; i < num; i++)
for (i = 0; i < num; i++) {
if (exclude && test_bit(i, exclude))
continue;
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
}
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
/*
* Test up to 4 levels of preemption. The reason is that KVM doesn't
* currently implement the ability to have more than the number-of-LRs
* number of concurrently active IRQs. The number of LRs implemented is
* IMPLEMENTATION DEFINED, however, it seems that most implement 4.
*/
/* Timer PPIs cannot be injected from userspace */
static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
BIT(30 - MIN_PPI) |
BIT(28 - MIN_PPI) |
BIT(26 - MIN_PPI));
if (f->sgi)
test_inject_preemption(args, MIN_SGI, 4, f->cmd);
test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
test_inject_preemption(args, MIN_PPI, 4, f->cmd);
test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
test_inject_preemption(args, MIN_SPI, 4, f->cmd);
test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
/* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
guest_restore_active(args, MIN_SGI, 4, f->cmd);
guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
guest_restore_active(args, MIN_PPI, 4, f->cmd);
guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
guest_restore_active(args, MIN_SPI, 4, f->cmd);
guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@ -779,6 +802,221 @@ done:
kvm_vm_free(vm);
}
static void guest_code_asym_dir(struct test_args *args, int cpuid)
{
gic_init(GIC_V3, 2);
gic_set_eoi_split(1);
gic_set_priority_mask(CPU_PRIO_MASK);
if (cpuid == 0) {
uint32_t intid;
local_irq_disable();
gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
gic_irq_enable(MIN_SPI);
gic_irq_set_pending(MIN_SPI);
intid = wait_for_and_activate_irq();
GUEST_ASSERT_EQ(intid, MIN_SPI);
gic_set_eoi(intid);
isb();
WRITE_ONCE(args->shared_data, MIN_SPI);
dsb(ishst);
do {
dsb(ishld);
} while (READ_ONCE(args->shared_data) == MIN_SPI);
GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
} else {
do {
dsb(ishld);
} while (READ_ONCE(args->shared_data) != MIN_SPI);
gic_set_dir(MIN_SPI);
isb();
WRITE_ONCE(args->shared_data, 0);
dsb(ishst);
}
GUEST_DONE();
}
static void guest_code_group_en(struct test_args *args, int cpuid)
{
uint32_t intid;
gic_init(GIC_V3, 2);
gic_set_eoi_split(0);
gic_set_priority_mask(CPU_PRIO_MASK);
/* SGI0 is G0, which is disabled */
gic_irq_set_group(0, 0);
/* Configure all SGIs with decreasing priority */
for (intid = 0; intid < MIN_PPI; intid++) {
gic_set_priority(intid, (intid + 1) * 8);
gic_irq_enable(intid);
gic_irq_set_pending(intid);
}
/* Ack and EOI all G1 interrupts */
for (int i = 1; i < MIN_PPI; i++) {
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid < MIN_PPI);
gic_set_eoi(intid);
isb();
}
/*
* Check that SGI0 is still pending, inactive, and that we cannot
* ack anything.
*/
GUEST_ASSERT(gic_irq_get_pending(0));
GUEST_ASSERT(!gic_irq_get_active(0));
GUEST_ASSERT_IAR_EMPTY();
GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
/* Open the G0 gates, and verify we can ack SGI0 */
write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
isb();
do {
intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
} while (intid == IAR_SPURIOUS);
GUEST_ASSERT(intid == 0);
GUEST_DONE();
}
static void guest_code_timer_spi(struct test_args *args, int cpuid)
{
uint32_t intid;
u64 val;
gic_init(GIC_V3, 2);
gic_set_eoi_split(1);
gic_set_priority_mask(CPU_PRIO_MASK);
/* Add a pending SPI so that KVM starts trapping DIR */
gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
gic_irq_set_pending(MIN_SPI + cpuid);
/* Configure the timer with a higher priority, make it pending */
gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
isb();
val = read_sysreg(cntvct_el0);
write_sysreg(val, cntv_cval_el0);
write_sysreg(1, cntv_ctl_el0);
isb();
GUEST_ASSERT(gic_irq_get_pending(27));
/* Enable both interrupts */
gic_irq_enable(MIN_SPI + cpuid);
gic_irq_enable(27);
/* The timer must fire */
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid == 27);
/* Check that we can deassert it */
write_sysreg(0, cntv_ctl_el0);
isb();
GUEST_ASSERT(!gic_irq_get_pending(27));
/*
* Priority drop, deactivation -- we expect that the host
* deactivation will have been effective
*/
gic_set_eoi(27);
gic_set_dir(27);
GUEST_ASSERT(!gic_irq_get_active(27));
/* Do it one more time */
isb();
val = read_sysreg(cntvct_el0);
write_sysreg(val, cntv_cval_el0);
write_sysreg(1, cntv_ctl_el0);
isb();
GUEST_ASSERT(gic_irq_get_pending(27));
/* The timer must fire again */
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid == 27);
GUEST_DONE();
}
static void *test_vcpu_run(void *arg)
{
struct kvm_vcpu *vcpu = arg;
struct ucall uc;
while (1) {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
return NULL;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
return NULL;
}
static void test_vgic_two_cpus(void *gcode)
{
pthread_t thr[2];
struct kvm_vcpu *vcpus[2];
struct test_args args = {};
struct kvm_vm *vm;
vm_vaddr_t args_gva;
int gic_fd, ret;
vm = vm_create_with_vcpus(2, gcode, vcpus);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpus[0]);
vcpu_init_descriptor_tables(vcpus[1]);
/* Setup the guest args page (so it gets the args). */
args_gva = vm_vaddr_alloc_page(vm);
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vcpus[0], 2, args_gva, 0);
vcpu_args_set(vcpus[1], 2, args_gva, 1);
gic_fd = vgic_v3_setup(vm, 2, 64);
ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
if (ret)
TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
if (ret)
TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
pthread_join(thr[0], NULL);
pthread_join(thr[1], NULL);
close(gic_fd);
kvm_vm_free(vm);
}
static void help(const char *name)
{
printf(
@ -835,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
test_vgic_two_cpus(guest_code_asym_dir);
test_vgic_two_cpus(guest_code_group_en);
test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}

View file

@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);

View file

@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
void gic_irq_set_group(unsigned int intid, bool group)
{
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_group(intid, group);
}

View file

@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;

View file

@ -293,6 +293,20 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
static void gicv3_set_group(uint32_t intid, bool grp)
{
uint32_t cpu_or_dist;
uint32_t val;
cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
if (grp)
val |= BIT(intid % 32);
else
val &= ~BIT(intid % 32);
gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
}
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
@ -333,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
/* Disable Group-0 interrupts */
write_sysreg_s(0, SYS_ICC_IGRPEN0_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@ -405,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
.gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,