linux/drivers/irqchip/irq-gic-v5.c
Linus Torvalds cb5573868e Loongarch:
- Add more CPUCFG mask bits.
 
 - Improve feature detection.
 
 - Add lazy load support for FPU and binary translation (LBT) register state.
 
 - Fix return value for memory reads from and writes to in-kernel devices.
 
 - Add support for detecting preemption from within a guest.
 
 - Add KVM steal time test case to tools/selftests.
 
 ARM:
 
 - Add support for FEAT_IDST, allowing ID registers that are not
   implemented to be reported as a normal trap rather than as an UNDEF
   exception.
 
 - Add sanitisation of the VTCR_EL2 register, fixing a number of
   UXN/PXN/XN bugs in the process.
 
 - Full handling of RESx bits, instead of only RES0, and resulting in
   SCTLR_EL2 being added to the list of sanitised registers.
 
 - More pKVM fixes for features that are not supposed to be exposed to
   guests.
 
 - Make sure that MTE being disabled on the pKVM host doesn't give it
   the ability to attack the hypervisor.
 
 - Allow pKVM's host stage-2 mappings to use the Force Write Back
   version of the memory attributes by using the "pass-through"
   encoding.
 
 - Fix trapping of ICC_DIR_EL1 on GICv5 hosts emulating GICv3 for the
   guest.
 
 - Preliminary work for guest GICv5 support.
 
 - A bunch of debugfs fixes, removing pointless custom iterators stored
   in guest data structures.
 
 - A small set of FPSIMD cleanups.
 
 - Selftest fixes addressing the incorrect alignment of page
   allocation.
 
 - Other assorted low-impact fixes and spelling fixes.
 
 RISC-V:
 
 - Fixes for issues discovered by KVM API fuzzing in
   kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(),
   and kvm_riscv_vcpu_aia_imsic_update()
 
 - Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM
 
 - Transparent huge page support for hypervisor page tables
 
 - Adjust the number of available guest irq files based on MMIO
   register sizes found in the device tree or the ACPI tables
 
 - Add RISC-V specific paging modes to KVM selftests
 
 - Detect paging mode at runtime for selftests
 
 s390:
 
 - Performance improvement for vSIE (aka nested virtualization)
 
 - Completely new memory management.  s390 was a special snowflake that enlisted
   help from the architecture's page table management to build hypervisor
   page tables, in particular enabling sharing the last level of page
   tables.  This however was a lot of code (~3K lines) in order to support
   KVM, and also blocked several features.  The biggest advantages is
   that the page size of userspace is completely independent of the
   page size used by the guest: userspace can mix normal pages, THPs and
   hugetlbfs as it sees fit, and in fact transparent hugepages were not
   possible before.  It's also now possible to have nested guests and
   guests with huge pages running on the same host.
 
 - Maintainership change for s390 vfio-pci
 
 - Small quality of life improvement for protected guests
 
 x86:
 
 - Add support for giving the guest full ownership of PMU hardware (contexted
   switched around the fastpath run loop) and allowing direct access to data
   MSRs and PMCs (restricted by the vPMU model).  KVM still intercepts
   access to control registers, e.g. to enforce event filtering and to
   prevent the guest from profiling sensitive host state.  This is more
   accurate, since it has no risk of contention and thus dropped events, and
   also has significantly less overhead.
 
   For more information, see the commit message for merge commit bf2c3138ae
   ("Merge tag 'kvm-x86-pmu-6.20' of https://github.com/kvm-x86/linux into HEAD").
 
 - Disallow changing the virtual CPU model if L2 is active, for all the same
   reasons KVM disallows change the model after the first KVM_RUN.
 
 - Fix a bug where KVM would incorrectly reject host accesses to PV MSRs
   when running with KVM_CAP_ENFORCE_PV_FEATURE_CPUID enabled, even if those
   were advertised as supported to userspace,
 
 - Fix a bug with protected guest state (SEV-ES/SNP and TDX) VMs, where KVM
   would attempt to read CR3 configuring an async #PF entry.
 
 - Fail the build if EXPORT_SYMBOL_GPL or EXPORT_SYMBOL is used in KVM (for x86
   only) to enforce usage of EXPORT_SYMBOL_FOR_KVM_INTERNAL.  Only a few exports
   that are intended for external usage, and those are allowed explicitly.
 
 - When checking nested events after a vCPU is unblocked, ignore -EBUSY instead
   of WARNing.  Userspace can sometimes put the vCPU into what should be an
   impossible state, and spurious exit to userspace on -EBUSY does not really
   do anything to solve the issue.
 
 - Also throw in the towel and drop the WARN on INIT/SIPI being blocked when vCPU
   is in Wait-For-SIPI, which also resulted in playing whack-a-mole with syzkaller
   stuffing architecturally impossible states into KVM.
 
 - Add support for new Intel instructions that don't require anything beyond
   enumerating feature flags to userspace.
 
 - Grab SRCU when reading PDPTRs in KVM_GET_SREGS2.
 
 - Add WARNs to guard against modifying KVM's CPU caps outside of the intended
   setup flow, as nested VMX in particular is sensitive to unexpected changes
   in KVM's golden configuration.
 
 - Add a quirk to allow userspace to opt-in to actually suppress EOI broadcasts
   when the suppression feature is enabled by the guest (currently limited to
   split IRQCHIP, i.e. userspace I/O APIC).  Sadly, simply fixing KVM to honor
   Suppress EOI Broadcasts isn't an option as some userspaces have come to rely
   on KVM's buggy behavior (KVM advertises Suppress EOI Broadcast irrespective
   of whether or not userspace I/O APIC supports Directed EOIs).
 
 - Clean up KVM's handling of marking mapped vCPU pages dirty.
 
 - Drop a pile of *ancient* sanity checks hidden behind in KVM's unused
   ASSERT() macro, most of which could be trivially triggered by the guest
   and/or user, and all of which were useless.
 
 - Fold "struct dest_map" into its sole user, "struct rtc_status", to make it
   more obvious what the weird parameter is used for, and to allow dropping
   these RTC shenanigans if CONFIG_KVM_IOAPIC=n.
 
 - Bury all of ioapic.h, i8254.h and related ioctls (including
   KVM_CREATE_IRQCHIP) behind CONFIG_KVM_IOAPIC=y.
 
 - Add a regression test for recent APICv update fixes.
 
 - Handle "hardware APIC ISR", a.k.a. SVI, updates in kvm_apic_update_apicv()
   to consolidate the updates, and to co-locate SVI updates with the updates
   for KVM's own cache of ISR information.
 
 - Drop a dead function declaration.
 
 - Minor cleanups.
 
 x86 (Intel):
 
 - Rework KVM's handling of VMCS updates while L2 is active to temporarily
   switch to vmcs01 instead of deferring the update until the next nested
   VM-Exit.  The deferred updates approach directly contributed to several
   bugs, was proving to be a maintenance burden due to the difficulty in
   auditing the correctness of deferred updates, and was polluting
   "struct nested_vmx" with a growing pile of booleans.
 
 - Fix an SGX bug where KVM would incorrectly try to handle EPCM page faults,
   and instead always reflect them into the guest.  Since KVM doesn't shadow
   EPCM entries, EPCM violations cannot be due to KVM interference and
   can't be resolved by KVM.
 
 - Fix a bug where KVM would register its posted interrupt wakeup handler even
   if loading kvm-intel.ko ultimately failed.
 
 - Disallow access to vmcb12 fields that aren't fully supported, mostly to
   avoid weirdness and complexity for FRED and other features, where KVM wants
   enable VMCS shadowing for fields that conditionally exist.
 
 - Print out the "bad" offsets and values if kvm-intel.ko refuses to load (or
   refuses to online a CPU) due to a VMCS config mismatch.
 
 x86 (AMD):
 
 - Drop a user-triggerable WARN on nested_svm_load_cr3() failure.
 
 - Add support for virtualizing ERAPS.  Note, correct virtualization of ERAPS
   relies on an upcoming, publicly announced change in the APM to reduce the
   set of conditions where hardware (i.e. KVM) *must* flush the RAP.
 
 - Ignore nSVM intercepts for instructions that are not supported according to
   L1's virtual CPU model.
 
 - Add support for expedited writes to the fast MMIO bus, a la VMX's fastpath
   for EPT Misconfig.
 
 - Don't set GIF when clearing EFER.SVME, as GIF exists independently of SVM,
   and allow userspace to restore nested state with GIF=0.
 
 - Treat exit_code as an unsigned 64-bit value through all of KVM.
 
 - Add support for fetching SNP certificates from userspace.
 
 - Fix a bug where KVM would use vmcb02 instead of vmcb01 when emulating VMLOAD
   or VMSAVE on behalf of L2.
 
 - Misc fixes and cleanups.
 
 x86 selftests:
 
 - Add a regression test for TPR<=>CR8 synchronization and IRQ masking.
 
 - Overhaul selftest's MMU infrastructure to genericize stage-2 MMU support,
   and extend x86's infrastructure to support EPT and NPT (for L2 guests).
 
 - Extend several nested VMX tests to also cover nested SVM.
 
 - Add a selftest for nested VMLOAD/VMSAVE.
 
 - Rework the nested dirty log test, originally added as a regression test for
   PML where KVM logged L2 GPAs instead of L1 GPAs, to improve test coverage
   and to hopefully make the test easier to understand and maintain.
 
 guest_memfd:
 
 - Remove kvm_gmem_populate()'s preparation tracking and half-baked hugepage
   handling.  SEV/SNP was the only user of the tracking and it can do it via
   the RMP.
 
 - Retroactively document and enforce (for SNP) that KVM_SEV_SNP_LAUNCH_UPDATE
   and KVM_TDX_INIT_MEM_REGION require the source page to be 4KiB aligned, to
   avoid non-trivial complexity for something that no known VMM seems to be
   doing and to avoid an API special case for in-place conversion, which
   simply can't support unaligned sources.
 
 - When populating guest_memfd memory, GUP the source page in common code and
   pass the refcounted page to the vendor callback, instead of letting vendor
   code do the heavy lifting.  Doing so avoids a looming deadlock bug with
   in-place due to an AB-BA conflict between mmap_lock and guest_memfd's filemap
   invalidate lock.
 
 Generic:
 
 - Fix a bug where KVM would ignore the vCPU's selected address space when
   creating a vCPU-specific mapping of guest memory.  Actually this bug
   could not be hit even on x86, the only architecture with multiple
   address spaces, but it's a bug nevertheless.
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmmNqwwUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroPaZAf/cJx5B67lnST272esz0j29MIuT/Ti
 jnf6PI9b7XubKYOtNvlu5ZW4Jsa5dqRG0qeO/JmcXDlwBf5/UkWOyvqIXyiuTl0l
 KcSUlKPtTgKZSoZpJpTppuuDE8FSYqEdcCmjNvoYzcJoPjmaeJbK6aqO0AkBbb6e
 L5InrLV7nV9iua6rFvA0s/G8/Eq2DG8M9hTRHe6NcI/z4hvslOudvpUXtC8Jygoo
 cV8vFavUwc+atrmvhAOLvSitnrjfNa4zcG6XMOlwXPfIdvi3zqTlQTgUpwGKiAGQ
 RIDUVZ/9bcWgJqbPRsdEWwaYRkNQWc5nmrAHRpEEaYV/NeBBNf4v6qfKSw==
 =SkJ1
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "Loongarch:

   - Add more CPUCFG mask bits

   - Improve feature detection

   - Add lazy load support for FPU and binary translation (LBT) register
     state

   - Fix return value for memory reads from and writes to in-kernel
     devices

   - Add support for detecting preemption from within a guest

   - Add KVM steal time test case to tools/selftests

  ARM:

   - Add support for FEAT_IDST, allowing ID registers that are not
     implemented to be reported as a normal trap rather than as an UNDEF
     exception

   - Add sanitisation of the VTCR_EL2 register, fixing a number of
     UXN/PXN/XN bugs in the process

   - Full handling of RESx bits, instead of only RES0, and resulting in
     SCTLR_EL2 being added to the list of sanitised registers

   - More pKVM fixes for features that are not supposed to be exposed to
     guests

   - Make sure that MTE being disabled on the pKVM host doesn't give it
     the ability to attack the hypervisor

   - Allow pKVM's host stage-2 mappings to use the Force Write Back
     version of the memory attributes by using the "pass-through"
     encoding

   - Fix trapping of ICC_DIR_EL1 on GICv5 hosts emulating GICv3 for the
     guest

   - Preliminary work for guest GICv5 support

   - A bunch of debugfs fixes, removing pointless custom iterators
     stored in guest data structures

   - A small set of FPSIMD cleanups

   - Selftest fixes addressing the incorrect alignment of page
     allocation

   - Other assorted low-impact fixes and spelling fixes

  RISC-V:

   - Fixes for issues discovered by KVM API fuzzing in
     kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(), and
     kvm_riscv_vcpu_aia_imsic_update()

   - Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM

   - Transparent huge page support for hypervisor page tables

   - Adjust the number of available guest irq files based on MMIO
     register sizes found in the device tree or the ACPI tables

   - Add RISC-V specific paging modes to KVM selftests

   - Detect paging mode at runtime for selftests

  s390:

   - Performance improvement for vSIE (aka nested virtualization)

   - Completely new memory management. s390 was a special snowflake that
     enlisted help from the architecture's page table management to
     build hypervisor page tables, in particular enabling sharing the
     last level of page tables. This however was a lot of code (~3K
     lines) in order to support KVM, and also blocked several features.
     The biggest advantages is that the page size of userspace is
     completely independent of the page size used by the guest:
     userspace can mix normal pages, THPs and hugetlbfs as it sees fit,
     and in fact transparent hugepages were not possible before. It's
     also now possible to have nested guests and guests with huge pages
     running on the same host

   - Maintainership change for s390 vfio-pci

   - Small quality of life improvement for protected guests

  x86:

   - Add support for giving the guest full ownership of PMU hardware
     (contexted switched around the fastpath run loop) and allowing
     direct access to data MSRs and PMCs (restricted by the vPMU model).

     KVM still intercepts access to control registers, e.g. to enforce
     event filtering and to prevent the guest from profiling sensitive
     host state. This is more accurate, since it has no risk of
     contention and thus dropped events, and also has significantly less
     overhead.

     For more information, see the commit message for merge commit
     bf2c3138ae ("Merge tag 'kvm-x86-pmu-6.20' ...")

   - Disallow changing the virtual CPU model if L2 is active, for all
     the same reasons KVM disallows change the model after the first
     KVM_RUN

   - Fix a bug where KVM would incorrectly reject host accesses to PV
     MSRs when running with KVM_CAP_ENFORCE_PV_FEATURE_CPUID enabled,
     even if those were advertised as supported to userspace,

   - Fix a bug with protected guest state (SEV-ES/SNP and TDX) VMs,
     where KVM would attempt to read CR3 configuring an async #PF entry

   - Fail the build if EXPORT_SYMBOL_GPL or EXPORT_SYMBOL is used in KVM
     (for x86 only) to enforce usage of EXPORT_SYMBOL_FOR_KVM_INTERNAL.
     Only a few exports that are intended for external usage, and those
     are allowed explicitly

   - When checking nested events after a vCPU is unblocked, ignore
     -EBUSY instead of WARNing. Userspace can sometimes put the vCPU
     into what should be an impossible state, and spurious exit to
     userspace on -EBUSY does not really do anything to solve the issue

   - Also throw in the towel and drop the WARN on INIT/SIPI being
     blocked when vCPU is in Wait-For-SIPI, which also resulted in
     playing whack-a-mole with syzkaller stuffing architecturally
     impossible states into KVM

   - Add support for new Intel instructions that don't require anything
     beyond enumerating feature flags to userspace

   - Grab SRCU when reading PDPTRs in KVM_GET_SREGS2

   - Add WARNs to guard against modifying KVM's CPU caps outside of the
     intended setup flow, as nested VMX in particular is sensitive to
     unexpected changes in KVM's golden configuration

   - Add a quirk to allow userspace to opt-in to actually suppress EOI
     broadcasts when the suppression feature is enabled by the guest
     (currently limited to split IRQCHIP, i.e. userspace I/O APIC).
     Sadly, simply fixing KVM to honor Suppress EOI Broadcasts isn't an
     option as some userspaces have come to rely on KVM's buggy behavior
     (KVM advertises Suppress EOI Broadcast irrespective of whether or
     not userspace I/O APIC supports Directed EOIs)

   - Clean up KVM's handling of marking mapped vCPU pages dirty

   - Drop a pile of *ancient* sanity checks hidden behind in KVM's
     unused ASSERT() macro, most of which could be trivially triggered
     by the guest and/or user, and all of which were useless

   - Fold "struct dest_map" into its sole user, "struct rtc_status", to
     make it more obvious what the weird parameter is used for, and to
     allow dropping these RTC shenanigans if CONFIG_KVM_IOAPIC=n

   - Bury all of ioapic.h, i8254.h and related ioctls (including
     KVM_CREATE_IRQCHIP) behind CONFIG_KVM_IOAPIC=y

   - Add a regression test for recent APICv update fixes

   - Handle "hardware APIC ISR", a.k.a. SVI, updates in
     kvm_apic_update_apicv() to consolidate the updates, and to
     co-locate SVI updates with the updates for KVM's own cache of ISR
     information

   - Drop a dead function declaration

   - Minor cleanups

  x86 (Intel):

   - Rework KVM's handling of VMCS updates while L2 is active to
     temporarily switch to vmcs01 instead of deferring the update until
     the next nested VM-Exit.

     The deferred updates approach directly contributed to several bugs,
     was proving to be a maintenance burden due to the difficulty in
     auditing the correctness of deferred updates, and was polluting
     "struct nested_vmx" with a growing pile of booleans

   - Fix an SGX bug where KVM would incorrectly try to handle EPCM page
     faults, and instead always reflect them into the guest. Since KVM
     doesn't shadow EPCM entries, EPCM violations cannot be due to KVM
     interference and can't be resolved by KVM

   - Fix a bug where KVM would register its posted interrupt wakeup
     handler even if loading kvm-intel.ko ultimately failed

   - Disallow access to vmcb12 fields that aren't fully supported,
     mostly to avoid weirdness and complexity for FRED and other
     features, where KVM wants enable VMCS shadowing for fields that
     conditionally exist

   - Print out the "bad" offsets and values if kvm-intel.ko refuses to
     load (or refuses to online a CPU) due to a VMCS config mismatch

  x86 (AMD):

   - Drop a user-triggerable WARN on nested_svm_load_cr3() failure

   - Add support for virtualizing ERAPS. Note, correct virtualization of
     ERAPS relies on an upcoming, publicly announced change in the APM
     to reduce the set of conditions where hardware (i.e. KVM) *must*
     flush the RAP

   - Ignore nSVM intercepts for instructions that are not supported
     according to L1's virtual CPU model

   - Add support for expedited writes to the fast MMIO bus, a la VMX's
     fastpath for EPT Misconfig

   - Don't set GIF when clearing EFER.SVME, as GIF exists independently
     of SVM, and allow userspace to restore nested state with GIF=0

   - Treat exit_code as an unsigned 64-bit value through all of KVM

   - Add support for fetching SNP certificates from userspace

   - Fix a bug where KVM would use vmcb02 instead of vmcb01 when
     emulating VMLOAD or VMSAVE on behalf of L2

   - Misc fixes and cleanups

  x86 selftests:

   - Add a regression test for TPR<=>CR8 synchronization and IRQ masking

   - Overhaul selftest's MMU infrastructure to genericize stage-2 MMU
     support, and extend x86's infrastructure to support EPT and NPT
     (for L2 guests)

   - Extend several nested VMX tests to also cover nested SVM

   - Add a selftest for nested VMLOAD/VMSAVE

   - Rework the nested dirty log test, originally added as a regression
     test for PML where KVM logged L2 GPAs instead of L1 GPAs, to
     improve test coverage and to hopefully make the test easier to
     understand and maintain

  guest_memfd:

   - Remove kvm_gmem_populate()'s preparation tracking and half-baked
     hugepage handling. SEV/SNP was the only user of the tracking and it
     can do it via the RMP

   - Retroactively document and enforce (for SNP) that
     KVM_SEV_SNP_LAUNCH_UPDATE and KVM_TDX_INIT_MEM_REGION require the
     source page to be 4KiB aligned, to avoid non-trivial complexity for
     something that no known VMM seems to be doing and to avoid an API
     special case for in-place conversion, which simply can't support
     unaligned sources

   - When populating guest_memfd memory, GUP the source page in common
     code and pass the refcounted page to the vendor callback, instead
     of letting vendor code do the heavy lifting. Doing so avoids a
     looming deadlock bug with in-place due to an AB-BA conflict between
     mmap_lock and guest_memfd's filemap invalidate lock

  Generic:

   - Fix a bug where KVM would ignore the vCPU's selected address space
     when creating a vCPU-specific mapping of guest memory. Actually
     this bug could not be hit even on x86, the only architecture with
     multiple address spaces, but it's a bug nevertheless"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (267 commits)
  KVM: s390: Increase permitted SE header size to 1 MiB
  MAINTAINERS: Replace backup for s390 vfio-pci
  KVM: s390: vsie: Fix race in acquire_gmap_shadow()
  KVM: s390: vsie: Fix race in walk_guest_tables()
  KVM: s390: Use guest address to mark guest page dirty
  irqchip/riscv-imsic: Adjust the number of available guest irq files
  RISC-V: KVM: Transparent huge page support
  RISC-V: KVM: selftests: Add Zalasr extensions to get-reg-list test
  RISC-V: KVM: Allow Zalasr extensions for Guest/VM
  KVM: riscv: selftests: Add riscv vm satp modes
  KVM: riscv: selftests: add Zilsd and Zclsd extension to get-reg-list test
  riscv: KVM: allow Zilsd and Zclsd extensions for Guest/VM
  RISC-V: KVM: Skip IMSIC update if vCPU IMSIC state is not initialized
  RISC-V: KVM: Fix null pointer dereference in kvm_riscv_aia_imsic_rw_attr()
  RISC-V: KVM: Fix null pointer dereference in kvm_riscv_aia_imsic_has_attr()
  RISC-V: KVM: Remove unnecessary 'ret' assignment
  KVM: s390: Add explicit padding to struct kvm_s390_keyop
  KVM: LoongArch: selftests: Add steal time test case
  LoongArch: KVM: Add paravirt vcpu_is_preempted() support in guest side
  LoongArch: KVM: Add paravirt preempt feature in hypervisor side
  ...
2026-02-13 11:31:15 -08:00

1240 lines
30 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2024-2025 ARM Limited, All Rights Reserved.
*/
#define pr_fmt(fmt) "GICv5: " fmt
#include <linux/acpi_iort.h>
#include <linux/cpuhotplug.h>
#include <linux/idr.h>
#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/wordpart.h>
#include <linux/irqchip.h>
#include <linux/irqchip/arm-gic-v5.h>
#include <linux/irqchip/arm-vgic-info.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
/* Number of implemented interrupt priority bits; may be lowered at boot. */
static u8 pri_bits __ro_after_init = 5;

/* Mask for the 5-bit interrupt priority field. */
#define GICV5_IRQ_PRI_MASK	0x1f

/*
 * Priority value replicated into every PPI priority register byte by
 * gicv5_ppi_priority_init(), derived from the implemented priority bits.
 */
#define GICV5_IRQ_PRI_MI	(GICV5_IRQ_PRI_MASK & GENMASK(4, 5 - pri_bits))

/* Number of PPIs handled per CPU (two 64-bit enable/pending registers). */
#define PPI_NR			128
/* True if the current CPU has the GICv5 CPU interface capability. */
static bool gicv5_cpuif_has_gcie(void)
{
	return this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF);
}
/* Global GICv5 driver state shared across the driver's components. */
struct gicv5_chip_data gicv5_global_data __read_mostly;

/* Allocator handing out LPI numbers in [0, num_lpis). */
static DEFINE_IDA(lpi_ida);
/* Number of LPIs available; set via gicv5_init_lpis() during boot. */
static u32 num_lpis __ro_after_init;
/* Record the number of LPIs available to the allocator (boot time only). */
void __init gicv5_init_lpis(u32 lpis)
{
	num_lpis = lpis;
}
/* Reset the LPI count to zero so alloc_lpi() fails with -ENOSPC. */
void __init gicv5_deinit_lpis(void)
{
	num_lpis = 0;
}
/*
 * Reserve a free LPI number.
 *
 * Returns an LPI in [0, num_lpis) on success, -ENOSPC when no LPIs are
 * configured, or a negative errno from the IDA allocator.
 */
static int alloc_lpi(void)
{
	return num_lpis ? ida_alloc_max(&lpi_ida, num_lpis - 1, GFP_KERNEL)
			: -ENOSPC;
}
/* Return a previously allocated LPI number to the IDA pool. */
static void release_lpi(u32 lpi)
{
	ida_free(&lpi_ida, lpi);
}
/* Public wrapper: allocate an LPI number (see alloc_lpi()). */
int gicv5_alloc_lpi(void)
{
	return alloc_lpi();
}
/* Public wrapper: free an LPI number (see release_lpi()). */
void gicv5_free_lpi(u32 lpi)
{
	release_lpi(lpi);
}
/*
 * Program all sixteen PPI priority registers with the default priority,
 * replicated into each byte lane of the register.
 */
static void gicv5_ppi_priority_init(void)
{
/*
 * Register names are compile-time tokens, so the sixteen writes cannot
 * be folded into a run-time loop; a helper macro keeps them compact.
 */
#define GICV5_WRITE_PPI_PRIO(n)						\
	write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI),			\
		       SYS_ICC_PPI_PRIORITYR##n##_EL1)

	GICV5_WRITE_PPI_PRIO(0);
	GICV5_WRITE_PPI_PRIO(1);
	GICV5_WRITE_PPI_PRIO(2);
	GICV5_WRITE_PPI_PRIO(3);
	GICV5_WRITE_PPI_PRIO(4);
	GICV5_WRITE_PPI_PRIO(5);
	GICV5_WRITE_PPI_PRIO(6);
	GICV5_WRITE_PPI_PRIO(7);
	GICV5_WRITE_PPI_PRIO(8);
	GICV5_WRITE_PPI_PRIO(9);
	GICV5_WRITE_PPI_PRIO(10);
	GICV5_WRITE_PPI_PRIO(11);
	GICV5_WRITE_PPI_PRIO(12);
	GICV5_WRITE_PPI_PRIO(13);
	GICV5_WRITE_PPI_PRIO(14);
	GICV5_WRITE_PPI_PRIO(15);

#undef GICV5_WRITE_PPI_PRIO

	/*
	 * Context synchronization required to make sure the system register
	 * write effects are synchronised.
	 */
	isb();
}
/*
 * Configure priority and routing affinity for a newly mapped interrupt.
 *
 * Only LPIs and SPIs are configured through GIC configuration instructions
 * (CDPRI/CDAFF); any other interrupt type is left untouched. The interrupt
 * is routed to the IAFFID of the CPU running this code.
 */
static void gicv5_hwirq_init(irq_hw_number_t hwirq, u8 priority, u8 hwirq_type)
{
	u64 cdaff, cdpri;
	u16 iaffid;
	int ret;

	if (hwirq_type != GICV5_HWIRQ_TYPE_LPI &&
	    hwirq_type != GICV5_HWIRQ_TYPE_SPI)
		return;

	cdpri = FIELD_PREP(GICV5_GIC_CDPRI_ID_MASK, hwirq) |
		FIELD_PREP(GICV5_GIC_CDPRI_TYPE_MASK, hwirq_type) |
		FIELD_PREP(GICV5_GIC_CDPRI_PRIORITY_MASK, priority);
	gic_insn(cdpri, CDPRI);

	ret = gicv5_irs_cpu_to_iaffid(smp_processor_id(), &iaffid);
	if (WARN_ON_ONCE(ret))
		return;

	cdaff = FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, hwirq) |
		FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) |
		FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid);
	gic_insn(cdaff, CDAFF);
}
/*
 * irq_chip .irq_mask callback for PPIs: clear the interrupt's enable bit
 * in the banked ICC_PPI_ENABLER{0,1}_EL1 register.
 */
static void gicv5_ppi_irq_mask(struct irq_data *d)
{
	u64 bit = BIT_ULL(d->hwirq % 64);

	if (d->hwirq >= 64)
		sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, bit, 0);
	else
		sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, bit, 0);

	/*
	 * A context synchronization event is required so that the disable
	 * takes effect immediately - the lazy-disabled IRQ mechanism
	 * depends on that guarantee.
	 * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1.
	 */
	isb();
}
/*
 * Disable an SPI or LPI through the GIC CDDIS configuration instruction.
 */
static void gicv5_iri_irq_mask(struct irq_data *d, u8 hwirq_type)
{
	u64 cddis = FIELD_PREP(GICV5_GIC_CDDIS_TYPE_MASK, hwirq_type) |
		    FIELD_PREP(GICV5_GIC_CDDIS_ID_MASK, d->hwirq);

	gic_insn(cddis, CDDIS);

	/*
	 * The CDDIS write effects must be propagated immediately so the
	 * disable takes effect and the lazy-disabled IRQ mechanism works.
	 * Rule R_XCLJC states that the effects of a GIC system instruction
	 * complete in finite time.
	 * The GSB ensures completion of the GIC instruction and prevents
	 * loads, stores and GIC instructions from executing part of their
	 * functionality before the GSB SYS.
	 */
	gsb_sys();
}
/* irq_chip .irq_mask callback for SPIs. */
static void gicv5_spi_irq_mask(struct irq_data *d)
{
	gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_SPI);
}
/* irq_chip .irq_mask callback for LPIs. */
static void gicv5_lpi_irq_mask(struct irq_data *d)
{
	gicv5_iri_irq_mask(d, GICV5_HWIRQ_TYPE_LPI);
}
/*
 * irq_chip .irq_unmask callback for PPIs: set the interrupt's enable bit
 * in the banked ICC_PPI_ENABLER{0,1}_EL1 register.
 */
static void gicv5_ppi_irq_unmask(struct irq_data *d)
{
	u64 bit = BIT_ULL(d->hwirq % 64);

	if (d->hwirq >= 64)
		sysreg_clear_set_s(SYS_ICC_PPI_ENABLER1_EL1, 0, bit);
	else
		sysreg_clear_set_s(SYS_ICC_PPI_ENABLER0_EL1, 0, bit);

	/*
	 * The enable must take effect in finite time - a context
	 * synchronization event is required to guarantee it; we cannot
	 * take for granted that it would otherwise happen (eg a core going
	 * straight into idle after enabling a PPI).
	 * Reference: I_ZLTKB/R_YRGMH GICv5 specification - section 2.9.1.
	 */
	isb();
}
/*
 * Enable an SPI or LPI through the GIC CDEN configuration instruction.
 */
static void gicv5_iri_irq_unmask(struct irq_data *d, u8 hwirq_type)
{
	u64 cden = FIELD_PREP(GICV5_GIC_CDEN_TYPE_MASK, hwirq_type) |
		   FIELD_PREP(GICV5_GIC_CDEN_ID_MASK, d->hwirq);

	/*
	 * Rule R_XCLJC states that the effects of a GIC system instruction
	 * complete in finite time and that's the only requirement when
	 * unmasking an SPI/LPI IRQ - no explicit barrier is needed here.
	 */
	gic_insn(cden, CDEN);
}
/* irq_chip .irq_unmask callback for SPIs. */
static void gicv5_spi_irq_unmask(struct irq_data *d)
{
	gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_SPI);
}
/* irq_chip .irq_unmask callback for LPIs. */
static void gicv5_lpi_irq_unmask(struct irq_data *d)
{
	gicv5_iri_irq_unmask(d, GICV5_HWIRQ_TYPE_LPI);
}
/*
 * End an interrupt: deactivate it with CDDI, then issue the CDEOI
 * (priority drop) instruction.
 */
static void gicv5_hwirq_eoi(u32 hwirq_id, u8 hwirq_type)
{
	u64 cddi = FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, hwirq_type) |
		   FIELD_PREP(GICV5_GIC_CDDI_ID_MASK, hwirq_id);

	gic_insn(cddi, CDDI);
	gic_insn(0, CDEOI);
}
/*
 * irq_chip .irq_eoi callback for PPIs.
 *
 * A PPI forwarded to a vCPU only gets the CDEOI (priority drop) here;
 * deactivation is left to the guest.
 */
static void gicv5_ppi_irq_eoi(struct irq_data *d)
{
	/* Skip deactivate for forwarded PPI interrupts */
	if (irqd_is_forwarded_to_vcpu(d)) {
		gic_insn(0, CDEOI);
		return;
	}

	gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_PPI);
}
/* irq_chip .irq_eoi callback for SPIs. */
static void gicv5_spi_irq_eoi(struct irq_data *d)
{
	gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_SPI);
}
/* irq_chip .irq_eoi callback for LPIs. */
static void gicv5_lpi_irq_eoi(struct irq_data *d)
{
	gicv5_hwirq_eoi(d->hwirq, GICV5_HWIRQ_TYPE_LPI);
}
/*
 * Route an SPI or LPI to a CPU picked from @mask_val by translating the
 * CPU to its interrupt affinity ID (IAFFID) and issuing a CDAFF
 * configuration instruction.
 *
 * Returns IRQ_SET_MASK_OK_DONE on success (effective affinity has been
 * updated here) or a negative errno if the CPU has no IAFFID mapping.
 */
static int gicv5_iri_irq_set_affinity(struct irq_data *d,
				      const struct cpumask *mask_val,
				      bool force, u8 hwirq_type)
{
	int cpu, ret;
	u16 iaffid;
	u64 cdaff;

	cpu = force ? cpumask_first(mask_val)
		    : cpumask_any_and(mask_val, cpu_online_mask);

	ret = gicv5_irs_cpu_to_iaffid(cpu, &iaffid);
	if (ret)
		return ret;

	cdaff = FIELD_PREP(GICV5_GIC_CDAFF_ID_MASK, d->hwirq) |
		FIELD_PREP(GICV5_GIC_CDAFF_TYPE_MASK, hwirq_type) |
		FIELD_PREP(GICV5_GIC_CDAFF_IAFFID_MASK, iaffid);
	gic_insn(cdaff, CDAFF);

	irq_data_update_effective_affinity(d, cpumask_of(cpu));

	return IRQ_SET_MASK_OK_DONE;
}
/* irq_chip .irq_set_affinity callback for SPIs. */
static int gicv5_spi_irq_set_affinity(struct irq_data *d,
				      const struct cpumask *mask_val,
				      bool force)
{
	return gicv5_iri_irq_set_affinity(d, mask_val, force,
					  GICV5_HWIRQ_TYPE_SPI);
}
/* irq_chip .irq_set_affinity callback for LPIs. */
static int gicv5_lpi_irq_set_affinity(struct irq_data *d,
				      const struct cpumask *mask_val,
				      bool force)
{
	return gicv5_iri_irq_set_affinity(d, mask_val, force,
					  GICV5_HWIRQ_TYPE_LPI);
}
/* Selector for the banked per-PPI state registers accessed below. */
enum ppi_reg {
	PPI_PENDING,	/* ICC_PPI_{S,C}PENDR<n>_EL1 */
	PPI_ACTIVE,	/* ICC_PPI_{S,C}ACTIVER<n>_EL1 */
	PPI_HM		/* ICC_PPI_HMR<n>_EL1 */
};
/*
 * Read the PPI state register bank selected by @which for PPI @irq.
 * PPIs 0-63 live in the <reg>0 register, 64-127 in <reg>1.
 *
 * @which must be a compile-time constant: the function is __always_inline
 * so constant propagation eliminates the dead branches, and the default
 * case trips BUILD_BUG_ON() for out-of-range selectors.
 */
static __always_inline u64 read_ppi_sysreg_s(unsigned int irq,
					     const enum ppi_reg which)
{
	switch (which) {
	case PPI_PENDING:
		return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SPENDR0_EL1) :
				  read_sysreg_s(SYS_ICC_PPI_SPENDR1_EL1);
	case PPI_ACTIVE:
		return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_SACTIVER0_EL1) :
				  read_sysreg_s(SYS_ICC_PPI_SACTIVER1_EL1);
	case PPI_HM:
		return irq < 64 ? read_sysreg_s(SYS_ICC_PPI_HMR0_EL1) :
				  read_sysreg_s(SYS_ICC_PPI_HMR1_EL1);
	default:
		BUILD_BUG_ON(1);
	}
}
/*
 * Write the bit for PPI @irq to the state register bank selected by
 * @which. @set chooses between the Set (S*) and Clear (C*) register of
 * the bank; PPIs 0-63 target the <reg>0 register, 64-127 target <reg>1.
 *
 * @which must be a compile-time constant (see read_ppi_sysreg_s()); the
 * default case trips BUILD_BUG_ON() for unsupported selectors (PPI_HM is
 * read-only here and deliberately not handled).
 */
static __always_inline void write_ppi_sysreg_s(unsigned int irq, bool set,
					       const enum ppi_reg which)
{
	u64 bit = BIT_ULL(irq % 64);

	switch (which) {
	case PPI_PENDING:
		if (set) {
			if (irq < 64)
				write_sysreg_s(bit, SYS_ICC_PPI_SPENDR0_EL1);
			else
				write_sysreg_s(bit, SYS_ICC_PPI_SPENDR1_EL1);
		} else {
			if (irq < 64)
				write_sysreg_s(bit, SYS_ICC_PPI_CPENDR0_EL1);
			else
				write_sysreg_s(bit, SYS_ICC_PPI_CPENDR1_EL1);
		}
		return;
	case PPI_ACTIVE:
		if (set) {
			if (irq < 64)
				write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER0_EL1);
			else
				write_sysreg_s(bit, SYS_ICC_PPI_SACTIVER1_EL1);
		} else {
			if (irq < 64)
				write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER0_EL1);
			else
				write_sysreg_s(bit, SYS_ICC_PPI_CACTIVER1_EL1);
		}
		return;
	default:
		BUILD_BUG_ON(1);
	}
}
/*
 * irq_chip .irq_get_irqchip_state callback for PPIs: report pending or
 * active state by reading the corresponding banked system register.
 *
 * Returns 0 on success, -EINVAL for unsupported state queries.
 */
static int gicv5_ppi_irq_get_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool *state)
{
	u64 hwirq_id_bit = BIT_ULL(d->hwirq % 64);

	switch (which) {
	case IRQCHIP_STATE_PENDING:
		*state = !!(read_ppi_sysreg_s(d->hwirq, PPI_PENDING) & hwirq_id_bit);
		return 0;
	case IRQCHIP_STATE_ACTIVE:
		*state = !!(read_ppi_sysreg_s(d->hwirq, PPI_ACTIVE) & hwirq_id_bit);
		return 0;
	default:
		pr_debug("Unexpected PPI irqchip state\n");
		return -EINVAL;
	}
}
/*
 * Query the pending/active state of an SPI or LPI: issue a CDRCFG
 * instruction to select the interrupt, then read the result from
 * ICC_ICSR_EL1.
 */
static int gicv5_iri_irq_get_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool *state, u8 hwirq_type)
{
	u64 icsr, cdrcfg;

	/* Select the interrupt whose state ICC_ICSR_EL1 should report. */
	cdrcfg = d->hwirq | FIELD_PREP(GICV5_GIC_CDRCFG_TYPE_MASK, hwirq_type);

	gic_insn(cdrcfg, CDRCFG);
	/* Ensure CDRCFG has taken effect before reading ICSR back. */
	isb();

	icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);

	/* The F bit flags a failed/invalid state query. */
	if (FIELD_GET(ICC_ICSR_EL1_F, icsr)) {
		pr_err("ICSR_EL1 is invalid\n");
		return -EINVAL;
	}

	switch (which) {
	case IRQCHIP_STATE_PENDING:
		*state = !!(FIELD_GET(ICC_ICSR_EL1_Pending, icsr));
		return 0;

	case IRQCHIP_STATE_ACTIVE:
		*state = !!(FIELD_GET(ICC_ICSR_EL1_Active, icsr));
		return 0;

	default:
		pr_debug("Unexpected irqchip_irq_state\n");
		return -EINVAL;
	}
}
/* SPI flavour of the common IRI irqchip-state read. */
static int gicv5_spi_irq_get_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool *state)
{
	return gicv5_iri_irq_get_irqchip_state(d, which, state,
					       GICV5_HWIRQ_TYPE_SPI);
}

/* LPI flavour of the common IRI irqchip-state read. */
static int gicv5_lpi_irq_get_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool *state)
{
	return gicv5_iri_irq_get_irqchip_state(d, which, state,
					       GICV5_HWIRQ_TYPE_LPI);
}
/*
 * Write the pending or active state of a PPI. The switch passes
 * compile-time constant ppi_reg selectors, as write_ppi_sysreg_s()
 * requires.
 */
static int gicv5_ppi_irq_set_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool state)
{
	switch (which) {
	case IRQCHIP_STATE_PENDING:
		write_ppi_sysreg_s(d->hwirq, state, PPI_PENDING);
		break;
	case IRQCHIP_STATE_ACTIVE:
		write_ppi_sysreg_s(d->hwirq, state, PPI_ACTIVE);
		break;
	default:
		pr_debug("Unexpected PPI irqchip state\n");
		return -EINVAL;
	}

	return 0;
}
/* Set or clear the pending state of an SPI/LPI with a CDPEND instruction. */
static void gicv5_iri_irq_write_pending_state(struct irq_data *d, bool state,
					      u8 hwirq_type)
{
	u64 cdpend;

	cdpend = FIELD_PREP(GICV5_GIC_CDPEND_TYPE_MASK, hwirq_type) |
		 FIELD_PREP(GICV5_GIC_CDPEND_ID_MASK, d->hwirq) |
		 FIELD_PREP(GICV5_GIC_CDPEND_PENDING_MASK, state);

	gic_insn(cdpend, CDPEND);
}

/* SPI flavour of the pending-state write. */
static void gicv5_spi_irq_write_pending_state(struct irq_data *d, bool state)
{
	gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_SPI);
}

/* LPI flavour of the pending-state write. */
static void gicv5_lpi_irq_write_pending_state(struct irq_data *d, bool state)
{
	gicv5_iri_irq_write_pending_state(d, state, GICV5_HWIRQ_TYPE_LPI);
}
/* Only the pending state can be written for an SPI. */
static int gicv5_spi_irq_set_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool state)
{
	if (which != IRQCHIP_STATE_PENDING) {
		pr_debug("Unexpected irqchip_irq_state\n");
		return -EINVAL;
	}

	gicv5_spi_irq_write_pending_state(d, state);

	return 0;
}

/* Only the pending state can be written for an LPI. */
static int gicv5_lpi_irq_set_irqchip_state(struct irq_data *d,
					   enum irqchip_irq_state which,
					   bool state)
{
	if (which != IRQCHIP_STATE_PENDING) {
		pr_debug("Unexpected irqchip_irq_state\n");
		return -EINVAL;
	}

	gicv5_lpi_irq_write_pending_state(d, state);

	return 0;
}
/* Retrigger an SPI by forcing it pending; non-zero return means success. */
static int gicv5_spi_irq_retrigger(struct irq_data *data)
{
	return gicv5_spi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING,
					       true) == 0;
}

/* Retrigger an LPI by forcing it pending; non-zero return means success. */
static int gicv5_lpi_irq_retrigger(struct irq_data *data)
{
	return gicv5_lpi_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING,
					       true) == 0;
}
/*
 * Send an IPI to @cpu. IPIs are stacked on top of LPIs in the domain
 * hierarchy, so retriggering the parent marks the backing LPI pending.
 */
static void gicv5_ipi_send_single(struct irq_data *d, unsigned int cpu)
{
	/* Mark the LPI pending */
	irq_chip_retrigger_hierarchy(d);
}
/*
 * A PPI is level-sensitive when its bit is set in the HW handling-mode
 * register (HMR).
 */
static bool gicv5_ppi_irq_is_level(irq_hw_number_t hwirq)
{
	return (read_ppi_sysreg_s(hwirq, PPI_HM) & BIT_ULL(hwirq % 64)) != 0;
}
/* Record whether this PPI is currently forwarded to a vCPU. */
static int gicv5_ppi_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (!vcpu)
		irqd_clr_forwarded_to_vcpu(d);
	else
		irqd_set_forwarded_to_vcpu(d);

	return 0;
}
/* PPI irqchip: per-CPU interrupts, no affinity or trigger configuration. */
static const struct irq_chip gicv5_ppi_irq_chip = {
	.name			= "GICv5-PPI",
	.irq_mask		= gicv5_ppi_irq_mask,
	.irq_unmask		= gicv5_ppi_irq_unmask,
	.irq_eoi		= gicv5_ppi_irq_eoi,
	.irq_get_irqchip_state	= gicv5_ppi_irq_get_irqchip_state,
	.irq_set_irqchip_state	= gicv5_ppi_irq_set_irqchip_state,
	.irq_set_vcpu_affinity	= gicv5_ppi_irq_set_vcpu_affinity,
	.flags			= IRQCHIP_SKIP_SET_WAKE |
				  IRQCHIP_MASK_ON_SUSPEND,
};

/* SPI irqchip: wired interrupts with configurable trigger and affinity. */
static const struct irq_chip gicv5_spi_irq_chip = {
	.name			= "GICv5-SPI",
	.irq_mask		= gicv5_spi_irq_mask,
	.irq_unmask		= gicv5_spi_irq_unmask,
	.irq_eoi		= gicv5_spi_irq_eoi,
	.irq_set_type		= gicv5_spi_irq_set_type,
	.irq_set_affinity	= gicv5_spi_irq_set_affinity,
	.irq_retrigger		= gicv5_spi_irq_retrigger,
	.irq_get_irqchip_state	= gicv5_spi_irq_get_irqchip_state,
	.irq_set_irqchip_state	= gicv5_spi_irq_set_irqchip_state,
	.flags			= IRQCHIP_SET_TYPE_MASKED |
				  IRQCHIP_SKIP_SET_WAKE |
				  IRQCHIP_MASK_ON_SUSPEND,
};

/* LPI irqchip: message-based interrupts, no trigger configuration. */
static const struct irq_chip gicv5_lpi_irq_chip = {
	.name			= "GICv5-LPI",
	.irq_mask		= gicv5_lpi_irq_mask,
	.irq_unmask		= gicv5_lpi_irq_unmask,
	.irq_eoi		= gicv5_lpi_irq_eoi,
	.irq_set_affinity	= gicv5_lpi_irq_set_affinity,
	.irq_retrigger		= gicv5_lpi_irq_retrigger,
	.irq_get_irqchip_state	= gicv5_lpi_irq_get_irqchip_state,
	.irq_set_irqchip_state	= gicv5_lpi_irq_set_irqchip_state,
	.flags			= IRQCHIP_SKIP_SET_WAKE |
				  IRQCHIP_MASK_ON_SUSPEND,
};

/* IPI irqchip: mostly defers to the parent (LPI) chip in the hierarchy. */
static const struct irq_chip gicv5_ipi_irq_chip = {
	.name			= "GICv5-IPI",
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= irq_chip_eoi_parent,
	.irq_set_affinity	= irq_chip_set_affinity_parent,
	.irq_get_irqchip_state	= irq_chip_get_parent_state,
	.irq_set_irqchip_state	= irq_chip_set_parent_state,
	.ipi_send_single	= gicv5_ipi_send_single,
	.flags			= IRQCHIP_SKIP_SET_WAKE |
				  IRQCHIP_MASK_ON_SUSPEND,
};
/*
 * Translate an irq_fwspec into (hwirq, type) for the given HW IRQ class.
 *
 * OF bindings use three cells: [type, id, trigger]. Software fwnodes
 * encode type and ID in param[0] (GICV5_HWIRQ_TYPE/GICV5_HWIRQ_ID fields)
 * and the trigger in param[1].
 *
 * Always inlined with a constant @hwirq_type so the trailing switch is
 * folded and BUILD_BUG_ON(1) rejects unsupported types at compile time.
 *
 * Fix vs original: a fwnode that is neither an OF node nor a software
 * irqchip fwnode previously fell through with *hwirq and hwirq_trigger
 * uninitialized (undefined behavior); reject it with -EINVAL instead.
 * Also use fwspec_irq_type consistently in the OF branch instead of
 * leaving it assigned-but-unused.
 */
static __always_inline int gicv5_irq_domain_translate(struct irq_domain *d,
						      struct irq_fwspec *fwspec,
						      irq_hw_number_t *hwirq,
						      unsigned int *type,
						      const u8 hwirq_type)
{
	unsigned int hwirq_trigger;
	u8 fwspec_irq_type;

	if (is_of_node(fwspec->fwnode)) {
		if (fwspec->param_count < 3)
			return -EINVAL;

		fwspec_irq_type = fwspec->param[0];
		if (fwspec_irq_type != hwirq_type)
			return -EINVAL;

		*hwirq = fwspec->param[1];
		hwirq_trigger = fwspec->param[2];
	} else if (is_fwnode_irqchip(fwspec->fwnode)) {
		if (fwspec->param_count != 2)
			return -EINVAL;

		fwspec_irq_type = FIELD_GET(GICV5_HWIRQ_TYPE, fwspec->param[0]);
		if (fwspec_irq_type != hwirq_type)
			return -EINVAL;

		*hwirq = FIELD_GET(GICV5_HWIRQ_ID, fwspec->param[0]);
		hwirq_trigger = fwspec->param[1];
	} else {
		/* Unknown fwnode type: nothing to translate. */
		return -EINVAL;
	}

	switch (hwirq_type) {
	case GICV5_HWIRQ_TYPE_PPI:
		/*
		 * Handling mode is hardcoded for PPIs, set the type using
		 * HW reported value.
		 */
		*type = gicv5_ppi_irq_is_level(*hwirq) ? IRQ_TYPE_LEVEL_LOW :
							 IRQ_TYPE_EDGE_RISING;
		break;
	case GICV5_HWIRQ_TYPE_SPI:
		*type = hwirq_trigger & IRQ_TYPE_SENSE_MASK;
		break;
	default:
		BUILD_BUG_ON(1);
	}

	return 0;
}
/* PPI instantiation of the generic fwspec translation. */
static int gicv5_irq_ppi_domain_translate(struct irq_domain *d,
					  struct irq_fwspec *fwspec,
					  irq_hw_number_t *hwirq,
					  unsigned int *type)
{
	return gicv5_irq_domain_translate(d, fwspec, hwirq, type,
					  GICV5_HWIRQ_TYPE_PPI);
}
/*
 * Allocate and set up a single PPI virq: translate the fwspec, mark
 * level-triggered PPIs, and install the per-CPU devid flow handler.
 * Only single-IRQ allocations are supported.
 */
static int gicv5_irq_ppi_domain_alloc(struct irq_domain *domain, unsigned int virq,
				      unsigned int nr_irqs, void *arg)
{
	unsigned int type = IRQ_TYPE_NONE;
	struct irq_fwspec *fwspec = arg;
	irq_hw_number_t hwirq;
	int ret;

	if (WARN_ON_ONCE(nr_irqs != 1))
		return -EINVAL;

	ret = gicv5_irq_ppi_domain_translate(domain, fwspec, &hwirq, &type);
	if (ret)
		return ret;

	/* Translation derives the trigger from HW, flag level PPIs here. */
	if (type & IRQ_TYPE_LEVEL_MASK)
		irq_set_status_flags(virq, IRQ_LEVEL);

	irq_set_percpu_devid(virq);
	irq_domain_set_info(domain, virq, hwirq, &gicv5_ppi_irq_chip, NULL,
			    handle_percpu_devid_irq, NULL, NULL);

	return 0;
}
/* Undo a single-virq GICv5 domain allocation (shared by PPI/SPI/LPI). */
static void gicv5_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
	struct irq_data *irqd;

	if (WARN_ON_ONCE(nr_irqs != 1))
		return;

	irqd = irq_domain_get_irq_data(domain, virq);

	irq_set_handler(virq, NULL);
	irq_domain_reset_irq_data(irqd);
}
/*
 * Match an fwspec against the PPI domain: the fwnode must match and the
 * fwspec's encoded HW IRQ type must be PPI.
 *
 * Fix vs original: hwirq_type was read uninitialized if the fwnode was
 * neither an OF node nor a software irqchip fwnode; decline the match
 * explicitly in that case.
 */
static int gicv5_irq_ppi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
				       enum irq_domain_bus_token bus_token)
{
	u32 hwirq_type;

	if (fwspec->fwnode != d->fwnode)
		return 0;

	if (is_of_node(fwspec->fwnode))
		hwirq_type = fwspec->param[0];
	else if (is_fwnode_irqchip(fwspec->fwnode))
		hwirq_type = FIELD_GET(GICV5_HWIRQ_TYPE, fwspec->param[0]);
	else
		return 0;

	if (hwirq_type != GICV5_HWIRQ_TYPE_PPI)
		return 0;

	return (d == gicv5_global_data.ppi_domain);
}
/* Domain ops for PPIs; freeing is shared with the SPI domain. */
static const struct irq_domain_ops gicv5_irq_ppi_domain_ops = {
	.translate	= gicv5_irq_ppi_domain_translate,
	.alloc		= gicv5_irq_ppi_domain_alloc,
	.free		= gicv5_irq_domain_free,
	.select		= gicv5_irq_ppi_domain_select
};
/* SPI instantiation of the generic fwspec translation. */
static int gicv5_irq_spi_domain_translate(struct irq_domain *d,
					  struct irq_fwspec *fwspec,
					  irq_hw_number_t *hwirq,
					  unsigned int *type)
{
	return gicv5_irq_domain_translate(d, fwspec, hwirq, type,
					  GICV5_HWIRQ_TYPE_SPI);
}
/*
 * Allocate and set up a single SPI virq: translate the fwspec, bind the
 * virq to the owning IRS (looked up by SPI ID) and initialize the HW
 * interrupt state. Only single-IRQ allocations are supported.
 */
static int gicv5_irq_spi_domain_alloc(struct irq_domain *domain, unsigned int virq,
				      unsigned int nr_irqs, void *arg)
{
	struct gicv5_irs_chip_data *chip_data;
	unsigned int type = IRQ_TYPE_NONE;
	struct irq_fwspec *fwspec = arg;
	struct irq_data *irqd;
	irq_hw_number_t hwirq;
	int ret;

	if (WARN_ON_ONCE(nr_irqs != 1))
		return -EINVAL;

	ret = gicv5_irq_spi_domain_translate(domain, fwspec, &hwirq, &type);
	if (ret)
		return ret;

	irqd = irq_desc_get_irq_data(irq_to_desc(virq));
	/*
	 * NOTE(review): the IRS lookup result is installed as chip_data
	 * without a NULL check - presumably an SPI ID that translated
	 * successfully always belongs to an IRS; confirm.
	 */
	chip_data = gicv5_irs_lookup_by_spi_id(hwirq);

	irq_domain_set_info(domain, virq, hwirq, &gicv5_spi_irq_chip, chip_data,
			    handle_fasteoi_irq, NULL, NULL);
	irq_set_probe(virq);
	irqd_set_single_target(irqd);

	/* Program the HW with the default priority for the new SPI. */
	gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_SPI);

	return 0;
}
/*
 * Match an fwspec against the SPI domain: the fwnode must match and the
 * fwspec's encoded HW IRQ type must be SPI.
 *
 * Fix vs original: hwirq_type was read uninitialized if the fwnode was
 * neither an OF node nor a software irqchip fwnode; decline the match
 * explicitly in that case.
 */
static int gicv5_irq_spi_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
				       enum irq_domain_bus_token bus_token)
{
	u32 hwirq_type;

	if (fwspec->fwnode != d->fwnode)
		return 0;

	if (is_of_node(fwspec->fwnode))
		hwirq_type = fwspec->param[0];
	else if (is_fwnode_irqchip(fwspec->fwnode))
		hwirq_type = FIELD_GET(GICV5_HWIRQ_TYPE, fwspec->param[0]);
	else
		return 0;

	if (hwirq_type != GICV5_HWIRQ_TYPE_SPI)
		return 0;

	return (d == gicv5_global_data.spi_domain);
}
/* Domain ops for SPIs; freeing is shared with the PPI domain. */
static const struct irq_domain_ops gicv5_irq_spi_domain_ops = {
	.translate	= gicv5_irq_spi_domain_translate,
	.alloc		= gicv5_irq_spi_domain_alloc,
	.free		= gicv5_irq_domain_free,
	.select		= gicv5_irq_spi_domain_select
};
/*
 * Put a freshly allocated LPI in a known state: edge handling mode and
 * not pending.
 */
static void gicv5_lpi_config_reset(struct irq_data *d)
{
	u64 cdhm;

	/*
	 * Reset LPIs handling mode to edge by default and clear pending
	 * state to make sure we start the LPI with a clean state from
	 * previous incarnations.
	 */
	cdhm = FIELD_PREP(GICV5_GIC_CDHM_HM_MASK, 0) |
	       FIELD_PREP(GICV5_GIC_CDHM_TYPE_MASK, GICV5_HWIRQ_TYPE_LPI) |
	       FIELD_PREP(GICV5_GIC_CDHM_ID_MASK, d->hwirq);

	gic_insn(cdhm, CDHM);

	gicv5_lpi_irq_write_pending_state(d, false);
}
/*
 * Allocate and set up a single LPI virq. @arg is a pointer to the LPI
 * number (u32), as passed by the IPI domain alloc path. The interrupt
 * state translation entry is allocated in the IRS, then the HW state is
 * initialized and reset. Only single-IRQ allocations are supported.
 */
static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int virq,
				      unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq;
	struct irq_data *irqd;
	u32 *lpi = arg;
	int ret;

	if (WARN_ON_ONCE(nr_irqs != 1))
		return -EINVAL;

	hwirq = *lpi;

	irqd = irq_domain_get_irq_data(domain, virq);

	irq_domain_set_info(domain, virq, hwirq, &gicv5_lpi_irq_chip, NULL,
			    handle_fasteoi_irq, NULL, NULL);
	irqd_set_single_target(irqd);

	/* Allocate the interrupt state translation entry for this LPI. */
	ret = gicv5_irs_iste_alloc(hwirq);
	if (ret < 0)
		return ret;

	gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI);
	gicv5_lpi_config_reset(irqd);

	return 0;
}
/* Domain ops for LPIs; no .translate - LPIs are allocated by number. */
static const struct irq_domain_ops gicv5_irq_lpi_domain_ops = {
	.alloc	= gicv5_irq_lpi_domain_alloc,
	.free	= gicv5_irq_domain_free,
};

/*
 * Create the (radix-tree based) LPI domain. A NULL result on allocation
 * failure is tolerated here: gicv5_init_domains() WARNs if the LPI
 * domain is missing when setting up IPIs.
 */
void __init gicv5_init_lpi_domain(void)
{
	struct irq_domain *d;

	d = irq_domain_create_tree(NULL, &gicv5_irq_lpi_domain_ops, NULL);
	gicv5_global_data.lpi_domain = d;
}

/* Tear down the LPI domain and clear the global reference. */
void __init gicv5_free_lpi_domain(void)
{
	irq_domain_remove(gicv5_global_data.lpi_domain);
	gicv5_global_data.lpi_domain = NULL;
}
/*
 * Allocate @nr_irqs IPI virqs, each backed by a freshly allocated LPI in
 * the parent (LPI) domain. The IPI hwirq is simply the loop index; the
 * real HW number lives in the parent irq_data.
 */
static int gicv5_irq_ipi_domain_alloc(struct irq_domain *domain, unsigned int virq,
				      unsigned int nr_irqs, void *arg)
{
	struct irq_data *irqd;
	int ret, i;
	u32 lpi;

	for (i = 0; i < nr_irqs; i++) {
		ret = gicv5_alloc_lpi();
		if (ret < 0)
			return ret;

		lpi = ret;

		ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi);
		if (ret) {
			gicv5_free_lpi(lpi);
			/*
			 * NOTE(review): LPIs set up in earlier loop
			 * iterations are not unwound here - presumably the
			 * irq_domain core error path releases them; confirm.
			 */
			return ret;
		}

		irqd = irq_domain_get_irq_data(domain, virq + i);

		irq_domain_set_hwirq_and_chip(domain, virq + i, i,
					      &gicv5_ipi_irq_chip, NULL);
		irqd_set_single_target(irqd);
		irq_set_handler(virq + i, handle_percpu_irq);
	}

	return 0;
}
/*
 * Free @nr_irqs IPI virqs: release the backing LPI (whose number is held
 * in the parent irq_data), then tear down this level and the parent's.
 * A missing irq_data stops the loop early.
 */
static void gicv5_irq_ipi_domain_free(struct irq_domain *domain, unsigned int virq,
				      unsigned int nr_irqs)
{
	struct irq_data *d;
	unsigned int i;

	for (i = 0; i < nr_irqs; i++) {
		d = irq_domain_get_irq_data(domain, virq + i);

		if (!d)
			return;

		gicv5_free_lpi(d->parent_data->hwirq);

		irq_set_handler(virq + i, NULL);
		irq_domain_reset_irq_data(d);

		irq_domain_free_irqs_parent(domain, virq + i, 1);
	}
}
/* Domain ops for IPIs, stacked on top of the LPI domain. */
static const struct irq_domain_ops gicv5_irq_ipi_domain_ops = {
	.alloc	= gicv5_irq_ipi_domain_alloc,
	.free	= gicv5_irq_ipi_domain_free,
};
/*
 * Dispatch an acknowledged interrupt to the irq domain matching its HW
 * type (PPI/SPI/LPI). If no handler runs, EOI the interrupt ourselves so
 * it does not stay active forever.
 */
static void handle_irq_per_domain(u32 hwirq)
{
	u8 hwirq_type = FIELD_GET(GICV5_HWIRQ_TYPE, hwirq);
	u32 hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, hwirq);
	struct irq_domain *domain;

	switch (hwirq_type) {
	case GICV5_HWIRQ_TYPE_PPI:
		domain = gicv5_global_data.ppi_domain;
		break;
	case GICV5_HWIRQ_TYPE_SPI:
		domain = gicv5_global_data.spi_domain;
		break;
	case GICV5_HWIRQ_TYPE_LPI:
		domain = gicv5_global_data.lpi_domain;
		break;
	default:
		pr_err_once("Unknown IRQ type, bail out\n");
		return;
	}

	if (generic_handle_domain_irq(domain, hwirq_id)) {
		/* The interrupt was activated by CDIA: deactivate it here. */
		pr_err_once("Could not handle, hwirq = 0x%x", hwirq_id);
		gicv5_hwirq_eoi(hwirq_id, hwirq_type);
	}
}
/*
 * Top-level GICv5 interrupt entry: acknowledge (and thereby activate) the
 * highest-priority pending interrupt with CDIA, then dispatch it. The
 * barrier sequence below is mandatory - do not reorder.
 */
static void __exception_irq_entry gicv5_handle_irq(struct pt_regs *regs)
{
	bool valid;
	u32 hwirq;
	u64 ia;

	ia = gicr_insn(CDIA);
	valid = GICV5_GICR_CDIA_VALID(ia);

	/* A spurious wakeup: nothing was pending. */
	if (!valid)
		return;

	/*
	 * Ensure that the CDIA instruction effects (ie IRQ activation) are
	 * completed before handling the interrupt.
	 */
	gsb_ack();

	/*
	 * Ensure instruction ordering between an acknowledgment and subsequent
	 * instructions in the IRQ handler using an ISB.
	 */
	isb();

	hwirq = FIELD_GET(GICV5_HWIRQ_INTID, ia);

	handle_irq_per_domain(hwirq);
}
/* Disable the CPU interface by clearing the ICC_CR0_EL1 enable bit. */
static void gicv5_cpu_disable_interrupts(void)
{
	u64 cr0;

	cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0);
	write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
}

/*
 * Bring up this CPU's interface: mask all PPIs, program PPI priorities
 * and the priority mask, then enable the interface. The enable write is
 * last so nothing fires with stale configuration.
 */
static void gicv5_cpu_enable_interrupts(void)
{
	u64 cr0, pcr;

	/* Start with every PPI disabled. */
	write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1);
	write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1);

	gicv5_ppi_priority_init();

	pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_PRI_MI);
	write_sysreg_s(pcr, SYS_ICC_PCR_EL1);

	cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1);
	write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
}
/* First virq of the IPI range allocated in gicv5_smp_init(). */
static int base_ipi_virq;

/*
 * CPU hotplug "starting" callback: sanity-check that the CPU implements
 * FEAT_GCIE, enable its interface and register it with the IRS.
 */
static int gicv5_starting_cpu(unsigned int cpu)
{
	if (WARN(!gicv5_cpuif_has_gcie(),
		 "GICv5 system components present but CPU does not have FEAT_GCIE"))
		return -ENODEV;

	gicv5_cpu_enable_interrupts();

	return gicv5_irs_register_cpu(cpu);
}
/*
 * SMP bring-up: install the hotplug callback and allocate the IPI virq
 * range (GICV5_IPIS_PER_CPU IPIs for each possible CPU).
 */
static void __init gicv5_smp_init(void)
{
	unsigned int num_ipis = GICV5_IPIS_PER_CPU * nr_cpu_ids;

	cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
				  "irqchip/arm/gicv5:starting",
				  gicv5_starting_cpu, NULL);

	base_ipi_virq = irq_domain_alloc_irqs(gicv5_global_data.ipi_domain,
					      num_ipis, NUMA_NO_NODE, NULL);
	if (WARN(base_ipi_virq <= 0, "IPI IRQ allocation was not successful"))
		return;

	set_smp_ipi_range_percpu(base_ipi_virq, GICV5_IPIS_PER_CPU, nr_cpu_ids);
}
/*
 * Remove whichever of the PPI/SPI/IPI domains were created and clear the
 * global references; used on init error paths, so partial initialization
 * (NULL domains) is expected.
 */
static void __init gicv5_free_domains(void)
{
	if (gicv5_global_data.ppi_domain)
		irq_domain_remove(gicv5_global_data.ppi_domain);
	if (gicv5_global_data.spi_domain)
		irq_domain_remove(gicv5_global_data.spi_domain);
	if (gicv5_global_data.ipi_domain)
		irq_domain_remove(gicv5_global_data.ipi_domain);

	gicv5_global_data.ppi_domain = NULL;
	gicv5_global_data.spi_domain = NULL;
	gicv5_global_data.ipi_domain = NULL;
}
/*
 * Create the PPI domain, the SPI domain (only when the system reports
 * SPIs) and the IPI domain stacked on the previously created LPI domain.
 * On any failure all domains created so far are removed.
 */
static int __init gicv5_init_domains(struct fwnode_handle *handle)
{
	u32 spi_count = gicv5_global_data.global_spi_count;
	struct irq_domain *d;

	d = irq_domain_create_linear(handle, PPI_NR, &gicv5_irq_ppi_domain_ops, NULL);
	if (!d)
		return -ENOMEM;

	irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED);
	gicv5_global_data.ppi_domain = d;

	if (spi_count) {
		d = irq_domain_create_linear(handle, spi_count,
					     &gicv5_irq_spi_domain_ops, NULL);

		if (!d) {
			gicv5_free_domains();
			return -ENOMEM;
		}

		gicv5_global_data.spi_domain = d;
		irq_domain_update_bus_token(d, DOMAIN_BUS_WIRED);
	}

	/* IPIs need the LPI domain as hierarchy parent. */
	if (!WARN(!gicv5_global_data.lpi_domain,
		  "LPI domain uninitialized, can't set up IPIs")) {
		d = irq_domain_create_hierarchy(gicv5_global_data.lpi_domain,
						0, GICV5_IPIS_PER_CPU * nr_cpu_ids,
						NULL, &gicv5_irq_ipi_domain_ops,
						NULL);

		if (!d) {
			gicv5_free_domains();
			return -ENOMEM;
		}
		gicv5_global_data.ipi_domain = d;
	}
	gicv5_global_data.fwnode = handle;

	return 0;
}
/*
 * Cache the number of priority bits implemented by this CPU interface,
 * as reported by ICC_IDR0_EL1; fall back to the 4-bit minimum on an
 * unexpected field value.
 */
static void gicv5_set_cpuif_pribits(void)
{
	u64 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);

	switch (FIELD_GET(ICC_IDR0_EL1_PRI_BITS, icc_idr0)) {
	case ICC_IDR0_EL1_PRI_BITS_4BITS:
		gicv5_global_data.cpuif_pri_bits = 4;
		break;
	case ICC_IDR0_EL1_PRI_BITS_5BITS:
		gicv5_global_data.cpuif_pri_bits = 5;
		break;
	default:
		pr_err("Unexpected ICC_IDR0_EL1_PRI_BITS value, default to 4");
		gicv5_global_data.cpuif_pri_bits = 4;
		break;
	}
}
static void gicv5_set_cpuif_idbits(void)
{
u32 icc_idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);
switch (FIELD_GET(ICC_IDR0_EL1_ID_BITS, icc_idr0)) {
case ICC_IDR0_EL1_ID_BITS_16BITS:
gicv5_global_data.cpuif_id_bits = 16;
break;
case ICC_IDR0_EL1_ID_BITS_24BITS:
gicv5_global_data.cpuif_id_bits = 24;
break;
default:
pr_err("Unexpected ICC_IDR0_EL1_ID_BITS value, default to 16");
gicv5_global_data.cpuif_id_bits = 16;
break;
}
}
#ifdef CONFIG_KVM
static struct gic_kvm_info gic_v5_kvm_info __initdata;

/*
 * Advertise GICv5 virtualization support to KVM (DT boot path): fill the
 * gic_kvm_info structure with the maintenance interrupt parsed from the
 * GIC DT node and hand it to the vGIC layer.
 */
static void __init gic_of_setup_kvm_info(struct device_node *node)
{
	/*
	 * If we don't have native GICv5 virtualisation support, then
	 * we also don't have FEAT_GCIE_LEGACY - the architecture
	 * forbids this combination.
	 */
	if (!gicv5_global_data.virt_capable) {
		pr_info("GIC implementation is not virtualization capable\n");
		return;
	}

	gic_v5_kvm_info.type = GIC_V5;

	/* GIC Virtual CPU interface maintenance interrupt */
	gic_v5_kvm_info.no_maint_irq_mask = false;
	gic_v5_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
	if (!gic_v5_kvm_info.maint_irq) {
		pr_warn("cannot find GICv5 virtual CPU interface maintenance interrupt\n");
		return;
	}

	vgic_set_kvm_info(&gic_v5_kvm_info);
}
#else
/* !CONFIG_KVM: nothing to advertise. */
static inline void __init gic_of_setup_kvm_info(struct device_node *node)
{
}
#endif // CONFIG_KVM
/*
 * Initialization shared by the DT and ACPI probe paths: create the irq
 * domains, size the effective priority range, bring up the boot CPU's
 * interface, install the IRQ entry point, enable the IRS and finish with
 * SMP/ITS setup. Errors unwind via the goto labels in reverse order.
 */
static int __init gicv5_init_common(struct fwnode_handle *parent_domain)
{
	int ret = gicv5_init_domains(parent_domain);
	if (ret)
		return ret;

	gicv5_set_cpuif_pribits();
	gicv5_set_cpuif_idbits();

	/* Effective priority bits = min of CPU interface and IRS widths. */
	pri_bits = min_not_zero(gicv5_global_data.cpuif_pri_bits,
				gicv5_global_data.irs_pri_bits);

	ret = gicv5_starting_cpu(smp_processor_id());
	if (ret)
		goto out_dom;

	ret = set_handle_irq(gicv5_handle_irq);
	if (ret)
		goto out_int;

	ret = gicv5_irs_enable();
	if (ret)
		goto out_int;

	gicv5_smp_init();

	gicv5_irs_its_probe();

	return 0;

out_int:
	gicv5_cpu_disable_interrupts();
out_dom:
	gicv5_free_domains();

	return ret;
}
/*
 * DT probe entry point: discover the IRSes from the GIC node, run the
 * common initialization and advertise virtualization support to KVM.
 */
static int __init gicv5_of_init(struct device_node *node, struct device_node *parent)
{
	int ret = gicv5_irs_of_probe(node);
	if (ret)
		return ret;

	ret = gicv5_init_common(of_fwnode_handle(node));
	if (ret)
		goto out_irs;

	gic_of_setup_kvm_info(node);

	return 0;

out_irs:
	gicv5_irs_remove();

	return ret;
}
IRQCHIP_DECLARE(gic_v5, "arm,gic-v5", gicv5_of_init);
#ifdef CONFIG_ACPI
/* Accept a MADT GICv5 IRS entry only if its version matches the probe entry. */
static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
					   struct acpi_probe_entry *ape)
{
	struct acpi_madt_gicv5_irs *irs;

	irs = (struct acpi_madt_gicv5_irs *)header;

	return irs->version == ape->driver_data;
}
/* fwnode created for the GSI domain on the ACPI boot path. */
static struct fwnode_handle *gsi_domain_handle;

/*
 * Map a GSI to the fwnode of its interrupt controller: IWB-owned GSIs
 * resolve through IORT by frame ID, everything else belongs to the GIC's
 * own GSI domain.
 */
static struct fwnode_handle *gic_v5_get_gsi_domain_id(u32 gsi)
{
	if (FIELD_GET(GICV5_GSI_IC_TYPE, gsi) == GICV5_GSI_IWB_TYPE)
		return iort_iwb_handle(FIELD_GET(GICV5_GSI_IWB_FRAME_ID, gsi));

	return gsi_domain_handle;
}
/*
 * ACPI probe entry point, invoked once per MADT GICv5 IRS entry; the
 * gsi_domain_handle check makes initialization run only for the first
 * entry. Creates the GSI fwnode, probes the IRSes, runs the common init
 * and registers the ACPI GSI model. Errors unwind in reverse order.
 */
static int __init gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
{
	struct acpi_madt_gicv5_irs *irs = (struct acpi_madt_gicv5_irs *)header;
	int ret;

	/* Already initialized from a previous MADT entry. */
	if (gsi_domain_handle)
		return 0;

	gsi_domain_handle = irq_domain_alloc_fwnode(&irs->config_base_address);
	if (!gsi_domain_handle)
		return -ENOMEM;

	ret = gicv5_irs_acpi_probe();
	if (ret)
		goto out_fwnode;

	ret = gicv5_init_common(gsi_domain_handle);
	if (ret)
		goto out_irs;

	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC_V5, gic_v5_get_gsi_domain_id);

	return 0;

out_irs:
	gicv5_irs_remove();
out_fwnode:
	irq_domain_free_fwnode(gsi_domain_handle);

	return ret;
}
IRQCHIP_ACPI_DECLARE(gic_v5, ACPI_MADT_TYPE_GICV5_IRS,
		     acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V5,
		     gic_acpi_init);