arm64 fixes for -rc2

- Fix cpufreq warning due to attempting a cross-call with interrupts
   masked when reading local AMU counters.
 
 - Fix DEBUG_PREEMPT warning from the delay loop when it tries to access
   per-cpu errata workaround state for the virtual counter.
 
 - Re-jig and optimise our TLB invalidation errata workarounds in
   preparation for more hardware brokenness.
 
 - Fix GCS mappings to interact properly with PROT_NONE and to avoid
   corrupting the pte on CPUs with FEAT_LPA2.
 
 - Fix ioremap_prot() to extract only the memory attributes from the
   user pte and ignore all the other 'prot' bits.
 -----BEGIN PGP SIGNATURE-----
 
 iQFEBAABCgAuFiEEPxTL6PPUbjXGY88ct6xw3ITBYzQFAmmh458QHHdpbGxAa2Vy
 bmVsLm9yZwAKCRC3rHDchMFjNOA1B/9RhZoGSDEuTKVjue1UtVIaFXs5v0LFqgIf
 c5eCtYzHU2djw0zVUv4DHZaPvxmxjzu4G4safzjB23IwMDkeGM2hmDBigw/HgWXN
 Nm46UWFnmWrd/58w895r3QAe5wbTuhwzVdj9bUbYNGf9/Tqey/hbn8e2BJFdKC1H
 Dt/uBYNuyRLSe94iYTzKcWPKH5CnSAkQ7lshxOhHSq5WF+ybmNywJnSqcTy+2kHb
 cU9/TXWd6ck5MqK/pzu9nCLPPGpSmOYCjWPiEB2pWiHRPlykK3r7fhvYlqfySRtx
 r14TlLIpevstCc8dr7NS2rj6lkjXKzggWOm0fD/rPusSj5Jn7qkk
 =MmjE
 -----END PGP SIGNATURE-----

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "The diffstat is dominated by changes to our TLB invalidation errata
  handling and the introduction of a new GCS selftest to catch one of
  the issues that is fixed here relating to PROT_NONE mappings.

   - Fix cpufreq warning due to attempting a cross-call with interrupts
     masked when reading local AMU counters

   - Fix DEBUG_PREEMPT warning from the delay loop when it tries to
     access per-cpu errata workaround state for the virtual counter

   - Re-jig and optimise our TLB invalidation errata workarounds in
     preparation for more hardware brokenness

   - Fix GCS mappings to interact properly with PROT_NONE and to avoid
     corrupting the pte on CPUs with FEAT_LPA2

   - Fix ioremap_prot() to extract only the memory attributes from the
     user pte and ignore all the other 'prot' bits"
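
As an aside on that last point, the new ioremap_prot() wrapper keeps only the memory-attribute index (PTE_ATTRINDX) from the user pte and takes everything else from PAGE_KERNEL. Below is a rough standalone sketch of that masking logic; the bit positions and the PAGE_KERNEL value are made-up stand-ins for illustration rather than the real arm64 definitions, and the actual wrapper is in the first hunk of the diff further down.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in bit definitions for illustration only; the real arm64
 * values are not these.
 */
#define PTE_USER          (1ULL << 6)
#define PTE_ATTRINDX_MASK (7ULL << 2)
#define PAGE_KERNEL_VAL   0x68000000000713ULL	/* hypothetical kernel prot */

/* Keep only the memory-attribute index from the user prot. */
static uint64_t kernel_prot_from_user(uint64_t user_prot)
{
	uint64_t prot = PAGE_KERNEL_VAL;

	if (!(user_prot & PTE_USER))
		return 0;			/* reject non-user ptes */

	prot &= ~PTE_ATTRINDX_MASK;		/* drop the kernel attr index */
	prot |= user_prot & PTE_ATTRINDX_MASK;	/* take the user's attr index */
	return prot;
}

int main(void)
{
	/* Attr index 2 plus assorted unrelated bits that must be ignored. */
	uint64_t user = PTE_USER | (2ULL << 2) | 0xff00;

	printf("kernel prot: %#llx\n",
	       (unsigned long long)kernel_prot_from_user(user));
	return 0;
}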

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: topology: Fix false warning in counters_read_on_cpu() for same-CPU reads
  arm64: Fix sampling the "stable" virtual counter in preemptible section
  arm64: tlb: Optimize ARM64_WORKAROUND_REPEAT_TLBI
  arm64: tlb: Allow XZR argument to TLBI ops
  kselftest: arm64: Check access to GCS after mprotect(PROT_NONE)
  arm64: gcs: Honour mprotect(PROT_NONE) on shadow stack mappings
  arm64: gcs: Do not set PTE_SHARED on GCS mappings if FEAT_LPA2 is enabled
  arm64: io: Extract user memory type in ioremap_prot()
  arm64: io: Rename ioremap_prot() to __ioremap_prot()
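
To make the first fix in that list concrete, here is a minimal userspace model of the new counters_read_on_cpu() decision: with interrupts disabled, only a same-CPU read is allowed and the callback is invoked directly; otherwise the read still goes through a cross-call. All of the kernel helpers (irqs_disabled(), smp_processor_id(), smp_call_function_single()) are mocked with trivial stand-ins and the error code is simplified, so this is a sketch of the control flow only, not the kernel function.

#include <stdbool.h>
#include <stdio.h>

typedef void (*read_fn_t)(void *val);

/* Mocked kernel helpers, hard-wired for illustration. */
static bool irqs_disabled(void)    { return true; }
static int  smp_processor_id(void) { return 0; }
static void read_local_counter(void *val) { *(unsigned long long *)val = 42; }
static void cross_call(int cpu, read_fn_t fn, void *val) { (void)cpu; fn(val); }

static int counters_read_on_cpu(int cpu, read_fn_t func, unsigned long long *val)
{
	if (irqs_disabled()) {
		/*
		 * A remote read would need an IPI, which is unsafe with
		 * interrupts off; a same-CPU read can run the callback
		 * directly, so only the remote case is rejected.
		 */
		if (cpu != smp_processor_id())
			return -1;
		func(val);
	} else {
		cross_call(cpu, func, val);
	}
	return 0;
}

int main(void)
{
	unsigned long long v = 0;

	if (counters_read_on_cpu(0, read_local_counter, &v) == 0)
		printf("counter = %llu\n", v);
	return 0;
}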
Linus Torvalds 2026-02-27 13:40:30 -08:00
commit 4d349ee5c7
14 changed files with 179 additions and 60 deletions
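
Most of those lines come from the TLB invalidation rework in the diff below: instead of patching an extra "dsb ish; tlbi" into every __tlbi() via an ALTERNATIVE, the Repeat-TLBI erratum is now handled once, by a completion helper run at the end of each flush. The model below is a loose userspace approximation under that reading, with printf() stand-ins for the barriers and TLBI instructions and a hard-wired erratum check; it is not the real kernel macros.

#include <stdbool.h>
#include <stdio.h>

/*
 * Affected parts may need the last TLBI repeated after a DSB. Mocked here;
 * the kernel gates this on the ARM64_WORKAROUND_REPEAT_TLBI cpucap.
 */
static bool repeat_tlbi_erratum(void) { return true; }

static void tlbi(const char *op) { printf("tlbi %s\n", op); }
static void dsb_ish(void)        { printf("dsb ish\n"); }

/*
 * New scheme: individual __tlbi() calls stay single instructions and the
 * erratum is handled once, when the whole flush is completed.
 */
static void tlbi_sync_s1ish(void)
{
	dsb_ish();
	if (repeat_tlbi_erratum()) {
		tlbi("vale1is, xzr");	/* repeat one harmless TLBI ... */
		dsb_ish();		/* ... and synchronise again */
	}
}

/* The old scheme instead stamped "dsb ish; tlbi" into every __tlbi(). */
static void flush_tlb_all_model(void)
{
	tlbi("vmalle1is");
	tlbi_sync_s1ish();
}

int main(void)
{
	flush_tlb_all_model();
	return 0;
}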


@@ -264,19 +264,33 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
pgprot_t *prot);
int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
void __iomem *__ioremap_prot(phys_addr_t phys, size_t size, pgprot_t prot);
static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size,
pgprot_t user_prot)
{
pgprot_t prot;
ptdesc_t user_prot_val = pgprot_val(user_prot);
if (WARN_ON_ONCE(!(user_prot_val & PTE_USER)))
return NULL;
prot = __pgprot_modify(PAGE_KERNEL, PTE_ATTRINDX_MASK,
user_prot_val & PTE_ATTRINDX_MASK);
return __ioremap_prot(phys, size, prot);
}
#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
#define ioremap(addr, size) \
__ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) \
ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
__ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
__ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
#define ioremap_encrypted(addr, size) \
ioremap_prot((addr), (size), PAGE_KERNEL)
__ioremap_prot((addr), (size), PAGE_KERNEL)
/*
* io{read,write}{16,32,64}be() macros
@@ -297,7 +311,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
return __ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*


@@ -164,9 +164,6 @@ static inline bool __pure lpa2_is_enabled(void)
#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
#define PAGE_GCS __pgprot(_PAGE_GCS)
#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
#define PIE_E0 ( \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \


@@ -31,19 +31,11 @@
*/
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op "\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
"tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : "r" (arg))
"tlbi " #op ", %x0\n" \
: : "rZ" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
@@ -181,6 +173,34 @@ static inline unsigned long get_trans_granule(void)
(__pages >> (5 * (scale) + 1)) - 1; \
})
#define __repeat_tlbi_sync(op, arg...) \
do { \
if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) \
break; \
__tlbi(op, ##arg); \
dsb(ish); \
} while (0)
/*
* Complete broadcast TLB maintenance issued by the host which invalidates
* stage 1 information in the host's own translation regime.
*/
static inline void __tlbi_sync_s1ish(void)
{
dsb(ish);
__repeat_tlbi_sync(vale1is, 0);
}
/*
* Complete broadcast TLB maintenance issued by hyp code which invalidates
* stage 1 translation information in any translation regime.
*/
static inline void __tlbi_sync_s1ish_hyp(void)
{
dsb(ish);
__repeat_tlbi_sync(vale2is, 0);
}
/*
* TLB Invalidation
* ================
@@ -279,7 +299,7 @@ static inline void flush_tlb_all(void)
{
dsb(ishst);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish();
isb();
}
@@ -291,7 +311,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
__tlbi_sync_s1ish();
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
@@ -345,20 +365,11 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
flush_tlb_page_nosync(vma, uaddr);
dsb(ish);
__tlbi_sync_s1ish();
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
/*
* TLB flush deferral is not required on systems which are affected by
* ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
* will have two consecutive TLBI instructions with a dsb(ish) in between
* defeating the purpose (i.e save overall 'dsb ish' cost).
*/
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
return false;
return true;
}
@@ -374,7 +385,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
*/
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
dsb(ish);
__tlbi_sync_s1ish();
}
/*
@@ -509,7 +520,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
{
__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
last_level, tlb_level);
dsb(ish);
__tlbi_sync_s1ish();
}
static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
@@ -557,7 +568,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
dsb(ishst);
__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
dsb(ish);
__tlbi_sync_s1ish();
isb();
}
@@ -571,7 +582,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ishst);
__tlbi(vaae1is, addr);
dsb(ish);
__tlbi_sync_s1ish();
isb();
}


@@ -377,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
return ioremap_prot(phys, size, prot);
return __ioremap_prot(phys, size, prot);
}
/*


@@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
* We pick the reserved-ASID to minimise the impact.
*/
__tlbi(aside1is, __TLBI_VADDR(0, 0));
dsb(ish);
__tlbi_sync_s1ish();
}
ret = caches_clean_inval_user_pou(start, start + chunk);


@@ -400,16 +400,25 @@ static inline
int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
{
/*
* Abort call on counterless CPU or when interrupts are
* disabled - can lead to deadlock in smp sync call.
* Abort call on counterless CPU.
*/
if (!cpu_has_amu_feat(cpu))
return -EOPNOTSUPP;
if (WARN_ON_ONCE(irqs_disabled()))
return -EPERM;
smp_call_function_single(cpu, func, val, 1);
if (irqs_disabled()) {
/*
* When IRQs are disabled (tick path: sched_tick ->
* topology_scale_freq_tick or cppc_scale_freq_tick), only local
* CPU counter reads are allowed. Remote CPU counter read would
* require smp_call_function_single() which is unsafe with IRQs
* disabled.
*/
if (WARN_ON_ONCE(cpu != smp_processor_id()))
return -EPERM;
func(val);
} else {
smp_call_function_single(cpu, func, val, 1);
}
return 0;
}


@@ -271,7 +271,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
*/
dsb(ishst);
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
}


@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -226,7 +226,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -240,7 +240,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -266,5 +266,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
}


@@ -501,7 +501,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
*unmapped += granule;
}
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
mm_ops->put_page(ctx->ptep);


@@ -115,7 +115,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
*/
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -176,7 +176,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -192,7 +192,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
exit_vmid_context(&cxt);
@@ -217,7 +217,7 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
dsb(ish);
__tlbi_sync_s1ish_hyp();
}
/*
@@ -358,7 +358,7 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
default:
ret = -EINVAL;
}
dsb(ish);
__tlbi_sync_s1ish_hyp();
isb();
if (mmu)


@@ -32,7 +32,11 @@ static inline unsigned long xloops_to_cycles(unsigned long xloops)
* Note that userspace cannot change the offset behind our back either,
* as the vcpu mutex is held as long as KVM_RUN is in progress.
*/
#define __delay_cycles() __arch_counter_get_cntvct_stable()
static cycles_t notrace __delay_cycles(void)
{
guard(preempt_notrace)();
return __arch_counter_get_cntvct_stable();
}
void __delay(unsigned long cycles)
{


@@ -14,8 +14,8 @@ int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
return 0;
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
pgprot_t pgprot)
void __iomem *__ioremap_prot(phys_addr_t phys_addr, size_t size,
pgprot_t pgprot)
{
unsigned long last_addr = phys_addr + size - 1;
@@ -39,7 +39,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
return generic_ioremap_prot(phys_addr, size, pgprot);
}
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap_prot);
/*
* Must be called after early_fixmap_init


@@ -34,6 +34,8 @@ static pgprot_t protection_map[16] __ro_after_init = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};
static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
@@ -73,9 +75,11 @@ static int __init adjust_protection_map(void)
protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY;
}
if (lpa2_is_enabled())
if (lpa2_is_enabled()) {
for (int i = 0; i < ARRAY_SIZE(protection_map); i++)
pgprot_val(protection_map[i]) &= ~PTE_SHARED;
gcs_page_prot &= ~PTE_SHARED;
}
return 0;
}
@ -87,7 +91,11 @@ pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
prot = _PAGE_GCS_RO;
/* Honour mprotect(PROT_NONE) on shadow stack mappings */
if (vm_flags & VM_ACCESS_FLAGS)
prot = gcs_page_prot;
else
prot = pgprot_val(protection_map[VM_NONE]);
} else {
prot = pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);


@@ -0,0 +1,76 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2026 ARM Limited
*/
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
#include "testcases.h"
static uint64_t *gcs_page;
static bool post_mprotect;
#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack 453
#endif
static bool alloc_gcs(struct tdescr *td)
{
long page_size = sysconf(_SC_PAGE_SIZE);
gcs_page = (void *)syscall(__NR_map_shadow_stack, 0,
page_size, 0);
if (gcs_page == MAP_FAILED) {
fprintf(stderr, "Failed to map %ld byte GCS: %d\n",
page_size, errno);
return false;
}
return true;
}
static int gcs_prot_none_fault_trigger(struct tdescr *td)
{
/* Verify that the page is readable (ie, not completely unmapped) */
fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]);
if (mprotect(gcs_page, sysconf(_SC_PAGE_SIZE), PROT_NONE) != 0) {
fprintf(stderr, "mprotect(PROT_NONE) failed: %d\n", errno);
return 0;
}
post_mprotect = true;
/* This should trigger a fault if PROT_NONE is honoured for the GCS page */
fprintf(stderr, "Read value after mprotect(PROT_NONE) 0x%lx\n", gcs_page[0]);
return 0;
}
static int gcs_prot_none_fault_signal(struct tdescr *td, siginfo_t *si,
ucontext_t *uc)
{
ASSERT_GOOD_CONTEXT(uc);
/* A fault before mprotect(PROT_NONE) is unexpected. */
if (!post_mprotect)
return 0;
return 1;
}
struct tdescr tde = {
.name = "GCS PROT_NONE fault",
.descr = "Read from GCS after mprotect(PROT_NONE) segfaults",
.feats_required = FEAT_GCS,
.timeout = 3,
.sig_ok = SIGSEGV,
.sanity_disabled = true,
.init = alloc_gcs,
.trigger = gcs_prot_none_fault_trigger,
.run = gcs_prot_none_fault_signal,
};