x86/mm: simplify clear_page_*

clear_page_rep() and clear_page_erms() are wrappers around "REP; STOS"
variations.  Inlining gets rid of an unnecessary CALL/RET (which isn't
free when using RETHUNK speculative execution mitigations). Fix up and
rename clear_page_orig() to adapt to the changed calling convention.

Also add a comment from Dave Hansen detailing various clearing mechanisms
used in clear_page().

Link: https://lkml.kernel.org/r/20260107072009.1615991-5-ankur.a.arora@oracle.com
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Tested-by: Raghavendra K T <raghavendra.kt@amd.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Li Zhe <lizhe.67@bytedance.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Ankur Arora 2026-01-06 23:20:05 -08:00 committed by Andrew Morton
parent 8d846b723e
commit 54a6b89a3d
3 changed files with 66 additions and 46 deletions

View file

@@ -17,6 +17,12 @@ extern unsigned long __phys_addr(unsigned long);
#include <linux/string.h>
/**
* clear_page() - clear a page using a kernel virtual address.
* @page: address of kernel page
*
* Does absolutely no exception handling.
*/
static inline void clear_page(void *page)
{
memset(page, 0, PAGE_SIZE);

View file

@@ -48,26 +48,63 @@ static inline unsigned long __phys_addr_symbol(unsigned long x)
#define __phys_reloc_hide(x) (x)
void clear_page_orig(void *page);
void clear_page_rep(void *page);
void clear_page_erms(void *page);
KCFI_REFERENCE(clear_page_orig);
KCFI_REFERENCE(clear_page_rep);
KCFI_REFERENCE(clear_page_erms);
void __clear_pages_unrolled(void *page);
KCFI_REFERENCE(__clear_pages_unrolled);
static inline void clear_page(void *page)
/**
* clear_page() - clear a page using a kernel virtual address.
* @addr: address of kernel page
*
* Switch between three implementations of page clearing based on CPU
* capabilities:
*
* - __clear_pages_unrolled(): the oldest, slowest and universally
* supported method. Zeroes via 8-byte MOV instructions unrolled 8x
* to write a 64-byte cacheline in each loop iteration.
*
* - "REP; STOSQ": really old CPUs had crummy REP implementations.
* Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
* trusted. The instruction writes 8-byte per REP iteration but
* CPUs can internally batch these together and do larger writes.
*
* - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
* which enumerate 'ERMS' and provide an implementation which
* unlike "REP; STOSQ" above wasn't overly picky about alignment.
* The instruction writes 1-byte per REP iteration with CPUs
* internally batching these together into larger writes and is
* generally fastest of the three.
*
* Note that when running as a guest, features exposed by the CPU
* might be mediated by the hypervisor. So, the STOSQ variant might
* be in active use on some systems even when the hardware enumerates
* ERMS.
*
* Does absolutely no exception handling.
*/
static inline void clear_page(void *addr)
{
u64 len = PAGE_SIZE;
/*
 * Clean up KMSAN metadata for the page being cleared. The assembly call
 * below clobbers @page, so we perform unpoisoning before it.
 * below clobbers @addr, so perform unpoisoning before it.
 */
kmsan_unpoison_memory(page, PAGE_SIZE);
/*
 * NOTE(review): the six lines below are the pre-patch implementation
 * (out-of-line clear_page_orig/_rep/_erms chosen via alternative_call_2);
 * this diff rendering shows them interleaved with their replacement.
 */
alternative_call_2(clear_page_orig,
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
"D" (page),
"cc", "memory", "rax", "rcx");
kmsan_unpoison_memory(addr, len);
/*
 * The inline asm embeds a CALL instruction and usually that is a no-no
 * due to the compiler not knowing that and thus being unable to track
 * callee-clobbered registers.
 *
 * In this case that is fine because the registers clobbered by
 * __clear_pages_unrolled() are part of the inline asm register
 * specification.
 */
/*
 * Inputs: %rax = 0 (store value), %rcx = len, %rdi = addr; %rcx and
 * %rdi are also outputs ("+c"/"+D") since REP STOS advances both.
 * The STOSQ variant pre-shifts the byte count to a quadword count.
 */
asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
"shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
"rep stosb", X86_FEATURE_ERMS)
: "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
: "a" (0)
: "cc", "memory");
}
void copy_page(void *to, void *from);

View file

@@ -6,30 +6,15 @@
#include <asm/asm.h>
/*
* Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
* recommended to use this when possible and we do use them by default.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original.
* Zero page aligned region.
* %rdi - dest
* %rcx - length
*/
/*
* Zero a page.
* %rdi - page
*/
/*
 * clear_page_rep - zero one 4096-byte page with "REP; STOSQ".
 * %rdi - page (kernel virtual address)
 *
 * 512 iterations of 8-byte stores; used on CPUs enumerating
 * X86_FEATURE_REP_GOOD. NOTE(review): this routine is removed by the
 * patch — clearing now happens via the inline ALTERNATIVE_2 in
 * clear_page().
 */
SYM_TYPED_FUNC_START(clear_page_rep)
/* %ecx = 4096/8 quadwords, %eax = 0 store value */
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
SYM_TYPED_FUNC_START(clear_page_orig)
xorl %eax,%eax
movl $4096/64,%ecx
SYM_TYPED_FUNC_START(__clear_pages_unrolled)
shrq $6, %rcx
.p2align 4
.Lloop:
decl %ecx
decq %rcx
#define PUT(x) movq %rax,x*8(%rdi)
movq %rax,(%rdi)
PUT(1)
@@ -43,16 +28,8 @@ SYM_TYPED_FUNC_START(clear_page_orig)
jnz .Lloop
nop
RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
/*
 * clear_page_erms - zero one 4096-byte page with "REP; STOSB".
 * %rdi - page (kernel virtual address)
 *
 * 1-byte REP iterations, internally batched by CPUs enumerating
 * X86_FEATURE_ERMS. NOTE(review): this routine is removed by the
 * patch — clearing now happens via the inline ALTERNATIVE_2 in
 * clear_page().
 */
SYM_TYPED_FUNC_START(clear_page_erms)
/* %ecx = 4096 bytes, %eax = 0 store value */
movl $4096,%ecx
xorl %eax,%eax
rep stosb
RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
SYM_FUNC_END(__clear_pages_unrolled)
EXPORT_SYMBOL_GPL(__clear_pages_unrolled)
/*
* Default clear user-space.