mm.git review status for linus..mm-stable

Total patches:       36
 Reviews/patch:       1.77
 Reviewed rate:       83%
 
 - The 2 patch series "mm/vmscan: fix demotion targets checks in
   reclaim/demotion" from Bing Jiao fixes a couple of issues in the
   demotion code - pages were failed demotion and were finding themselves
   demoted into disallowed nodes.
 
 - The 11 patch series "Remove XA_ZERO from error recovery of dup_mmap()"
   from Liam Howlett fixes a rare mapledtree race and performs a number of
   cleanups.
 
 - The 13 patch series "mm: add bitmap VMA flag helpers and convert all
   mmap_prepare to use them" from Lorenzo Stoakes implements a lot of
   cleanups following on from the conversion of the VMA flags into a
   bitmap.
 
 - The 5 patch series "support batch checking of references and unmapping
   for large folios" from Baolin Wang implements batching to greatly
   improve the performance of reclaiming clean file-backed large folios.
 
 - The 3 patch series "selftests/mm: add memory failure selftests" from
   Miaohe Lin does as claimed.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaZaIEQAKCRDdBJ7gKXxA
 jj73AQCQDwLoipDiQRGyjB5BDYydymWuDoiB1tlDPHfYAP3b/QD/UQtVlOEXqwM3
 naOKs3NQ1pwnfhDaQMirGw2eAnJ1SQY=
 =6Iif
 -----END PGP SIGNATURE-----

Merge tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

 - "mm/vmscan: fix demotion targets checks in reclaim/demotion" fixes a
   couple of issues in the demotion code - pages were failing demotion
   and were finding themselves demoted into disallowed nodes (Bing Jiao)

 - "Remove XA_ZERO from error recovery of dup_mmap()" fixes a rare
   mapledtree race and performs a number of cleanups (Liam Howlett)

 - "mm: add bitmap VMA flag helpers and convert all mmap_prepare to use
   them" implements a lot of cleanups following on from the conversion
   of the VMA flags into a bitmap (Lorenzo Stoakes)

 - "support batch checking of references and unmapping for large folios"
   implements batching to greatly improve the performance of reclaiming
   clean file-backed large folios (Baolin Wang)

 - "selftests/mm: add memory failure selftests" does as claimed (Miaohe
   Lin)

* tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (36 commits)
  mm/page_alloc: clear page->private in free_pages_prepare()
  selftests/mm: add memory failure dirty pagecache test
  selftests/mm: add memory failure clean pagecache test
  selftests/mm: add memory failure anonymous page test
  mm: rmap: support batched unmapping for file large folios
  arm64: mm: implement the architecture-specific clear_flush_young_ptes()
  arm64: mm: support batch clearing of the young flag for large folios
  arm64: mm: factor out the address and ptep alignment into a new helper
  mm: rmap: support batched checks of the references for large folios
  tools/testing/vma: add VMA userland tests for VMA flag functions
  tools/testing/vma: separate out vma_internal.h into logical headers
  tools/testing/vma: separate VMA userland tests into separate files
  mm: make vm_area_desc utilise vma_flags_t only
  mm: update all remaining mmap_prepare users to use vma_flags_t
  mm: update shmem_[kernel]_file_*() functions to use vma_flags_t
  mm: update secretmem to use VMA flags on mmap_prepare
  mm: update hugetlbfs to use VMA flags on mmap_prepare
  mm: add basic VMA flag operation helper functions
  tools: bitmap: add missing bitmap_[subset(), andnot()]
  mm: add mk_vma_flags() bitmap flag macro helper
  ...
This commit is contained in:
Linus Torvalds 2026-02-18 20:50:32 -08:00
commit eeccf287a2
82 changed files with 3941 additions and 2521 deletions

View file

@ -11845,6 +11845,7 @@ F: include/linux/memory-failure.h
F: include/trace/events/memory-failure.h
F: mm/hwpoison-inject.c
F: mm/memory-failure.c
F: tools/testing/selftests/mm/memory-failure.c
HYCON HY46XX TOUCHSCREEN SUPPORT
M: Giulio Benetti <giulio.benetti@benettiengineering.com>

View file

@ -1648,10 +1648,10 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
unsigned long addr, pte_t *ptep,
unsigned int nr, int full);
extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep, unsigned int nr);
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep, unsigned int nr);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
@ -1823,7 +1823,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
if (likely(!pte_valid_cont(orig_pte)))
return __ptep_test_and_clear_young(vma, addr, ptep);
return contpte_ptep_test_and_clear_young(vma, addr, ptep);
return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@ -1835,7 +1835,18 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
if (likely(!pte_valid_cont(orig_pte)))
return __ptep_clear_flush_young(vma, addr, ptep);
return contpte_ptep_clear_flush_young(vma, addr, ptep);
return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
}
#define clear_flush_young_ptes clear_flush_young_ptes
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
unsigned int nr)
{
if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
return __ptep_clear_flush_young(vma, addr, ptep);
return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
}
#define wrprotect_ptes wrprotect_ptes

View file

@ -26,6 +26,26 @@ static inline pte_t *contpte_align_down(pte_t *ptep)
return PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
}
static inline pte_t *contpte_align_addr_ptep(unsigned long *start,
unsigned long *end, pte_t *ptep,
unsigned int nr)
{
/*
* Note: caller must ensure these nr PTEs are consecutive (present)
* PTEs that map consecutive pages of the same large folio within a
* single VMA and a single page table.
*/
if (pte_cont(__ptep_get(ptep + nr - 1)))
*end = ALIGN(*end, CONT_PTE_SIZE);
if (pte_cont(__ptep_get(ptep))) {
*start = ALIGN_DOWN(*start, CONT_PTE_SIZE);
ptep = contpte_align_down(ptep);
}
return ptep;
}
static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr)
{
@ -488,8 +508,9 @@ pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
}
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
unsigned int nr)
{
/*
* ptep_clear_flush_young() technically requires us to clear the access
@ -498,41 +519,45 @@ int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
* contig range when the range is covered by a single folio, we can get
* away with clearing young for the whole contig range here, so we avoid
* having to unfold.
*
* The 'nr' means consecutive (present) PTEs that map consecutive pages
* of the same large folio in a single VMA and a single page table.
*/
unsigned long end = addr + nr * PAGE_SIZE;
int young = 0;
int i;
ptep = contpte_align_down(ptep);
addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
for (; addr != end; ptep++, addr += PAGE_SIZE)
young |= __ptep_test_and_clear_young(vma, addr, ptep);
return young;
}
EXPORT_SYMBOL_GPL(contpte_ptep_test_and_clear_young);
EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);
int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
unsigned int nr)
{
int young;
young = contpte_ptep_test_and_clear_young(vma, addr, ptep);
young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
if (young) {
unsigned long end = addr + nr * PAGE_SIZE;
contpte_align_addr_ptep(&addr, &end, ptep, nr);
/*
* See comment in __ptep_clear_flush_young(); same rationale for
* eliding the trailing DSB applies here.
*/
addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
__flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
__flush_tlb_range_nosync(vma->vm_mm, addr, end,
PAGE_SIZE, true, 3);
}
return young;
}
EXPORT_SYMBOL_GPL(contpte_ptep_clear_flush_young);
EXPORT_SYMBOL_GPL(contpte_clear_flush_young_ptes);
void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr)
@ -569,14 +594,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
unsigned long start = addr;
unsigned long end = start + nr * PAGE_SIZE;
if (pte_cont(__ptep_get(ptep + nr - 1)))
end = ALIGN(end, CONT_PTE_SIZE);
if (pte_cont(__ptep_get(ptep))) {
start = ALIGN_DOWN(start, CONT_PTE_SIZE);
ptep = contpte_align_down(ptep);
}
ptep = contpte_align_addr_ptep(&start, &end, ptep, nr);
__clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
}
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);

View file

@ -83,7 +83,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
encl_size = secs->size + PAGE_SIZE;
backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
VM_NORESERVE);
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(backing)) {
ret = PTR_ERR(backing);
goto err_out_shrink;

View file

@ -306,7 +306,7 @@ static unsigned zero_mmap_capabilities(struct file *file)
/* can't do an in-place private mapping if there's no MMU */
static inline int private_mapping_ok(struct vm_area_desc *desc)
{
return is_nommu_shared_mapping(desc->vm_flags);
return is_nommu_shared_vma_flags(&desc->vma_flags);
}
#else
@ -360,7 +360,7 @@ static int mmap_mem_prepare(struct vm_area_desc *desc)
desc->vm_ops = &mmap_mem_ops;
/* Remap-pfn-range will mark the range VM_IO. */
/* Remap-pfn-range will mark the range with the I/O flag. */
mmap_action_remap_full(desc, desc->pgoff);
/* We filter remap errors to -EAGAIN. */
desc->action.error_hook = mmap_filter_error;
@ -520,7 +520,7 @@ static int mmap_zero_prepare(struct vm_area_desc *desc)
#ifndef CONFIG_MMU
return -ENOSYS;
#endif
if (desc->vm_flags & VM_SHARED)
if (vma_desc_test_flags(desc, VMA_SHARED_BIT))
return shmem_zero_setup_desc(desc);
desc->action.success_hook = mmap_zero_private_success;

View file

@ -13,7 +13,7 @@
#include "dax-private.h"
#include "bus.h"
static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
static int __check_vma(struct dev_dax *dev_dax, vma_flags_t flags,
unsigned long start, unsigned long end, struct file *file,
const char *func)
{
@ -24,7 +24,7 @@ static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
return -ENXIO;
/* prevent private mappings from being established */
if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
if (!vma_flags_test(&flags, VMA_MAYSHARE_BIT)) {
dev_info_ratelimited(dev,
"%s: %s: fail, attempted private mapping\n",
current->comm, func);
@ -53,7 +53,7 @@ static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
const char *func)
{
return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end,
return __check_vma(dev_dax, vma->flags, vma->vm_start, vma->vm_end,
vma->vm_file, func);
}
@ -306,14 +306,14 @@ static int dax_mmap_prepare(struct vm_area_desc *desc)
* fault time.
*/
id = dax_read_lock();
rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp,
rc = __check_vma(dev_dax, desc->vma_flags, desc->start, desc->end, filp,
__func__);
dax_read_unlock(id);
if (rc)
return rc;
desc->vm_ops = &dax_vm_ops;
desc->vm_flags |= VM_HUGEPAGE;
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
return 0;
}

View file

@ -186,15 +186,16 @@ int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj,
{
struct vfsmount *huge_mnt;
struct file *filp;
const vma_flags_t flags = mk_vma_flags(VMA_NORESERVE_BIT);
drm_gem_private_object_init(dev, obj, size);
huge_mnt = drm_gem_get_huge_mnt(dev);
if (huge_mnt)
filp = shmem_file_setup_with_mnt(huge_mnt, "drm mm object",
size, VM_NORESERVE);
size, flags);
else
filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
filp = shmem_file_setup("drm mm object", size, flags);
if (IS_ERR(filp))
return PTR_ERR(filp);

View file

@ -499,7 +499,7 @@ static int __create_shmem(struct drm_i915_private *i915,
resource_size_t size,
unsigned int flags)
{
unsigned long shmem_flags = VM_NORESERVE;
const vma_flags_t shmem_flags = mk_vma_flags(VMA_NORESERVE_BIT);
struct vfsmount *huge_mnt;
struct file *filp;

View file

@ -200,7 +200,8 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
struct address_space *mapping;
gfp_t mask;
filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
filp = shmem_file_setup("i915-shmem-tt", size,
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(filp))
return PTR_ERR(filp);

View file

@ -19,7 +19,8 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len)
struct file *file;
int err;
file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE);
file = shmem_file_setup(name, PAGE_ALIGN(len),
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(file))
return file;

View file

@ -143,7 +143,7 @@ static void ttm_tt_fini_shmem(struct kunit *test)
err = ttm_tt_init(tt, bo, 0, caching, 0);
KUNIT_ASSERT_EQ(test, err, 0);
shmem = shmem_file_setup("ttm swap", BO_SIZE, 0);
shmem = shmem_file_setup("ttm swap", BO_SIZE, EMPTY_VMA_FLAGS);
tt->swap_storage = shmem;
ttm_tt_fini(tt);

View file

@ -178,5 +178,6 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail);
*/
struct file *ttm_backup_shmem_create(loff_t size)
{
return shmem_file_setup("ttm shmem backup", size, 0);
return shmem_file_setup("ttm shmem backup", size,
EMPTY_VMA_FLAGS);
}

View file

@ -330,7 +330,7 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
struct page *to_page;
int i, ret;
swap_storage = shmem_file_setup("ttm swap", size, 0);
swap_storage = shmem_file_setup("ttm swap", size, EMPTY_VMA_FLAGS);
if (IS_ERR(swap_storage)) {
pr_err("Failed allocating swap storage\n");
return PTR_ERR(swap_storage);

View file

@ -394,7 +394,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
{
desc->vm_flags |= VM_DONTEXPAND;
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
desc->vm_ops = &aio_ring_vm_ops;
return 0;
}

View file

@ -473,11 +473,12 @@ static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
if (!IS_DAX(file_inode(desc->file)))
return generic_file_readonly_mmap_prepare(desc);
if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
if (vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
return -EINVAL;
desc->vm_ops = &erofs_dax_vm_ops;
desc->vm_flags |= VM_HUGEPAGE;
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
return 0;
}
#else

View file

@ -818,13 +818,13 @@ static int ext4_file_mmap_prepare(struct vm_area_desc *desc)
* We don't support synchronous mappings for non-DAX files and
* for DAX files if underneath dax_device is not synchronous.
*/
if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), dax_dev))
if (!daxdev_mapping_supported(desc, file_inode(file), dax_dev))
return -EOPNOTSUPP;
file_accessed(file);
if (IS_DAX(file_inode(file))) {
desc->vm_ops = &ext4_dax_vm_ops;
desc->vm_flags |= VM_HUGEPAGE;
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
} else {
desc->vm_ops = &ext4_file_vm_ops;
}

View file

@ -109,7 +109,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
vm_flags_t vm_flags;
vma_flags_t vma_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@ -119,7 +119,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
desc->vm_ops = &hugetlb_vm_ops;
/*
@ -148,23 +148,23 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
ret = -ENOMEM;
vm_flags = desc->vm_flags;
vma_flags = desc->vma_flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
* flag S_PRIVATE is set.
*/
if (inode->i_flags & S_PRIVATE)
vm_flags |= VM_NORESERVE;
vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);
if (hugetlb_reserve_pages(inode,
desc->pgoff >> huge_page_order(h),
len >> huge_page_shift(h), desc,
vm_flags) < 0)
vma_flags) < 0)
goto out;
ret = 0;
if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
@ -1527,7 +1527,7 @@ static int get_hstate_idx(int page_size_log)
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
*/
struct file *hugetlb_file_setup(const char *name, size_t size,
vm_flags_t acctflag, int creat_flags,
vma_flags_t acctflag, int creat_flags,
int page_size_log)
{
struct inode *inode;

View file

@ -276,7 +276,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
struct file *file = desc->file;
struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
bool rw = desc->vm_flags & VM_WRITE;
const bool rw = vma_desc_test_flags(desc, VMA_WRITE_BIT);
int err;
/* Avoid any operation if inode is bad. */

View file

@ -411,8 +411,8 @@ static int orangefs_file_mmap_prepare(struct vm_area_desc *desc)
"orangefs_file_mmap: called on %pD\n", file);
/* set the sequential readahead hint */
desc->vm_flags |= VM_SEQ_READ;
desc->vm_flags &= ~VM_RAND_READ;
vma_desc_set_flags(desc, VMA_SEQ_READ_BIT);
vma_desc_clear_flags(desc, VMA_RAND_READ_BIT);
file_accessed(file);
desc->vm_ops = &orangefs_file_vm_ops;

View file

@ -264,7 +264,7 @@ out:
*/
static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc)
{
if (!is_nommu_shared_mapping(desc->vm_flags))
if (!is_nommu_shared_vma_flags(&desc->vma_flags))
return -ENOSYS;
file_accessed(desc->file);

View file

@ -1044,7 +1044,7 @@ static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
* Ensure changes are carried directly to the memory being mapped,
* do not allow copy-on-write mapping.
*/
if (!(desc->vm_flags & VM_SHARED)) {
if (!vma_desc_test_flags(desc, VMA_SHARED_BIT)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}

View file

@ -63,7 +63,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
*/
static int romfs_mmap_prepare(struct vm_area_desc *desc)
{
return is_nommu_shared_mapping(desc->vm_flags) ? 0 : -ENOSYS;
return is_nommu_shared_vma_flags(&desc->vma_flags) ? 0 : -ENOSYS;
}
static unsigned romfs_mmap_capabilities(struct file *file)

View file

@ -61,7 +61,8 @@ xfile_create(
if (!xf)
return -ENOMEM;
xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
xf->file = shmem_kernel_file_setup(description, isize,
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(xf->file)) {
error = PTR_ERR(xf->file);
goto out_xfile;

View file

@ -62,7 +62,7 @@ xmbuf_alloc(
if (!btp)
return -ENOMEM;
file = shmem_kernel_file_setup(descr, 0, 0);
file = shmem_kernel_file_setup(descr, 0, EMPTY_VMA_FLAGS);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto out_free_btp;

View file

@ -2010,14 +2010,14 @@ xfs_file_mmap_prepare(
* We don't support synchronous mappings for non-DAX files and
* for DAX files if underneath dax_device is not synchronous.
*/
if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file),
if (!daxdev_mapping_supported(desc, file_inode(file),
target->bt_daxdev))
return -EOPNOTSUPP;
file_accessed(file);
desc->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(inode))
desc->vm_flags |= VM_HUGEPAGE;
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
return 0;
}

View file

@ -333,7 +333,8 @@ static int zonefs_file_mmap_prepare(struct vm_area_desc *desc)
* ordering between msync() and page cache writeback.
*/
if (zonefs_inode_is_seq(file_inode(file)) &&
(desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
return -EINVAL;
file_accessed(file);

View file

@ -176,7 +176,7 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current);
}
extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid);
extern void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask);
#else /* !CONFIG_CPUSETS */
static inline bool cpusets_enabled(void) { return false; }
@ -299,9 +299,9 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
return false;
}
static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
static inline void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask)
{
return true;
nodes_copy(*mask, node_states[N_MEMORY]);
}
#endif /* !CONFIG_CPUSETS */

View file

@ -65,11 +65,11 @@ size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
/*
* Check if given mapping is supported by the file / underlying device.
*/
static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
const struct inode *inode,
struct dax_device *dax_dev)
{
if (!(vm_flags & VM_SYNC))
if (!vma_desc_test_flags(desc, VMA_SYNC_BIT))
return true;
if (!IS_DAX(inode))
return false;
@ -111,11 +111,11 @@ static inline void set_dax_nomc(struct dax_device *dax_dev)
static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
}
static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
const struct inode *inode,
struct dax_device *dax_dev)
{
return !(vm_flags & VM_SYNC);
return !vma_desc_test_flags(desc, VMA_SYNC_BIT);
}
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)

View file

@ -148,7 +148,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_desc *desc, vm_flags_t vm_flags);
struct vm_area_desc *desc, vma_flags_t vma_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
@ -527,7 +527,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
}
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct,
int creat_flags, int page_size_log);
static inline bool is_file_hugepages(const struct file *file)
@ -543,7 +543,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
#define is_file_hugepages(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag,
int creat_flags, int page_size_log)
{
return ERR_PTR(-ENOSYS);

View file

@ -11,6 +11,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
return !!(vm_flags & VM_HUGETLB);
}
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
return vma_flags_test(flags, VMA_HUGETLB_BIT);
}
#else
static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
@ -18,6 +23,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
return false;
}
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
return false;
}
#endif
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)

View file

@ -1758,7 +1758,7 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
rcu_read_unlock();
}
bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid);
void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask);
void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg);
@ -1829,9 +1829,9 @@ static inline ino_t page_cgroup_ino(struct page *page)
return 0;
}
static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
static inline void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg,
nodemask_t *mask)
{
return true;
}
static inline void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)

View file

@ -53,11 +53,11 @@ struct memory_dev_type *mt_find_alloc_memory_type(int adist,
struct list_head *memory_types);
void mt_put_memory_types(struct list_head *memory_types);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
int next_demotion_node(int node, const nodemask_t *allowed_mask);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
bool node_is_toptier(int node);
#else
static inline int next_demotion_node(int node)
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
{
return NUMA_NO_NODE;
}
@ -101,7 +101,7 @@ static inline void clear_node_memory_type(int node, struct memory_dev_type *memt
}
static inline int next_demotion_node(int node)
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
{
return NUMA_NO_NODE;
}

View file

@ -2,6 +2,7 @@
#ifndef _LINUX_MM_H
#define _LINUX_MM_H
#include <linux/args.h>
#include <linux/errno.h>
#include <linux/mmdebug.h>
#include <linux/gfp.h>
@ -551,17 +552,18 @@ enum {
/*
* Physically remapped pages are special. Tell the
* rest of the world about it:
* VM_IO tells people not to look at these pages
* IO tells people not to look at these pages
* (accesses can have side effects).
* VM_PFNMAP tells the core MM that the base pages are just
* PFNMAP tells the core MM that the base pages are just
* raw PFN mappings, and do not have a "struct page" associated
* with them.
* VM_DONTEXPAND
* DONTEXPAND
* Disable vma merging and expanding with mremap().
* VM_DONTDUMP
* DONTDUMP
* Omit vma from core dump, even when VM_IO turned off.
*/
#define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
#define VMA_REMAP_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, \
VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)
/* This mask prevents VMA from being scanned with khugepaged */
#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
@ -945,7 +947,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
* system word.
*/
if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
unsigned long *bitmap = vma->flags.__vma_flags;
bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG);
}
@ -989,8 +991,7 @@ static inline void vm_flags_mod(struct vm_area_struct *vma,
__vm_flags_mod(vma, set, clear);
}
static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
vma_flag_t bit)
static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit)
{
const vm_flags_t mask = BIT((__force int)bit);
@ -1005,13 +1006,12 @@ static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
* Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
* valid flags are allowed to do this.
*/
static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
vma_flag_t bit)
static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
{
unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
unsigned long *bitmap = vma->flags.__vma_flags;
vma_assert_stabilised(vma);
if (__vma_flag_atomic_valid(vma, bit))
if (__vma_atomic_valid_flag(vma, bit))
set_bit((__force int)bit, bitmap);
}
@ -1022,15 +1022,211 @@ static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
* This is necessarily racey, so callers must ensure that serialisation is
* achieved through some other means, or that races are permissible.
*/
static inline bool vma_flag_test_atomic(struct vm_area_struct *vma,
vma_flag_t bit)
static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
{
if (__vma_flag_atomic_valid(vma, bit))
if (__vma_atomic_valid_flag(vma, bit))
return test_bit((__force int)bit, &vma->vm_flags);
return false;
}
/* Set an individual VMA flag in flags, non-atomically. */
static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
{
unsigned long *bitmap = flags->__vma_flags;
__set_bit((__force int)bit, bitmap);
}
static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
{
vma_flags_t flags;
int i;
vma_flags_clear_all(&flags);
for (i = 0; i < count; i++)
vma_flag_set(&flags, bits[i]);
return flags;
}
/*
* Helper macro which bitwise-or combines the specified input flags into a
* vma_flags_t bitmap value. E.g.:
*
* vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
* VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
*
* The compiler cleverly optimises away all of the work and this ends up being
* equivalent to aggregating the values manually.
*/
#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
(const vma_flag_t []){__VA_ARGS__})
/* Test each of to_test flags in flags, non-atomically. */
static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
vma_flags_t to_test)
{
const unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_test = to_test.__vma_flags;
return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
}
/*
* Test whether any specified VMA flag is set, e.g.:
*
* if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
*/
#define vma_flags_test(flags, ...) \
vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Test that ALL of the to_test flags are set, non-atomically. */
static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
vma_flags_t to_test)
{
const unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_test = to_test.__vma_flags;
return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
}
/*
* Test whether ALL specified VMA flags are set, e.g.:
*
* if (vma_flags_test_all(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
*/
#define vma_flags_test_all(flags, ...) \
vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Set each of the to_set flags in flags, non-atomically. */
static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
{
unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_set = to_set.__vma_flags;
bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
}
/*
* Set all specified VMA flags, e.g.:
*
* vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
*/
#define vma_flags_set(flags, ...) \
vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
/* Clear all of the to-clear flags in flags, non-atomically. */
static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
{
unsigned long *bitmap = flags->__vma_flags;
const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
}
/*
* Clear all specified individual flags, e.g.:
*
* vma_flags_clear(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
*/
#define vma_flags_clear(flags, ...) \
vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
/*
 * Helper to test that ALL specified flags are set in a VMA.
 *
 * Note: appropriate locks must be held, this function does not acquire them for
 * you.
 */
static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
		vma_flags_t flags)
{
	/* Delegates to the vma_flags_t subset test on vma->flags. */
	return vma_flags_test_all_mask(&vma->flags, flags);
}

/*
 * Helper macro for checking that ALL specified flags are set in a VMA, e.g.:
 *
 *     if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
 */
#define vma_test_all_flags(vma, ...) \
	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
/*
 * Helper to set all specified VMA flags in a VMA, non-atomically.
 *
 * Note: appropriate locks must be held, this function does not acquire them for
 * you.
 */
static inline void vma_set_flags_mask(struct vm_area_struct *vma,
		vma_flags_t flags)
{
	/* Delegates to the vma_flags_t OR-in helper on vma->flags. */
	vma_flags_set_mask(&vma->flags, flags);
}

/*
 * Helper macro for specifying VMA flags in a VMA, e.g.:
 *
 *     vma_set_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
 *		     VMA_DONTDUMP_BIT);
 *
 * Note: appropriate locks must be held, this function does not acquire them for
 * you.
 */
#define vma_set_flags(vma, ...) \
	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
/*
 * Helper to test VMA flags in a VMA descriptor.
 *
 * NOTE(review): delegates to vma_flags_test_mask(); whether multiple bits are
 * matched as ANY or ALL is decided there -- confirm at its definition before
 * relying on multi-bit semantics.
 */
static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
		vma_flags_t flags)
{
	return vma_flags_test_mask(&desc->vma_flags, flags);
}

/*
 * Helper macro for testing VMA flags for an input pointer to a struct
 * vm_area_desc object describing a proposed VMA, e.g.:
 *
 *     if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
 *			       VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
 */
#define vma_desc_test_flags(desc, ...) \
	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
/* Helper to set all specified VMA flags in a VMA descriptor, non-atomically. */
static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
		vma_flags_t flags)
{
	/* Delegates to the vma_flags_t OR-in helper on desc->vma_flags. */
	vma_flags_set_mask(&desc->vma_flags, flags);
}

/*
 * Helper macro for specifying VMA flags for an input pointer to a struct
 * vm_area_desc object describing a proposed VMA, e.g.:
 *
 *     vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
 *			  VMA_DONTDUMP_BIT);
 */
#define vma_desc_set_flags(desc, ...) \
	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
/* Helper to clear all specified VMA flags in a VMA descriptor, non-atomically. */
static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
		vma_flags_t flags)
{
	/* Delegates to the vma_flags_t AND-NOT helper on desc->vma_flags. */
	vma_flags_clear_mask(&desc->vma_flags, flags);
}

/*
 * Helper macro for clearing VMA flags for an input pointer to a struct
 * vm_area_desc object describing a proposed VMA, e.g.:
 *
 *     vma_desc_clear_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
 *			    VMA_DONTDUMP_BIT);
 */
#define vma_desc_clear_flags(desc, ...) \
	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
static inline void vma_set_anonymous(struct vm_area_struct *vma)
{
vma->vm_ops = NULL;
@ -1096,15 +1292,20 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
static inline bool is_shared_maywrite(vm_flags_t vm_flags)
static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
{
return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
(VM_SHARED | VM_MAYWRITE);
}
static inline bool is_shared_maywrite(const vma_flags_t *flags)
{
return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
}
static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
{
return is_shared_maywrite(vma->vm_flags);
return is_shared_maywrite(&vma->flags);
}
static inline
@ -1732,6 +1933,14 @@ static inline bool is_cow_mapping(vm_flags_t flags)
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}
/*
 * Would the proposed VMA described by @desc be a CoW mapping?
 * Mirrors is_cow_mapping(): MAYWRITE set while SHARED is clear.
 */
static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc)
{
	const vma_flags_t *flags = &desc->vma_flags;

	return !vma_flags_test(flags, VMA_SHARED_BIT) &&
	       vma_flags_test(flags, VMA_MAYWRITE_BIT);
}
#ifndef CONFIG_MMU
static inline bool is_nommu_shared_mapping(vm_flags_t flags)
{
@ -1745,6 +1954,11 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
*/
return flags & (VM_MAYSHARE | VM_MAYOVERLAY);
}
/*
 * nommu: is this mapping "shared" -- i.e. may it be shared or may it overlay
 * another mapping?  This is the vma_flags_t form of the legacy
 * (flags & (VM_MAYSHARE | VM_MAYOVERLAY)) test.
 *
 * NOTE(review): relies on vma_flags_test() treating multiple bits as
 * "any set" to match the legacy OR-mask test -- confirm at the
 * vma_flags_test() definition.
 */
static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags)
{
	return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
}
#endif
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@ -2627,10 +2841,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
zap_page_range_single(vma, vma->vm_start,
vma->vm_end - vma->vm_start, NULL);
}
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *start_vma, unsigned long start,
unsigned long end, unsigned long tree_end);
struct mmu_notifier_range;
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,

View file

@ -844,7 +844,7 @@ struct mmap_action {
/*
* If specified, this hook is invoked when an error occurred when
* attempting the selection action.
* attempting the selected action.
*
* The hook can return an error code in order to filter the error, but
* it is not valid to clear the error here.
@ -866,7 +866,9 @@ struct mmap_action {
#define NUM_VMA_FLAG_BITS BITS_PER_LONG
typedef struct {
DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
} __private vma_flags_t;
} vma_flags_t;
#define EMPTY_VMA_FLAGS ((vma_flags_t){ })
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
@ -885,10 +887,7 @@ struct vm_area_desc {
/* Mutable fields. Populated with initial state. */
pgoff_t pgoff;
struct file *vm_file;
union {
vm_flags_t vm_flags;
vma_flags_t vma_flags;
};
pgprot_t page_prot;
/* Write-only fields. */
@ -1059,7 +1058,7 @@ struct vm_area_struct {
/* Clears all bits in the VMA flags bitmap, non-atomically. */
static inline void vma_flags_clear_all(vma_flags_t *flags)
{
bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS);
}
/*
@ -1070,7 +1069,9 @@ static inline void vma_flags_clear_all(vma_flags_t *flags)
*/
static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
{
*ACCESS_PRIVATE(flags, __vma_flags) = value;
unsigned long *bitmap = flags->__vma_flags;
bitmap[0] = value;
}
/*
@ -1081,7 +1082,7 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va
*/
static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
{
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
unsigned long *bitmap = flags->__vma_flags;
WRITE_ONCE(*bitmap, value);
}
@ -1089,7 +1090,7 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo
/* Update the first system word of VMA flags setting bits, non-atomically. */
static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
{
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
unsigned long *bitmap = flags->__vma_flags;
*bitmap |= value;
}
@ -1097,7 +1098,7 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
/* Update the first system word of VMA flags clearing bits, non-atomically. */
static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
{
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
unsigned long *bitmap = flags->__vma_flags;
*bitmap &= ~value;
}

View file

@ -515,16 +515,17 @@ static inline void mmu_notifier_range_init_owner(
range->owner = owner;
}
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
#define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr) \
({ \
int __young; \
struct vm_area_struct *___vma = __vma; \
unsigned long ___address = __address; \
__young = ptep_clear_flush_young(___vma, ___address, __ptep); \
unsigned int ___nr = __nr; \
__young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
___address, \
___address + \
PAGE_SIZE); \
___nr * PAGE_SIZE); \
__young; \
})
@ -650,7 +651,7 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
#define mmu_notifier_range_update_to_read_only(r) false
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define clear_flush_young_ptes_notify clear_flush_young_ptes
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young

View file

@ -22,25 +22,6 @@
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif
/*
* On almost all architectures and configurations, 0 can be used as the
* upper ceiling to free_pgtables(): on many architectures it has the same
* effect as using TASK_SIZE. However, there is one configuration which
* must impose a more careful limit, to avoid freeing kernel pgtables.
*/
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING 0UL
#endif
/*
* This defines the first usable user address. Platforms
* can override its value with custom FIRST_USER_ADDRESS
* defined in their respective <asm/pgtable.h>.
*/
#ifndef FIRST_USER_ADDRESS
#define FIRST_USER_ADDRESS 0UL
#endif
/*
* This defines the generic helper for accessing PMD page
* table page. Although platforms can still override this
@ -1087,6 +1068,41 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
}
#endif
#ifndef clear_flush_young_ptes
/**
 * clear_flush_young_ptes - Mark PTEs that map consecutive pages of the same
 *			    folio as old and flush the TLB.
 * @vma: The virtual memory area the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to clear the access bit for.
 *
 * May be overridden by the architecture; this generic version simply applies
 * ptep_clear_flush_young() to each entry in turn, OR-ing the results together.
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock. The PTEs map consecutive
 * pages that belong to the same folio. The PTEs are all in the same PMD.
 *
 * Return: the OR of the per-entry ptep_clear_flush_young() results, i.e.
 * non-zero if any of the entries was young.
 */
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
		unsigned long addr, pte_t *ptep, unsigned int nr)
{
	int young = 0;

	do {
		young |= ptep_clear_flush_young(vma, addr, ptep);
		ptep++;
		addr += PAGE_SIZE;
	} while (--nr);

	return young;
}
#endif
/*
* On some architectures hardware does not set page access bit when accessing
* memory page, it is responsibility of software setting this bit. It brings
@ -1629,6 +1645,25 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
#endif /* CONFIG_MMU */
/*
* On almost all architectures and configurations, 0 can be used as the
* upper ceiling to free_pgtables(): on many architectures it has the same
* effect as using TASK_SIZE. However, there is one configuration which
* must impose a more careful limit, to avoid freeing kernel pgtables.
*/
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING 0UL
#endif
/*
* This defines the first usable user address. Platforms
* can override its value with custom FIRST_USER_ADDRESS
* defined in their respective <asm/pgtable.h>.
*/
#ifndef FIRST_USER_ADDRESS
#define FIRST_USER_ADDRESS 0UL
#endif
/*
* No-op macros that just return the current protection value. Defined here
* because these macros can be used even if CONFIG_MMU is not defined.

View file

@ -102,12 +102,10 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
extern const struct fs_parameter_spec shmem_fs_parameters[];
extern void shmem_init(void);
extern int shmem_init_fs_context(struct fs_context *fc);
extern struct file *shmem_file_setup(const char *name,
loff_t size, unsigned long flags);
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
unsigned long flags);
struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags);
struct file *shmem_kernel_file_setup(const char *name, loff_t size, vma_flags_t vma_flags);
extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
const char *name, loff_t size, unsigned long flags);
const char *name, loff_t size, vma_flags_t flags);
int shmem_zero_setup(struct vm_area_struct *vma);
int shmem_zero_setup_desc(struct vm_area_desc *desc);
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,

View file

@ -707,9 +707,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
int error;
struct shmid_kernel *shp;
size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
const bool has_no_reserve = shmflg & SHM_NORESERVE;
vma_flags_t acctflag = EMPTY_VMA_FLAGS;
struct file *file;
char name[13];
vm_flags_t acctflag = 0;
if (size < SHMMIN || size > ns->shm_ctlmax)
return -EINVAL;
@ -749,8 +750,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
hugesize = ALIGN(size, huge_page_size(hs));
/* hugetlb_file_setup applies strict accounting */
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
if (has_no_reserve)
vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
file = hugetlb_file_setup(name, hugesize, acctflag,
HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
} else {
@ -758,9 +759,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
* Do not allow no accounting for OVERCOMMIT_NEVER, even
* if it's asked for.
*/
if ((shmflg & SHM_NORESERVE) &&
sysctl_overcommit_memory != OVERCOMMIT_NEVER)
acctflag = VM_NORESERVE;
if (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
file = shmem_kernel_file_setup(name, size, acctflag);
}
error = PTR_ERR(file);

View file

@ -4145,40 +4145,58 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask)
return allowed;
}
bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
/**
* cpuset_nodes_allowed - return effective_mems mask from a cgroup cpuset.
* @cgroup: pointer to struct cgroup.
* @mask: pointer to struct nodemask_t to be returned.
*
* Returns effective_mems mask from a cgroup cpuset if it is cgroup v2 and
* has cpuset subsys. Otherwise, returns node_states[N_MEMORY].
*
* This function intentionally avoids taking the cpuset_mutex or callback_lock
* when accessing effective_mems. This is because the obtained effective_mems
* is stale immediately after the query anyway (e.g., effective_mems is updated
* immediately after releasing the lock but before returning).
*
* As a result, returned @mask may be empty because cs->effective_mems can be
* rebound during this call. Besides, nodes in @mask are not guaranteed to be
* online due to hot plugins. Callers should check the mask for validity on
* return based on its subsequent use.
**/
void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask)
{
struct cgroup_subsys_state *css;
struct cpuset *cs;
bool allowed;
/*
* In v1, mem_cgroup and cpuset are unlikely in the same hierarchy
* and mems_allowed is likely to be empty even if we could get to it,
* so return true to avoid taking a global lock on the empty check.
* so return directly to avoid taking a global lock on the empty check.
*/
if (!cpuset_v2())
return true;
if (!cgroup || !cpuset_v2()) {
nodes_copy(*mask, node_states[N_MEMORY]);
return;
}
css = cgroup_get_e_css(cgroup, &cpuset_cgrp_subsys);
if (!css)
return true;
if (!css) {
nodes_copy(*mask, node_states[N_MEMORY]);
return;
}
/*
* The reference taken via cgroup_get_e_css is sufficient to
* protect css, but it does not imply safe accesses to effective_mems.
*
* Normally, accessing effective_mems would require the cpuset_mutex
* or callback_lock - but node_isset is atomic and the reference
* taken via cgroup_get_e_css is sufficient to protect css.
*
* Since this interface is intended for use by migration paths, we
* relax locking here to avoid taking global locks - while accepting
* there may be rare scenarios where the result may be innaccurate.
*
* Reclaim and migration are subject to these same race conditions, and
* cannot make strong isolation guarantees, so this is acceptable.
* or callback_lock - but the correctness of this information is stale
* immediately after the query anyway. We do not acquire the lock
* during this process to save lock contention in exchange for racing
* against mems_allowed rebinds.
*/
cs = container_of(css, struct cpuset, css);
allowed = node_isset(nid, cs->effective_mems);
nodes_copy(*mask, cs->effective_mems);
css_put(css);
return allowed;
}
/**

View file

@ -91,7 +91,7 @@ static int relay_mmap_prepare_buf(struct rchan_buf *buf,
return -EINVAL;
desc->vm_ops = &relay_file_mmap_ops;
desc->vm_flags |= VM_DONTEXPAND;
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
desc->private_data = buf;
return 0;

View file

@ -4012,7 +4012,7 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc)
{
if (is_shared_maywrite(desc->vm_flags))
if (is_shared_maywrite(&desc->vma_flags))
return -EINVAL;
return generic_file_mmap_prepare(desc);
}

View file

@ -1193,16 +1193,16 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
desc->private_data = map;
}
static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
desc->private_data = (void *)((unsigned long)desc->private_data | flags);
}
@ -1216,7 +1216,7 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
return ((unsigned long)desc->private_data) & flag;
}
@ -6571,7 +6571,7 @@ next:
long hugetlb_reserve_pages(struct inode *inode,
long from, long to,
struct vm_area_desc *desc,
vm_flags_t vm_flags)
vma_flags_t vma_flags)
{
long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
@ -6592,7 +6592,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* attempt will be made for VM_NORESERVE to allocate a page
* without using reserves
*/
if (vm_flags & VM_NORESERVE)
if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
return 0;
/*
@ -6601,7 +6601,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* to reserve the full area even if read-only as mprotect() may be
* called to make the mapping read-write. Assume !desc is a shm mapping
*/
if (!desc || desc->vm_flags & VM_MAYSHARE) {
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
/*
* resv_map can not be NULL as hugetlb_reserve_pages is only
* called for inodes for which resv_maps were created (see
@ -6635,7 +6635,7 @@ long hugetlb_reserve_pages(struct inode *inode,
if (err < 0)
goto out_err;
if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) {
/* For private mappings, the hugetlb_cgroup uncharge info hangs
* of the resv_map.
*/
@ -6672,7 +6672,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* consumed reservations are stored in the map. Hence, nothing
* else has to be done for private mappings here
*/
if (!desc || desc->vm_flags & VM_MAYSHARE) {
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
add = region_add(resv_map, from, to, regions_needed, h, h_cg);
if (unlikely(add < 0)) {
@ -6736,7 +6736,7 @@ out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
out_err:
if (!desc || desc->vm_flags & VM_MAYSHARE)
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT))
/* Only call region_abort if the region_chg succeeded but the
* region_add failed or didn't run.
*/

View file

@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma)
}
}
/* unmap_vmas is in mm/memory.c */
void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
#ifdef CONFIG_MMU
static inline void get_anon_vma(struct anon_vma *anon_vma)
@ -509,9 +512,8 @@ bool __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
void folio_activate(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *start_vma, unsigned long floor,
unsigned long ceiling, bool mm_wr_locked);
void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
struct zap_details;
@ -1044,7 +1046,7 @@ extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool write, int *locked);
bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags,
bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked,
unsigned long bytes);
/*

View file

@ -1732,7 +1732,7 @@ static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)
* obtained on guard region installation after the flag is set, so this
* check being performed under this lock excludes races.
*/
if (vma_flag_test_atomic(vma, VMA_MAYBE_GUARD_BIT))
if (vma_test_atomic_flag(vma, VMA_MAYBE_GUARD_BIT))
return false;
return true;

View file

@ -1140,7 +1140,7 @@ static long madvise_guard_install(struct madvise_behavior *madv_behavior)
* acquire an mmap/VMA write lock to read it. All remaining readers may
* or may not see the flag set, but we don't care.
*/
vma_flag_set_atomic(vma, VMA_MAYBE_GUARD_BIT);
vma_set_atomic_flag(vma, VMA_MAYBE_GUARD_BIT);
/*
* If anonymous and we are establishing page tables the VMA ought to

View file

@ -5649,9 +5649,21 @@ subsys_initcall(mem_cgroup_swap_init);
#endif /* CONFIG_SWAP */
bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask)
{
return memcg ? cpuset_node_allowed(memcg->css.cgroup, nid) : true;
nodemask_t allowed;
if (!memcg)
return;
/*
* Since this interface is intended for use by migration paths, and
* reclaim and migration are subject to race conditions such as changes
* in effective_mems and hot-unpluging of nodes, inaccurate allowed
* mask is acceptable.
*/
cpuset_nodes_allowed(memcg->css.cgroup, &allowed);
nodes_and(*mask, *mask, allowed);
}
void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)

View file

@ -86,7 +86,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
idx >>= huge_page_order(h);
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, 0);
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
if (nr_resv < 0)
return ERR_PTR(nr_resv);
@ -463,12 +463,12 @@ struct file *memfd_alloc_file(const char *name, unsigned int flags)
int err = 0;
if (flags & MFD_HUGETLB) {
file = hugetlb_file_setup(name, 0, VM_NORESERVE,
file = hugetlb_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT),
HUGETLB_ANONHUGE_INODE,
(flags >> MFD_HUGE_SHIFT) &
MFD_HUGE_MASK);
} else {
file = shmem_file_setup(name, 0, VM_NORESERVE);
file = shmem_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT));
}
if (IS_ERR(file))
return file;

View file

@ -320,16 +320,17 @@ void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
/**
* next_demotion_node() - Get the next node in the demotion path
* @node: The starting node to lookup the next node
* @allowed_mask: The pointer to allowed node mask
*
* Return: node id for next memory node in the demotion path hierarchy
* from @node; NUMA_NO_NODE if @node is terminal. This does not keep
* @node online or guarantee that it *continues* to be the next demotion
* target.
*/
int next_demotion_node(int node)
int next_demotion_node(int node, const nodemask_t *allowed_mask)
{
struct demotion_nodes *nd;
int target;
nodemask_t mask;
if (!node_demotion)
return NUMA_NO_NODE;
@ -344,6 +345,10 @@ int next_demotion_node(int node)
* node_demotion[] reads need to be consistent.
*/
rcu_read_lock();
/* Filter out nodes that are not in allowed_mask. */
nodes_and(mask, nd->preferred, *allowed_mask);
rcu_read_unlock();
/*
* If there are multiple target nodes, just select one
* target node randomly.
@ -356,10 +361,16 @@ int next_demotion_node(int node)
* caching issue, which seems more complicated. So selecting
* target node randomly seems better until now.
*/
target = node_random(&nd->preferred);
rcu_read_unlock();
if (!nodes_empty(mask))
return node_random(&mask);
return target;
/*
* Preferred nodes are not in allowed_mask. Flip bits in
* allowed_mask as used node mask. Then, use it to get the
* closest demotion target.
*/
nodes_complement(mask, *allowed_mask);
return find_next_best_node(node, &mask);
}
static void disable_all_demotion_targets(void)

View file

@ -370,11 +370,32 @@ void free_pgd_range(struct mmu_gather *tlb,
} while (pgd++, addr = next, addr != end);
}
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long floor,
unsigned long ceiling, bool mm_wr_locked)
/**
* free_pgtables() - Free a range of page tables
* @tlb: The mmu gather
* @unmap: The unmap_desc
*
* Note: pg_start and pg_end are provided to indicate the absolute range of the
* page tables that should be removed. This can differ from the vma mappings on
* some archs that may have mappings that need to be removed outside the vmas.
* Note that the prev->vm_end and next->vm_start are often used.
*
* The vma_end differs from the pg_end when a dup_mmap() failed and the tree has
* unrelated data to the mm_struct being torn down.
*/
void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap)
{
struct unlink_vma_file_batch vb;
struct ma_state *mas = unmap->mas;
struct vm_area_struct *vma = unmap->first;
/*
* Note: USER_PGTABLES_CEILING may be passed as the value of pg_end and
* may be 0. Underflow is expected in this case. Otherwise the
* pagetable end is exclusive. vma_end is exclusive. The last vma
* address should never be larger than the pagetable end.
*/
WARN_ON_ONCE(unmap->vma_end - 1 > unmap->pg_end - 1);
tlb_free_vmas(tlb);
@ -382,19 +403,13 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
unsigned long addr = vma->vm_start;
struct vm_area_struct *next;
/*
* Note: USER_PGTABLES_CEILING may be passed as ceiling and may
* be 0. This will underflow and is okay.
*/
next = mas_find(mas, ceiling - 1);
if (unlikely(xa_is_zero(next)))
next = NULL;
next = mas_find(mas, unmap->tree_end - 1);
/*
* Hide vma from rmap and truncate_pagecache before freeing
* pgtables
*/
if (mm_wr_locked)
if (unmap->mm_wr_locked)
vma_start_write(vma);
unlink_anon_vmas(vma);
@ -406,18 +421,16 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
*/
while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
vma = next;
next = mas_find(mas, ceiling - 1);
if (unlikely(xa_is_zero(next)))
next = NULL;
if (mm_wr_locked)
next = mas_find(mas, unmap->tree_end - 1);
if (unmap->mm_wr_locked)
vma_start_write(vma);
unlink_anon_vmas(vma);
unlink_file_vma_batch_add(&vb, vma);
}
unlink_file_vma_batch_final(&vb);
free_pgd_range(tlb, addr, vma->vm_end,
floor, next ? next->vm_start : ceiling);
free_pgd_range(tlb, addr, vma->vm_end, unmap->pg_start,
next ? next->vm_start : unmap->pg_end);
vma = next;
} while (vma);
}
@ -2124,11 +2137,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlb: address of the caller's struct mmu_gather
* @mas: the maple state
* @vma: the starting vma
* @start_addr: virtual address at which to start unmapping
* @end_addr: virtual address at which to end unmapping
* @tree_end: The maximum index to check
* @unmap: The unmap_desc
*
* Unmap all pages in the vma list.
*
@ -2141,10 +2150,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long tree_end)
void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
{
struct vm_area_struct *vma;
struct mmu_notifier_range range;
struct zap_details details = {
.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
@ -2152,17 +2160,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
.even_cows = true,
};
vma = unmap->first;
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
start_addr, end_addr);
unmap->vma_start, unmap->vma_end);
mmu_notifier_invalidate_range_start(&range);
do {
unsigned long start = start_addr;
unsigned long end = end_addr;
unsigned long start = unmap->vma_start;
unsigned long end = unmap->vma_end;
hugetlb_zap_begin(vma, &start, &end);
unmap_single_vma(tlb, vma, start, end, &details);
hugetlb_zap_end(vma, &details);
vma = mas_find(mas, tree_end - 1);
} while (vma && likely(!xa_is_zero(vma)));
vma = mas_find(unmap->mas, unmap->tree_end - 1);
} while (vma);
mmu_notifier_invalidate_range_end(&range);
}
@ -2948,7 +2957,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr,
static int get_remap_pgoff(bool is_cow, unsigned long addr,
unsigned long end, unsigned long vm_start, unsigned long vm_end,
unsigned long pfn, pgoff_t *vm_pgoff_p)
{
@ -2958,7 +2967,7 @@ static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr,
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
* See vm_normal_page() for details.
*/
if (is_cow_mapping(vm_flags)) {
if (is_cow) {
if (addr != vm_start || end != vm_end)
return -EINVAL;
*vm_pgoff_p = pfn;
@ -2979,7 +2988,7 @@ static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long ad
if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
return -EINVAL;
VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS);
VM_WARN_ON_ONCE(!vma_test_all_flags_mask(vma, VMA_REMAP_FLAGS));
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
@ -3103,9 +3112,9 @@ void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
* check it again on complete and will fail there if specified addr is
* invalid.
*/
get_remap_pgoff(desc->vm_flags, desc->start, desc->end,
get_remap_pgoff(vma_desc_is_cow_mapping(desc), desc->start, desc->end,
desc->start, desc->end, pfn, &desc->pgoff);
desc->vm_flags |= VM_REMAP_FLAGS;
vma_desc_set_flags_mask(desc, VMA_REMAP_FLAGS);
}
static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long addr,
@ -3114,13 +3123,12 @@ static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long
unsigned long end = addr + PAGE_ALIGN(size);
int err;
err = get_remap_pgoff(vma->vm_flags, addr, end,
vma->vm_start, vma->vm_end,
pfn, &vma->vm_pgoff);
err = get_remap_pgoff(is_cow_mapping(vma->vm_flags), addr, end,
vma->vm_start, vma->vm_end, pfn, &vma->vm_pgoff);
if (err)
return err;
vm_flags_set(vma, VM_REMAP_FLAGS);
vma_set_flags_mask(vma, VMA_REMAP_FLAGS);
return 0;
}
@ -7316,7 +7324,7 @@ void folio_zero_user(struct folio *folio, unsigned long addr_hint)
const unsigned long base_addr = ALIGN_DOWN(addr_hint, folio_size(folio));
const long fault_idx = (addr_hint - base_addr) / PAGE_SIZE;
const struct range pg = DEFINE_RANGE(0, folio_nr_pages(folio) - 1);
const int radius = FOLIO_ZERO_LOCALITY_RADIUS;
const long radius = FOLIO_ZERO_LOCALITY_RADIUS;
struct range r[3];
int i;
@ -7324,20 +7332,19 @@ void folio_zero_user(struct folio *folio, unsigned long addr_hint)
* Faulting page and its immediate neighbourhood. Will be cleared at the
* end to keep its cachelines hot.
*/
r[2] = DEFINE_RANGE(clamp_t(s64, fault_idx - radius, pg.start, pg.end),
clamp_t(s64, fault_idx + radius, pg.start, pg.end));
r[2] = DEFINE_RANGE(fault_idx - radius < (long)pg.start ? pg.start : fault_idx - radius,
fault_idx + radius > (long)pg.end ? pg.end : fault_idx + radius);
/* Region to the left of the fault */
r[1] = DEFINE_RANGE(pg.start,
clamp_t(s64, r[2].start - 1, pg.start - 1, r[2].start));
r[1] = DEFINE_RANGE(pg.start, r[2].start - 1);
/* Region to the right of the fault: always valid for the common fault_idx=0 case. */
r[0] = DEFINE_RANGE(clamp_t(s64, r[2].end + 1, r[2].end, pg.end + 1),
pg.end);
r[0] = DEFINE_RANGE(r[2].end + 1, pg.end);
for (i = 0; i < ARRAY_SIZE(r); i++) {
const unsigned long addr = base_addr + r[i].start * PAGE_SIZE;
const unsigned int nr_pages = range_len(&r[i]);
const long nr_pages = (long)range_len(&r[i]);
struct page *page = folio_page(folio, r[i].start);
if (nr_pages > 0)

108
mm/mmap.c
View file

@ -108,7 +108,8 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
if (IS_ERR_VALUE(mapped_addr))
return mapped_addr;
return mlock_future_ok(current->mm, current->mm->def_flags, len)
return mlock_future_ok(current->mm,
current->mm->def_flags & VM_LOCKED, len)
? 0 : -EAGAIN;
}
@ -225,12 +226,12 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
return hint;
}
bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags,
bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked,
unsigned long bytes)
{
unsigned long locked_pages, limit_pages;
if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
if (!is_vma_locked || capable(CAP_IPC_LOCK))
return true;
locked_pages = bytes >> PAGE_SHIFT;
@ -416,7 +417,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (!can_do_mlock())
return -EPERM;
if (!mlock_future_ok(mm, vm_flags, len))
if (!mlock_future_ok(mm, vm_flags & VM_LOCKED, len))
return -EAGAIN;
if (file) {
@ -594,7 +595,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
* taken when vm_ops->mmap() is called
*/
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
VM_NORESERVE,
mk_vma_flags(VMA_NORESERVE_BIT),
HUGETLB_ANONHUGE_INODE,
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
if (IS_ERR(file))
@ -1247,6 +1248,29 @@ limits_failed:
}
EXPORT_SYMBOL(vm_brk_flags);
/*
 * tear_down_vmas() - Detach and free each VMA starting at @vma while VMAs
 * end at or before @end.
 * @mm:  The mm_struct being torn down; mmap write lock must be held.
 * @vmi: VMA iterator over @mm's maple tree.
 * @vma: First VMA to remove.
 * @end: Upper bound; iteration stops at the first VMA with vm_end > @end.
 *
 * Returns the number of pages charged under VM_ACCOUNT so the caller can
 * vm_unacct_memory() them.
 *
 * NOTE(review): the VM_WARN_ON_ONCE below compares against mm->map_count,
 * which assumes the walk covers every VMA the mm tracks — confirm for
 * callers passing a partial range.
 */
static
unsigned long tear_down_vmas(struct mm_struct *mm, struct vma_iterator *vmi,
		struct vm_area_struct *vma, unsigned long end)
{
	unsigned long nr_accounted = 0;
	int count = 0;

	mmap_assert_write_locked(mm);
	vma_iter_set(vmi, vma->vm_end);
	/*
	 * do-while: the first VMA is processed unconditionally; only
	 * subsequent VMAs are bounds-checked against @end.
	 */
	do {
		/* Tally accountable pages before the VMA is freed. */
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma_mark_detached(vma);
		remove_vma(vma);
		count++;
		cond_resched();
		vma = vma_next(vmi);
	} while (vma && vma->vm_end <= end);
	VM_WARN_ON_ONCE(count != mm->map_count);
	return nr_accounted;
}
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
@ -1254,7 +1278,7 @@ void exit_mmap(struct mm_struct *mm)
struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
VMA_ITERATOR(vmi, mm, 0);
int count = 0;
struct unmap_desc unmap;
/* mm's last user has gone, and its about to be pulled down */
mmu_notifier_release(mm);
@ -1263,18 +1287,19 @@ void exit_mmap(struct mm_struct *mm)
arch_exit_mmap(mm);
vma = vma_next(&vmi);
if (!vma || unlikely(xa_is_zero(vma))) {
if (!vma) {
/* Can happen if dup_mmap() received an OOM */
mmap_read_unlock(mm);
mmap_write_lock(mm);
goto destroy;
}
unmap_all_init(&unmap, &vmi, vma);
flush_cache_mm(mm);
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX);
unmap_vmas(&tlb, &unmap);
mmap_read_unlock(mm);
/*
@ -1283,10 +1308,10 @@ void exit_mmap(struct mm_struct *mm)
*/
mm_flags_set(MMF_OOM_SKIP, mm);
mmap_write_lock(mm);
unmap.mm_wr_locked = true;
mt_clear_in_rcu(&mm->mm_mt);
vma_iter_set(&vmi, vma->vm_end);
free_pgtables(&tlb, &vmi.mas, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING, true);
unmap_pgtable_init(&unmap, &vmi);
free_pgtables(&tlb, &unmap);
tlb_finish_mmu(&tlb);
/*
@ -1294,22 +1319,11 @@ void exit_mmap(struct mm_struct *mm)
* enabled, without holding any MM locks besides the unreachable
* mmap_write_lock.
*/
vma_iter_set(&vmi, vma->vm_end);
do {
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
vma_mark_detached(vma);
remove_vma(vma);
count++;
cond_resched();
vma = vma_next(&vmi);
} while (vma && likely(!xa_is_zero(vma)));
nr_accounted = tear_down_vmas(mm, &vmi, vma, ULONG_MAX);
BUG_ON(count != mm->map_count);
trace_exit_mmap(mm);
destroy:
__mt_destroy(&mm->mm_mt);
trace_exit_mmap(mm);
mmap_write_unlock(mm);
vm_unacct_memory(nr_accounted);
}
@ -1840,20 +1854,46 @@ loop_out:
ksm_fork(mm, oldmm);
khugepaged_fork(mm, oldmm);
} else {
unsigned long end;
/*
* The entire maple tree has already been duplicated. If the
* mmap duplication fails, mark the failure point with
* XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered,
* stop releasing VMAs that have not been duplicated after this
* point.
* The entire maple tree has already been duplicated, but
* replacing the vmas failed at mpnt (which could be NULL if
* all were allocated but the last vma was not fully set up).
* Use the start address of the failure point to clean up the
* partially initialized tree.
*/
if (mpnt) {
mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
mas_store(&vmi.mas, XA_ZERO_ENTRY);
/* Avoid OOM iterating a broken tree */
mm_flags_set(MMF_OOM_SKIP, mm);
if (!mm->map_count) {
/* zero vmas were written to the new tree. */
end = 0;
} else if (mpnt) {
/* partial tree failure */
end = mpnt->vm_start;
} else {
/* All vmas were written to the new tree */
end = ULONG_MAX;
}
/* Hide mm from oom killer because the memory is being freed */
mm_flags_set(MMF_OOM_SKIP, mm);
if (end) {
vma_iter_set(&vmi, 0);
tmp = vma_next(&vmi);
UNMAP_STATE(unmap, &vmi, /* first = */ tmp,
/* vma_start = */ 0, /* vma_end = */ end,
/* prev = */ NULL, /* next = */ NULL);
/*
* Don't iterate over vmas beyond the failure point for
* both unmap_vma() and free_pgtables().
*/
unmap.tree_end = end;
flush_cache_mm(mm);
unmap_region(&unmap);
charge = tear_down_vmas(mm, &vmi, tmp, end);
vm_unacct_memory(charge);
}
__mt_destroy(&mm->mm_mt);
/*
* The mm_struct is going to exit, but the locks will be dropped
* first. Setting the mm_struct as unstable is advisable as it is

View file

@ -1740,7 +1740,7 @@ static int check_prep_vma(struct vma_remap_struct *vrm)
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
return -EFAULT;
if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta))
if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, vrm->delta))
return -EAGAIN;
if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))

View file

@ -1429,6 +1429,7 @@ __always_inline bool __free_pages_prepare(struct page *page,
page_cpupid_reset_last(page);
page->flags.f &= ~PAGE_FLAGS_CHECK_AT_PREP;
page->private = 0;
reset_page_owner(page, order);
page_table_check_free(page, order);
pgalloc_tag_sub(page, 1 << order);

View file

@ -913,9 +913,11 @@ static bool folio_referenced_one(struct folio *folio,
struct folio_referenced_arg *pra = arg;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
int ptes = 0, referenced = 0;
unsigned int nr;
while (page_vma_mapped_walk(&pvmw)) {
address = pvmw.address;
nr = 1;
if (vma->vm_flags & VM_LOCKED) {
ptes++;
@ -960,9 +962,21 @@ static bool folio_referenced_one(struct folio *folio,
if (lru_gen_look_around(&pvmw))
referenced++;
} else if (pvmw.pte) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte))
if (folio_test_large(folio)) {
unsigned long end_addr = pmd_addr_end(address, vma->vm_end);
unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT;
pte_t pteval = ptep_get(pvmw.pte);
nr = folio_pte_batch(folio, pvmw.pte,
pteval, max_nr);
}
ptes += nr;
if (clear_flush_young_ptes_notify(vma, address, pvmw.pte, nr))
referenced++;
/* Skip the batched PTEs */
pvmw.pte += nr - 1;
pvmw.address += (nr - 1) * PAGE_SIZE;
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
if (pmdp_clear_flush_young_notify(vma, address,
pvmw.pmd))
@ -972,7 +986,15 @@ static bool folio_referenced_one(struct folio *folio,
WARN_ON_ONCE(1);
}
pra->mapcount--;
pra->mapcount -= nr;
/*
* If we are sure that we batched the entire folio,
* we can just optimize and stop right here.
*/
if (ptes == pvmw.nr_pages) {
page_vma_mapped_walk_done(&pvmw);
break;
}
}
if (referenced)
@ -1923,12 +1945,16 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
end_addr = pmd_addr_end(addr, vma->vm_end);
max_nr = (end_addr - addr) >> PAGE_SHIFT;
/* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
/* We only support lazyfree or file folios batching for now ... */
if (folio_test_anon(folio) && folio_test_swapbacked(folio))
return 1;
if (pte_unused(pte))
return 1;
if (userfaultfd_wp(vma))
return 1;
return folio_pte_batch(folio, pvmw->pte, pte, max_nr);
}
@ -2291,7 +2317,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*
* See Documentation/mm/mmu_notifier.rst
*/
dec_mm_counter(mm, mm_counter_file(folio));
add_mm_counter(mm, mm_counter_file(folio), -nr_pages);
}
discard:
if (unlikely(folio_test_hugetlb(folio))) {

View file

@ -122,13 +122,12 @@ static int secretmem_mmap_prepare(struct vm_area_desc *desc)
{
const unsigned long len = vma_desc_size(desc);
if ((desc->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
if (!vma_desc_test_flags(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT))
return -EINVAL;
if (!mlock_future_ok(desc->mm, desc->vm_flags | VM_LOCKED, len))
vma_desc_set_flags(desc, VMA_LOCKED_BIT, VMA_DONTDUMP_BIT);
if (!mlock_future_ok(desc->mm, /*is_vma_locked=*/ true, len))
return -EAGAIN;
desc->vm_flags |= VM_LOCKED | VM_DONTDUMP;
desc->vm_ops = &secretmem_vm_ops;
return 0;

View file

@ -3064,7 +3064,7 @@ static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
struct super_block *sb,
struct inode *dir, umode_t mode,
dev_t dev, unsigned long flags)
dev_t dev, vma_flags_t flags)
{
struct inode *inode;
struct shmem_inode_info *info;
@ -3092,7 +3092,8 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
spin_lock_init(&info->lock);
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
info->flags = vma_flags_test(&flags, VMA_NORESERVE_BIT)
? SHMEM_F_NORESERVE : 0;
info->i_crtime = inode_get_mtime(inode);
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
@ -3145,7 +3146,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
#ifdef CONFIG_TMPFS_QUOTA
static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
struct super_block *sb, struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
umode_t mode, dev_t dev, vma_flags_t flags)
{
int err;
struct inode *inode;
@ -3171,9 +3172,9 @@ errout:
return ERR_PTR(err);
}
#else
static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
struct super_block *sb, struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
umode_t mode, dev_t dev, vma_flags_t flags)
{
return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
}
@ -3880,7 +3881,8 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
return -EINVAL;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev,
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -3915,7 +3917,8 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct inode *inode;
int error;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0,
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
goto err_out;
@ -4112,7 +4115,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
return -ENAMETOOLONG;
inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
VM_NORESERVE);
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -5113,7 +5116,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#endif /* CONFIG_TMPFS_QUOTA */
inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL,
S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
S_IFDIR | sbinfo->mode, 0,
mk_vma_flags(VMA_NORESERVE_BIT));
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
goto failed;
@ -5814,7 +5818,7 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
struct super_block *sb, struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
umode_t mode, dev_t dev, vma_flags_t flags)
{
struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
return inode ? inode : ERR_PTR(-ENOSPC);
@ -5825,10 +5829,11 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
/* common code */
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
loff_t size, unsigned long vm_flags,
loff_t size, vma_flags_t flags,
unsigned int i_flags)
{
unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
const unsigned long shmem_flags =
vma_flags_test(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
struct inode *inode;
struct file *res;
@ -5841,13 +5846,13 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
if (is_idmapped_mnt(mnt))
return ERR_PTR(-EINVAL);
if (shmem_acct_size(flags, size))
if (shmem_acct_size(shmem_flags, size))
return ERR_PTR(-ENOMEM);
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
S_IFREG | S_IRWXUGO, 0, vm_flags);
S_IFREG | S_IRWXUGO, 0, flags);
if (IS_ERR(inode)) {
shmem_unacct_size(flags, size);
shmem_unacct_size(shmem_flags, size);
return ERR_CAST(inode);
}
inode->i_flags |= i_flags;
@ -5870,9 +5875,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
* checks are provided at the key or shm level rather than the inode.
* @name: name for dentry (to be seen in /proc/<pid>/maps)
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
*/
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
struct file *shmem_kernel_file_setup(const char *name, loff_t size,
vma_flags_t flags)
{
return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
@ -5882,9 +5888,9 @@ EXPORT_SYMBOL_GPL(shmem_kernel_file_setup);
* shmem_file_setup - get an unlinked file living in tmpfs
* @name: name for dentry (to be seen in /proc/<pid>/maps)
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
*/
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags)
{
return __shmem_file_setup(shm_mnt, name, size, flags, 0);
}
@ -5895,16 +5901,17 @@ EXPORT_SYMBOL_GPL(shmem_file_setup);
* @mnt: the tmpfs mount where the file will be created
* @name: name for dentry (to be seen in /proc/<pid>/maps)
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
*/
struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
loff_t size, unsigned long flags)
loff_t size, vma_flags_t flags)
{
return __shmem_file_setup(mnt, name, size, flags, 0);
}
EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags)
static struct file *__shmem_zero_setup(unsigned long start, unsigned long end,
vma_flags_t flags)
{
loff_t size = end - start;
@ -5914,7 +5921,7 @@ static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, v
* accessible to the user through its mapping, use S_PRIVATE flag to
* bypass file security, in the same way as shmem_kernel_file_setup().
*/
return shmem_kernel_file_setup("dev/zero", size, vm_flags);
return shmem_kernel_file_setup("dev/zero", size, flags);
}
/**
@ -5924,7 +5931,7 @@ static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, v
*/
int shmem_zero_setup(struct vm_area_struct *vma)
{
struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags);
struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->flags);
if (IS_ERR(file))
return PTR_ERR(file);
@ -5945,7 +5952,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
*/
int shmem_zero_setup_desc(struct vm_area_desc *desc)
{
struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags);
struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vma_flags);
if (IS_ERR(file))
return PTR_ERR(file);

View file

@ -1154,7 +1154,7 @@ int __compat_vma_mmap(const struct file_operations *f_op,
.pgoff = vma->vm_pgoff,
.vm_file = vma->vm_file,
.vm_flags = vma->vm_flags,
.vma_flags = vma->flags,
.page_prot = vma->vm_page_prot,
.action.type = MMAP_NOTHING, /* Default */

View file

@ -15,7 +15,10 @@ struct mmap_state {
unsigned long end;
pgoff_t pgoff;
unsigned long pglen;
union {
vm_flags_t vm_flags;
vma_flags_t vma_flags;
};
struct file *file;
pgprot_t page_prot;
@ -472,19 +475,16 @@ void remove_vma(struct vm_area_struct *vma)
*
* Called with the mm semaphore held.
*/
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct vm_area_struct *next)
void unmap_region(struct unmap_desc *unmap)
{
struct mm_struct *mm = vma->vm_mm;
struct mm_struct *mm = unmap->first->vm_mm;
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end);
mas_set(mas, vma->vm_end);
free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING,
/* mm_wr_locked = */ true);
unmap_vmas(&tlb, unmap);
mas_set(unmap->mas, unmap->tree_reset);
free_pgtables(&tlb, unmap);
tlb_finish_mmu(&tlb);
}
@ -1256,26 +1256,32 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
struct ma_state *mas_detach, bool mm_wr_locked)
{
struct mmu_gather tlb;
if (!vms->clear_ptes) /* Nothing to do */
return;
struct unmap_desc unmap = {
.mas = mas_detach,
.first = vms->vma,
/* start and end may be different if there is no prev or next vma. */
.pg_start = vms->unmap_start,
.pg_end = vms->unmap_end,
.vma_start = vms->start,
.vma_end = vms->end,
/*
* The tree limits and reset differ from the normal case since it's a
* side-tree
*/
.tree_reset = 1,
.tree_end = vms->vma_count,
/*
* We can free page tables without write-locking mmap_lock because VMAs
* were isolated before we downgraded mmap_lock.
*/
mas_set(mas_detach, 1);
tlb_gather_mmu(&tlb, vms->vma->vm_mm);
update_hiwater_rss(vms->vma->vm_mm);
unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
vms->vma_count);
.mm_wr_locked = mm_wr_locked,
};
if (!vms->clear_ptes) /* Nothing to do */
return;
mas_set(mas_detach, 1);
/* start and end may be different if there is no prev or next vma. */
free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
vms->unmap_end, mm_wr_locked);
tlb_finish_mmu(&tlb);
unmap_region(&unmap);
vms->clear_ptes = false;
}
@ -2366,7 +2372,7 @@ static void set_desc_from_map(struct vm_area_desc *desc,
desc->pgoff = map->pgoff;
desc->vm_file = map->file;
desc->vm_flags = map->vm_flags;
desc->vma_flags = map->vma_flags;
desc->page_prot = map->page_prot;
}
@ -2461,13 +2467,14 @@ static int __mmap_new_file_vma(struct mmap_state *map,
error = mmap_file(vma->vm_file, vma);
if (error) {
UNMAP_STATE(unmap, vmi, vma, vma->vm_start, vma->vm_end,
map->prev, map->next);
fput(vma->vm_file);
vma->vm_file = NULL;
vma_iter_set(vmi, vma->vm_end);
/* Undo any partial mapping done by a device driver. */
unmap_region(&vmi->mas, vma, map->prev, map->next);
unmap_region(&unmap);
return error;
}
@ -2646,7 +2653,7 @@ static int call_mmap_prepare(struct mmap_state *map,
map->file_doesnt_need_get = true;
map->file = desc->vm_file;
}
map->vm_flags = desc->vm_flags;
map->vma_flags = desc->vma_flags;
map->page_prot = desc->page_prot;
/* User-defined fields. */
map->vm_ops = desc->vm_ops;
@ -2819,7 +2826,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
return -EINVAL;
/* Map writable and ensure this isn't a sealed memfd. */
if (file && is_shared_maywrite(vm_flags)) {
if (file && is_shared_maywrite_vm_flags(vm_flags)) {
int error = mapping_map_writable(file->f_mapping);
if (error)
@ -3049,7 +3056,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
return -ENOMEM;
/* mlock limit tests */
if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, grow << PAGE_SHIFT))
return -ENOMEM;
/* Check to ensure the stack will not grow into a hugetlb-only region */

View file

@ -155,6 +155,72 @@ struct vma_merge_struct {
};
/*
 * struct unmap_desc - Parameters for unmapping a run of VMAs and freeing
 * the associated page tables (consumed by unmap_vmas()/free_pgtables()).
 */
struct unmap_desc {
	struct ma_state *mas;		/* the maple state pointing to the first vma */
	struct vm_area_struct *first;	/* The first vma */
	unsigned long pg_start;		/* The first pagetable address to free (floor) */
	unsigned long pg_end;		/* The last pagetable address to free (ceiling) */
	unsigned long vma_start;	/* The min vma address */
	unsigned long vma_end;		/* The max vma address */
	unsigned long tree_end;		/* Maximum for the vma tree search */
	unsigned long tree_reset;	/* Where to reset the vma tree walk */
	bool mm_wr_locked;		/* If the mmap write lock is held */
};
/*
* unmap_all_init() - Initialize unmap_desc to remove all vmas, point the
* pg_start and pg_end to a safe location.
*/
static inline void unmap_all_init(struct unmap_desc *unmap,
struct vma_iterator *vmi, struct vm_area_struct *vma)
{
unmap->mas = &vmi->mas;
unmap->first = vma;
unmap->pg_start = FIRST_USER_ADDRESS;
unmap->pg_end = USER_PGTABLES_CEILING;
unmap->vma_start = 0;
unmap->vma_end = ULONG_MAX;
unmap->tree_end = ULONG_MAX;
unmap->tree_reset = vma->vm_end;
unmap->mm_wr_locked = false;
}
/*
 * unmap_pgtable_init() - Re-aim an unmap_desc at removing all page tables
 * within the user range, and reset the vma tree walk position.
 *
 * The bounds deliberately exceed the vma range because mappings can exist
 * outside vmas:
 *
 * ARM can have mappings outside of vmas.
 * See: e2cdef8c847b4 ("[PATCH] freepgt: free_pgtables from FIRST_USER_ADDRESS")
 *
 * ARM LPAE uses page table mappings beyond the USER_PGTABLES_CEILING
 * See: CONFIG_ARM_LPAE in arch/arm/include/asm/pgtable.h
 */
static inline void unmap_pgtable_init(struct unmap_desc *unmap,
		struct vma_iterator *vmi)
{
	/* Restart the tree walk from the previously recorded reset point. */
	vma_iter_set(vmi, unmap->tree_reset);
	unmap->vma_start = FIRST_USER_ADDRESS;
	unmap->vma_end = USER_PGTABLES_CEILING;
	unmap->tree_end = USER_PGTABLES_CEILING;
}
/*
 * UNMAP_STATE() - Declare and initialize an on-stack unmap_desc named @name.
 * The page-table floor/ceiling and tree_end are derived from the optional
 * @_prev/@_next neighbouring vmas; absent a neighbour, the full user
 * page-table range is used.
 */
#define UNMAP_STATE(name, _vmi, _vma, _vma_start, _vma_end, _prev, _next) \
	struct unmap_desc name = { \
		.mas = &(_vmi)->mas, \
		.first = _vma, \
		.pg_start = _prev ? ((struct vm_area_struct *)_prev)->vm_end : \
			FIRST_USER_ADDRESS, \
		.pg_end = _next ? ((struct vm_area_struct *)_next)->vm_start : \
			USER_PGTABLES_CEILING, \
		.vma_start = _vma_start, \
		.vma_end = _vma_end, \
		.tree_end = _next ? \
			((struct vm_area_struct *)_next)->vm_start : \
			USER_PGTABLES_CEILING, \
		.tree_reset = _vma->vm_end, \
		.mm_wr_locked = true, \
	}
static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
return vmg->state == VMA_MERGE_ERROR_NOMEM;
@ -243,8 +309,7 @@ static inline void set_vma_from_desc(struct vm_area_struct *vma,
vma->vm_pgoff = desc->pgoff;
if (desc->vm_file != vma->vm_file)
vma_set_file(vma, desc->vm_file);
if (desc->vm_flags != vma->vm_flags)
vm_flags_set(vma, desc->vm_flags);
vma->flags = desc->vma_flags;
vma->vm_page_prot = desc->page_prot;
/* User-defined fields. */
@ -262,9 +327,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
bool unlock);
void remove_vma(struct vm_area_struct *vma);
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct vm_area_struct *next);
void unmap_region(struct unmap_desc *unmap);
/**
* vma_modify_flags() - Perform any necessary split/merge in preparation for

View file

@ -46,6 +46,7 @@
#include <linux/swap.h>
#include <linux/uprobes.h>
#include <linux/userfaultfd_k.h>
#include <linux/pgtable.h>
#include <asm/current.h>
#include <asm/tlb.h>

View file

@ -343,19 +343,21 @@ static void flush_reclaim_state(struct scan_control *sc)
static bool can_demote(int nid, struct scan_control *sc,
struct mem_cgroup *memcg)
{
int demotion_nid;
struct pglist_data *pgdat = NODE_DATA(nid);
nodemask_t allowed_mask;
if (!numa_demotion_enabled)
if (!pgdat || !numa_demotion_enabled)
return false;
if (sc && sc->no_demotion)
return false;
demotion_nid = next_demotion_node(nid);
if (demotion_nid == NUMA_NO_NODE)
node_get_allowed_targets(pgdat, &allowed_mask);
if (nodes_empty(allowed_mask))
return false;
/* If demotion node isn't in the cgroup's mems_allowed, fall back */
return mem_cgroup_node_allowed(memcg, demotion_nid);
/* Filter out nodes that are not in cgroup's mems_allowed. */
mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
return !nodes_empty(allowed_mask);
}
static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
@ -1017,9 +1019,10 @@ static struct folio *alloc_demote_folio(struct folio *src,
* Folios which are not demoted are left on @demote_folios.
*/
static unsigned int demote_folio_list(struct list_head *demote_folios,
struct pglist_data *pgdat)
struct pglist_data *pgdat,
struct mem_cgroup *memcg)
{
int target_nid = next_demotion_node(pgdat->node_id);
int target_nid;
unsigned int nr_succeeded;
nodemask_t allowed_mask;
@ -1031,7 +1034,6 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
*/
.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
__GFP_NOMEMALLOC | GFP_NOWAIT,
.nid = target_nid,
.nmask = &allowed_mask,
.reason = MR_DEMOTION,
};
@ -1039,10 +1041,17 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
if (list_empty(demote_folios))
return 0;
if (target_nid == NUMA_NO_NODE)
node_get_allowed_targets(pgdat, &allowed_mask);
mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
if (nodes_empty(allowed_mask))
return 0;
node_get_allowed_targets(pgdat, &allowed_mask);
target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
if (target_nid == NUMA_NO_NODE)
/* No lower-tier nodes or nodes were hot-unplugged. */
return 0;
mtc.nid = target_nid;
/* Demotion ignores all cpuset and mempolicy settings */
migrate_pages(demote_folios, alloc_demote_folio, NULL,
@ -1564,7 +1573,7 @@ keep:
/* 'folio_list' is always empty here */
/* Migrate folios selected for demotion */
nr_demoted = demote_folio_list(&demote_folios, pgdat);
nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
nr_reclaimed += nr_demoted;
stat->nr_demoted += nr_demoted;
/* Folios that could not be demoted are still in @demote_folios */

View file

@ -103,7 +103,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
0, enckey);
/* save aligned data to file */
file = shmem_kernel_file_setup("", enclen, 0);
file = shmem_kernel_file_setup("", enclen, EMPTY_VMA_FLAGS);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto err_enckey;

View file

@ -24,6 +24,10 @@ void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
bool __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
bool __bitmap_subset(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
@ -81,6 +85,15 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
__bitmap_or(dst, src1, src2, nbits);
}
/*
 * bitmap_andnot() - dst = src1 & ~src2 over @nbits; returns true if any
 * resulting bit is set.  Compile-time single-word cases are done inline.
 */
static __always_inline
bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
		   const unsigned long *src2, unsigned int nbits)
{
	if (!small_const_nbits(nbits))
		return __bitmap_andnot(dst, src1, src2, nbits);

	*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits);
	return *dst != 0;
}
static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused)
{
return malloc(bitmap_size(nbits));
@ -157,6 +170,15 @@ static inline bool bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
/*
 * bitmap_subset() - Return true if every set bit of src1 (within @nbits)
 * is also set in src2.  Compile-time single-word cases are done inline.
 */
static __always_inline
bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
	if (!small_const_nbits(nbits))
		return __bitmap_subset(src1, src2, nbits);

	/* Subset iff no bit of src1 falls outside src2. */
	return !((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
}
static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
{
if (__builtin_constant_p(nbits) && nbits == 1)

View file

@ -140,3 +140,32 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len)
*p &= ~mask_to_clear;
}
}
/*
 * __bitmap_andnot() - Store bitmap1 & ~bitmap2 into dst over @bits bits.
 *
 * The final partial word, if any, is masked so that bits beyond @bits are
 * cleared in dst.  Returns true if any resulting bit is set.
 */
bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
		const unsigned long *bitmap2, unsigned int bits)
{
	const unsigned int full_words = bits / BITS_PER_LONG;
	unsigned long any = 0;
	unsigned int i;

	for (i = 0; i < full_words; i++) {
		dst[i] = bitmap1[i] & ~bitmap2[i];
		any |= dst[i];
	}
	if (bits % BITS_PER_LONG) {
		dst[i] = bitmap1[i] & ~bitmap2[i] & BITMAP_LAST_WORD_MASK(bits);
		any |= dst[i];
	}
	return any != 0;
}
/*
 * __bitmap_subset() - Return true if every set bit of bitmap1 (within
 * @bits) is also set in bitmap2.
 */
bool __bitmap_subset(const unsigned long *bitmap1,
		const unsigned long *bitmap2, unsigned int bits)
{
	const unsigned int full_words = bits / BITS_PER_LONG;
	unsigned int i;

	for (i = 0; i < full_words; i++) {
		if (bitmap1[i] & ~bitmap2[i])
			return false;
	}
	if ((bits % BITS_PER_LONG) &&
	    (bitmap1[i] & ~bitmap2[i] & BITMAP_LAST_WORD_MASK(bits)))
		return false;
	return true;
}

View file

@ -12,6 +12,7 @@ map_hugetlb
map_populate
thuge-gen
compaction_test
memory-failure
migration
mlock2-tests
mrelease_test

View file

@ -75,6 +75,7 @@ TEST_GEN_FILES += map_populate
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64))
TEST_GEN_FILES += memfd_secret
endif
TEST_GEN_FILES += memory-failure
TEST_GEN_FILES += migration
TEST_GEN_FILES += mkdirty
TEST_GEN_FILES += mlock-random-test
@ -154,6 +155,7 @@ TEST_PROGS += ksft_ksm_numa.sh
TEST_PROGS += ksft_madv_guard.sh
TEST_PROGS += ksft_madv_populate.sh
TEST_PROGS += ksft_memfd_secret.sh
TEST_PROGS += ksft_memory_failure.sh
TEST_PROGS += ksft_migration.sh
TEST_PROGS += ksft_mkdirty.sh
TEST_PROGS += ksft_mlock.sh

View file

@ -11,3 +11,5 @@ CONFIG_ANON_VMA_NAME=y
CONFIG_FTRACE=y
CONFIG_PROFILING=y
CONFIG_UPROBES=y
CONFIG_MEMORY_FAILURE=y
CONFIG_HWPOISON_INJECT=m

View file

@ -0,0 +1,4 @@
#!/bin/sh -e
# SPDX-License-Identifier: GPL-2.0
# Run only the memory-failure group of the mm selftests.

./run_vmtests.sh -t memory-failure

View file

@ -0,0 +1,359 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Memory-failure functional tests.
*
* Author(s): Miaohe Lin <linmiaohe@huawei.com>
*/
#include "../kselftest_harness.h"
#include <sys/mman.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <unistd.h>
#include <signal.h>
#include <setjmp.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vfs.h>
#include <linux/magic.h>
#include <errno.h>
#include "vm_util.h"
/* How the error is injected into the target page. */
enum inject_type {
	MADV_HARD,	/* madvise(MADV_HWPOISON): hard (uncorrected) error */
	MADV_SOFT,	/* madvise(MADV_SOFT_OFFLINE): soft offline */
};

/* Injector x page-state combinations used to select expected results. */
enum result_type {
	MADV_HARD_ANON,
	MADV_HARD_CLEAN_PAGECACHE,
	MADV_HARD_DIRTY_PAGECACHE,
	MADV_SOFT_ANON,
	MADV_SOFT_CLEAN_PAGECACHE,
	MADV_SOFT_DIRTY_PAGECACHE,
};

/* Jump target and captured siginfo shared with the SIGBUS handler. */
static jmp_buf signal_jmp_buf;
static siginfo_t siginfo;

const char *pagemap_proc = "/proc/self/pagemap";
const char *kpageflags_proc = "/proc/kpageflags";
/* Per-test state, zeroed and populated in FIXTURE_SETUP(). */
FIXTURE(memory_failure)
{
	unsigned long page_size;	/* system page size from sysconf() */
	unsigned long corrupted_size;	/* baseline corrupted size, see prepare() */
	unsigned long pfn;		/* pfn backing the page under test */
	int pagemap_fd;			/* /proc/self/pagemap */
	int kpageflags_fd;		/* /proc/kpageflags */
	bool triggered;			/* presumably set once SIGBUS fires — not set in visible code, TODO confirm */
};
/* Variant = one injection strategy (hard poison vs soft offline). */
FIXTURE_VARIANT(memory_failure)
{
	enum inject_type type;	/* which injector this variant exercises */
	int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr);
};
/* Inject a hard (uncorrected) error into the page at @vaddr. */
static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
{
	return madvise(vaddr, self->page_size, MADV_HWPOISON);
}
/* Register the hard-poison variant. */
FIXTURE_VARIANT_ADD(memory_failure, madv_hard)
{
	.type = MADV_HARD,
	.inject = madv_hard_inject,
};
/* Soft-offline the page at @vaddr (no SIGBUS expected). */
static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
{
	return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE);
}
/* Register the soft-offline variant. */
FIXTURE_VARIANT_ADD(memory_failure, madv_soft)
{
	.type = MADV_SOFT,
	.inject = madv_soft_inject,
};
/*
 * SIGBUS handler: snapshot the delivered siginfo for later inspection, then
 * jump back to the sigsetjmp() point in the test body.
 */
static void sigbus_action(int signo, siginfo_t *si, void *args)
{
	siginfo = *si;
	siglongjmp(signal_jmp_buf, 1);
}
/*
 * Install sigbus_action() as the SIGBUS handler so injected hard errors can
 * be observed via the captured siginfo.
 *
 * Returns 0 on success, -1 on failure (per sigaction()).
 */
static int setup_sighandler(void)
{
	struct sigaction sa = {
		.sa_sigaction = sigbus_action,
		.sa_flags = SA_SIGINFO,
	};

	/*
	 * Fix: explicitly empty the signal mask - POSIX does not guarantee
	 * that an all-zero sigset_t (from the designated initializer) is the
	 * empty set.
	 */
	sigemptyset(&sa.sa_mask);
	return sigaction(SIGBUS, &sa, NULL);
}
/*
 * Per-test setup: install the SIGBUS handler and open the pagemap and
 * kpageflags interfaces; the test is SKIPped if any is unavailable.
 */
FIXTURE_SETUP(memory_failure)
{
	memset(self, 0, sizeof(*self));
	self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
	memset(&siginfo, 0, sizeof(siginfo));

	if (setup_sighandler())
		SKIP(return, "setup sighandler failed.\n");

	self->pagemap_fd = open(pagemap_proc, O_RDONLY);
	if (self->pagemap_fd == -1)
		SKIP(return, "open %s failed.\n", pagemap_proc);

	self->kpageflags_fd = open(kpageflags_proc, O_RDONLY);
	if (self->kpageflags_fd == -1)
		SKIP(return, "open %s failed.\n", kpageflags_proc);
}
/*
 * Restore the default SIGBUS disposition.
 *
 * Fix: the original set SA_SIGINFO while assigning the plain .sa_handler
 * member; SA_SIGINFO selects the three-argument .sa_sigaction member, so the
 * flag contradicted the member used. Resetting to SIG_DFL needs no flags.
 */
static void teardown_sighandler(void)
{
	struct sigaction sa = {
		.sa_handler = SIG_DFL,
	};

	sigemptyset(&sa.sa_mask);
	sigaction(SIGBUS, &sa, NULL);
}
/* Per-test teardown: release /proc fds and restore default SIGBUS handling. */
FIXTURE_TEARDOWN(memory_failure)
{
	close(self->kpageflags_fd);
	close(self->pagemap_fd);
	teardown_sighandler();
}
/*
 * Record pre-injection state: the PFN backing @vaddr and the current
 * HardwareCorrupted accounting, so check()/cleanup() can verify the deltas.
 */
static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
		void *vaddr)
{
	self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr);
	ASSERT_NE(self->pfn, -1UL);
	/* Baseline HardwareCorrupted value (kB) before the error is injected. */
	ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0);
}
/*
 * Verify every byte in [vaddr, vaddr + size) still holds the 0xce pattern
 * the tests wrote.
 *
 * Fix: the original only compared full 64-byte chunks, silently ignoring a
 * trailing remainder when @size is not a multiple of 64; the tail is now
 * checked as well. (Callers pass page_size, so their behavior is unchanged.)
 */
static bool check_memory(void *vaddr, unsigned long size)
{
	const unsigned char *p = vaddr;
	char buf[64];

	memset(buf, 0xce, sizeof(buf));
	while (size >= sizeof(buf)) {
		if (memcmp(p, buf, sizeof(buf)))
			return false;
		size -= sizeof(buf);
		p += sizeof(buf);
	}
	/* Compare any trailing partial chunk. */
	return size == 0 || !memcmp(p, buf, size);
}
/*
 * Validate post-injection state.
 *
 * @vaddr:  address of the poisoned page
 * @type:   scenario being validated (see enum result_type)
 * @setjmp: value the caller got from sigsetjmp() - 1 means the access
 *          faulted and the SIGBUS handler jumped back, 0 means no signal
 */
static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
		void *vaddr, enum result_type type, int setjmp)
{
	unsigned long size;
	uint64_t pfn_flags;

	switch (type) {
	case MADV_SOFT_ANON:
	case MADV_HARD_CLEAN_PAGECACHE:
	case MADV_SOFT_CLEAN_PAGECACHE:
	case MADV_SOFT_DIRTY_PAGECACHE:
		/* It is not expected to receive a SIGBUS signal. */
		ASSERT_EQ(setjmp, 0);
		/* The page content should remain unchanged. */
		ASSERT_TRUE(check_memory(vaddr, self->page_size));
		/* The backing pfn of addr should have changed. */
		ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn);
		break;
	case MADV_HARD_ANON:
	case MADV_HARD_DIRTY_PAGECACHE:
		/* The SIGBUS signal should have been received. */
		ASSERT_EQ(setjmp, 1);
		/* Check if siginfo contains correct SIGBUS context. */
		ASSERT_EQ(siginfo.si_signo, SIGBUS);
		ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR);
		/* si_addr_lsb carries log2 of the poisoned mapping size. */
		ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size);
		ASSERT_EQ(siginfo.si_addr, vaddr);
		/* XXX Check backing pte is hwpoison entry when supported. */
		ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr));
		break;
	default:
		SKIP(return, "unexpected inject type %d.\n", type);
	}

	/* Check if the value of HardwareCorrupted has increased. */
	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
	/* HardwareCorrupted is reported in kB, hence page_size / 1024. */
	ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024);

	/* Check if HWPoison flag is set. */
	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
	ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
}
/*
 * Unpoison the injected page and verify both the kpageflags HWPoison bit
 * and the HardwareCorrupted accounting return to baseline.
 * (@vaddr is currently unused here.)
 */
static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
		void *vaddr)
{
	unsigned long size;
	uint64_t pfn_flags;

	ASSERT_EQ(unpoison_memory(self->pfn), 0);

	/* Check if HWPoison flag is cleared. */
	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
	ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);

	/* Check if the value of HardwareCorrupted has decreased. */
	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
	ASSERT_EQ(size, self->corrupted_size);
}
/*
 * Inject into an anonymous private page: hard injection must raise SIGBUS
 * on access, soft offline must transparently migrate the page.
 */
TEST_F(memory_failure, anon)
{
	char *addr;
	int ret;

	addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
		MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (addr == MAP_FAILED)
		SKIP(return, "mmap failed, not enough memory.\n");

	/* Fill with a known pattern so check_memory() can verify content. */
	memset(addr, 0xce, self->page_size);
	prepare(_metadata, self, addr);

	/* Returns 0 on the direct path, 1 when re-entered via siglongjmp(). */
	ret = sigsetjmp(signal_jmp_buf, 1);
	if (!self->triggered) {
		self->triggered = true;
		ASSERT_EQ(variant->inject(self, addr), 0);
		/* Touch the page; a hard-poisoned page faults with SIGBUS. */
		FORCE_READ(*addr);
	}

	if (variant->type == MADV_HARD)
		check(_metadata, self, addr, MADV_HARD_ANON, ret);
	else
		check(_metadata, self, addr, MADV_SOFT_ANON, ret);

	cleanup(_metadata, self, addr);
	ASSERT_EQ(munmap(addr, self->page_size), 0);
}
/*
 * Create an unlinked scratch file of @size bytes for the page-cache tests.
 *
 * Returns an O_RDWR fd on success, a negative value on failure. The file is
 * unlinked immediately, so it disappears once the fd is closed.
 *
 * Fix: the original ignored ftruncate() failure and would hand back a
 * zero-length file that the tests then mmap and fault on; treat it as an
 * error instead.
 */
static int prepare_file(const char *fname, unsigned long size)
{
	int fd;

	fd = open(fname, O_RDWR | O_CREAT, 0664);
	if (fd < 0)
		return fd;

	unlink(fname);
	if (ftruncate(fd, size)) {
		close(fd);
		return -1;
	}
	return fd;
}
/*
 * Return the filesystem magic for the file behind @fd, or 0 on failure.
 * Borrowed from mm/gup_longterm.c.
 */
static int get_fs_type(int fd)
{
	struct statfs fs;
	int err;

	/* Retry if interrupted by a signal. */
	while ((err = fstatfs(fd, &fs)) != 0 && errno == EINTR)
		;

	if (err)
		return 0;
	return (int)fs.f_type;
}
/*
 * Inject into a clean (written-back) page-cache page: neither hard nor soft
 * injection should deliver a signal; the page is simply replaced.
 */
TEST_F(memory_failure, clean_pagecache)
{
	int fd;
	char *addr;
	int ret;
	int fs_type;

	fd = prepare_file("./clean-page-cache-test-file", self->page_size);
	if (fd < 0)
		SKIP(return, "failed to open test file.\n");

	/* tmpfs pages behave like anonymous memory; skip them here. */
	fs_type = get_fs_type(fd);
	if (!fs_type || fs_type == TMPFS_MAGIC)
		SKIP(return, "unsupported filesystem :%x\n", fs_type);

	/*
	 * NOTE(review): fd is not closed on the SKIP paths above/below -
	 * harmless in a short-lived test process, but worth confirming.
	 */
	addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
		MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED)
		SKIP(return, "mmap failed, not enough memory.\n");

	memset(addr, 0xce, self->page_size);
	/* fsync() writes the data back so the cached page is clean. */
	fsync(fd);
	prepare(_metadata, self, addr);

	/* Returns 0 on the direct path, 1 when re-entered via siglongjmp(). */
	ret = sigsetjmp(signal_jmp_buf, 1);
	if (!self->triggered) {
		self->triggered = true;
		ASSERT_EQ(variant->inject(self, addr), 0);
		FORCE_READ(*addr);
	}

	if (variant->type == MADV_HARD)
		check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret);
	else
		check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret);

	cleanup(_metadata, self, addr);
	ASSERT_EQ(munmap(addr, self->page_size), 0);
	ASSERT_EQ(close(fd), 0);
}
/*
 * Inject into a dirty page-cache page: hard injection must raise SIGBUS (the
 * data is lost), soft offline must migrate the page without a signal.
 */
TEST_F(memory_failure, dirty_pagecache)
{
	int fd;
	char *addr;
	int ret;
	int fs_type;

	fd = prepare_file("./dirty-page-cache-test-file", self->page_size);
	if (fd < 0)
		SKIP(return, "failed to open test file.\n");

	/* tmpfs pages behave like anonymous memory; skip them here. */
	fs_type = get_fs_type(fd);
	if (!fs_type || fs_type == TMPFS_MAGIC)
		SKIP(return, "unsupported filesystem :%x\n", fs_type);

	/*
	 * NOTE(review): fd is not closed on the SKIP paths above/below -
	 * harmless in a short-lived test process, but worth confirming.
	 */
	addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
		MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED)
		SKIP(return, "mmap failed, not enough memory.\n");

	/* No fsync here: the page is left dirty on purpose. */
	memset(addr, 0xce, self->page_size);
	prepare(_metadata, self, addr);

	/* Returns 0 on the direct path, 1 when re-entered via siglongjmp(). */
	ret = sigsetjmp(signal_jmp_buf, 1);
	if (!self->triggered) {
		self->triggered = true;
		ASSERT_EQ(variant->inject(self, addr), 0);
		FORCE_READ(*addr);
	}

	if (variant->type == MADV_HARD)
		check(_metadata, self, addr, MADV_HARD_DIRTY_PAGECACHE, ret);
	else
		check(_metadata, self, addr, MADV_SOFT_DIRTY_PAGECACHE, ret);

	cleanup(_metadata, self, addr);
	ASSERT_EQ(munmap(addr, self->page_size), 0);
	ASSERT_EQ(close(fd), 0);
}
TEST_HARNESS_MAIN

View file

@ -91,6 +91,8 @@ separated by spaces:
test VMA merge cases behave as expected
- rmap
test rmap behaves as expected
- memory-failure
test memory-failure behaves as expected
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@ -527,6 +529,25 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
CATEGORY="rmap" run_test ./rmap
# Try to load hwpoison_inject if not present; remember whether we loaded it
# so we can unload it again afterwards.
HWPOISON_DIR=/sys/kernel/debug/hwpoison/
if [ ! -d "$HWPOISON_DIR" ]; then
	# "modprobe -R" resolves the alias without loading - use it to probe
	# whether the module exists at all.
	if ! modprobe -q -R hwpoison_inject; then
		echo "Module hwpoison_inject not found, skipping..."
	else
		modprobe hwpoison_inject > /dev/null 2>&1
		LOADED_MOD=1
	fi
fi
# Only run the memory-failure tests when the debugfs interface is available.
if [ -d "$HWPOISON_DIR" ]; then
	CATEGORY="memory-failure" run_test ./memory-failure
fi
# Unload the module again if we were the ones who loaded it.
if [ -n "${LOADED_MOD}" ]; then
	modprobe -r hwpoison_inject > /dev/null 2>&1
fi
if [ "${HAVE_HUGEPAGES}" = 1 ]; then
echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
fi

View file

@ -723,3 +723,44 @@ int ksm_stop(void)
close(ksm_fd);
return ret == 1 ? 0 : -errno;
}
/*
 * Read the "HardwareCorrupted:" value (in kB) from /proc/meminfo.
 * Returns 0 and stores the value in *val on success, -1 otherwise
 * (*val is left untouched on failure).
 */
int get_hardware_corrupted_size(unsigned long *val)
{
	char *line = NULL;
	size_t cap = 0;
	int ret = -1;
	FILE *f;

	f = fopen("/proc/meminfo", "r");
	if (!f)
		return -1;

	while (getline(&line, &cap, f) > 0) {
		unsigned long kb;

		if (sscanf(line, "HardwareCorrupted: %12lu kB", &kb) != 1)
			continue;

		*val = kb;
		ret = 0;
		break;
	}

	free(line);
	fclose(f);
	return ret;
}
/*
 * Clear the poison state of @pfn via debugfs' unpoison-pfn interface.
 * Returns 0 on success, a negative errno on failure.
 */
int unpoison_memory(unsigned long pfn)
{
	char buf[32];
	int fd, len;
	ssize_t written;

	fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY);
	if (fd < 0)
		return -errno;

	len = sprintf(buf, "0x%lx\n", pfn);
	written = write(fd, buf, len);
	close(fd);

	return written > 0 ? 0 : -errno;
}

View file

@ -20,6 +20,7 @@
#define KPF_COMPOUND_HEAD BIT_ULL(15)
#define KPF_COMPOUND_TAIL BIT_ULL(16)
#define KPF_HWPOISON BIT_ULL(19)
#define KPF_THP BIT_ULL(22)
/*
* Ignore the checkpatch warning, we must read from x but don't want to do
@ -154,6 +155,8 @@ long ksm_get_full_scans(void);
int ksm_use_zero_pages(void);
int ksm_start(void);
int ksm_stop(void);
int get_hardware_corrupted_size(unsigned long *val);
int unpoison_memory(unsigned long pfn);
/*
* On ppc64 this will only work with radix 2M hugepage size

View file

@ -6,10 +6,13 @@ default: vma
include ../shared/shared.mk
OFILES = $(SHARED_OFILES) vma.o maple-shim.o
OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o
TARGETS = vma
vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
# These can be varied to test different sizes.
CFLAGS += -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128
main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h
vma: $(OFILES)
$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)

View file

@ -0,0 +1,119 @@
/* SPDX-License-Identifier: GPL-2.0+ */
#pragma once
/*
* Contains declarations that exist in the kernel which have been CUSTOMISED for
* testing purposes to facilitate userland VMA testing.
*/
#ifdef CONFIG_MMU
extern unsigned long mmap_min_addr;
extern unsigned long dac_mmap_min_addr;
#else
#define mmap_min_addr 0UL
#define dac_mmap_min_addr 0UL
#endif
#define VM_WARN_ON(_expr) (WARN_ON(_expr))
#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
#define VM_BUG_ON(_expr) (BUG_ON(_expr))
#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
/* We hardcode this for now. */
#define sysctl_max_map_count 0x1000000UL
#define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
/*
* The shared stubs do not implement this, it amounts to an fprintf(STDERR,...)
* either way :)
*/
#define pr_warn_once pr_err
#define pgtable_supports_soft_dirty() 1
struct anon_vma {
struct anon_vma *root;
struct rb_root_cached rb_root;
/* Test fields. */
bool was_cloned;
bool was_unlinked;
};
static inline void unlink_anon_vmas(struct vm_area_struct *vma)
{
/* For testing purposes, indicate that the anon_vma was unlinked. */
vma->anon_vma->was_unlinked = true;
}
/* Test shim: bump vm_lock_seq so tests can detect that a write began. */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	vma->vm_lock_seq += 1;
}
/*
 * Test shim: bump vm_lock_seq so tests can detect that a write began.
 * Never "interrupted" in the harness, so always reports success.
 */
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
	vma->vm_lock_seq += 1;
	return 0;
}
static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
enum vma_operation operation)
{
/* For testing purposes. We indicate that an anon_vma has been cloned. */
if (src->anon_vma != NULL) {
dst->anon_vma = src->anon_vma;
dst->anon_vma->was_cloned = true;
}
return 0;
}
/* Test shim: allocate a zeroed anon_vma whose root is itself. */
static inline int __anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *av;

	av = calloc(1, sizeof(*av));
	if (av == NULL)
		return -ENOMEM;

	av->root = av;
	vma->anon_vma = av;
	return 0;
}
/* Test shim: allocate vma->anon_vma on first use. */
static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
	if (!vma->anon_vma)
		return __anon_vma_prepare(vma);
	return 0;
}
/* Test shim: optionally reset the VMA reference count to zero. */
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
	if (!reset_refcnt)
		return;

	refcount_set(&vma->vm_refcnt, 0);
}
/*
 * Build a vma_flags_t from @count flag bits.
 *
 * For testing purposes, out-of-range bits are tolerated (silently skipped)
 * rather than asserted on, so tests can probe invalid specifications.
 *
 * Fix: the loop index was a signed int compared against the size_t @count -
 * a signed/unsigned comparison and a truncation hazard for large counts;
 * use size_t throughout.
 */
static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
{
	vma_flags_t flags;
	size_t i;

	vma_flags_clear_all(&flags);
	for (i = 0; i < count; i++)
		if (bits[i] < NUM_VMA_FLAG_BITS)
			vma_flag_set(&flags, bits[i]);
	return flags;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,428 @@
/* SPDX-License-Identifier: GPL-2.0+ */
#pragma once
/*
* Contains declarations that are STUBBED, that is that are rendered no-ops, in
* order to facilitate userland VMA testing.
*/
/* Forward declarations. */
struct mm_struct;
struct vm_area_struct;
struct vm_area_desc;
struct pagetable_move_control;
struct mmap_action;
struct file;
struct anon_vma;
struct anon_vma_chain;
struct address_space;
struct unmap_desc;
#define __bitwise
#define __randomize_layout
#define FIRST_USER_ADDRESS 0UL
#define USER_PGTABLES_CEILING 0UL
#define vma_policy(vma) NULL
#define down_write_nest_lock(sem, nest_lock)
#define data_race(expr) expr
#define ASSERT_EXCLUSIVE_WRITER(x)
struct vm_userfaultfd_ctx {};
struct mempolicy {};
struct mmu_gather {};
struct mutex {};
struct vm_fault {};
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf)
{
}
static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
{
return 0;
}
static inline void free_pgd_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
{
}
static inline int ksm_execve(struct mm_struct *mm)
{
return 0;
}
static inline void ksm_exit(struct mm_struct *mm)
{
}
static inline void vma_numab_state_init(struct vm_area_struct *vma)
{
}
static inline void vma_numab_state_free(struct vm_area_struct *vma)
{
}
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
struct vm_area_struct *new_vma)
{
}
static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}
static inline void mmap_action_prepare(struct mmap_action *action,
struct vm_area_desc *desc)
{
}
static inline int mmap_action_complete(struct mmap_action *action,
struct vm_area_struct *vma)
{
return 0;
}
static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
static inline bool shmem_file(struct file *file)
{
return false;
}
static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
}
static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
{
}
static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t pgprot)
{
return 0;
}
static inline int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf)
{
return 0;
}
/* Currently stubbed but we may later wish to un-stub. */
static inline void vm_acct_memory(long pages);
static inline void mmap_assert_locked(struct mm_struct *mm)
{
}
static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
}
static inline void i_mmap_unlock_write(struct address_space *mapping)
{
}
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
struct list_head *unmaps)
{
return 0;
}
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
}
static inline void mmap_read_unlock(struct mm_struct *mm)
{
}
static inline void mmap_write_unlock(struct mm_struct *mm)
{
}
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
return 0;
}
static inline bool can_modify_mm(struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
return true;
}
static inline void arch_unmap(struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
}
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return true;
}
static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
vm_flags_t vm_flags)
{
}
static inline bool mapping_can_writeback(struct address_space *mapping)
{
return true;
}
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
return false;
}
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
return false;
}
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
return false;
}
static inline void mmap_assert_write_locked(struct mm_struct *mm)
{
}
static inline void mutex_lock(struct mutex *lock)
{
}
static inline void mutex_unlock(struct mutex *lock)
{
}
static inline bool mutex_is_locked(struct mutex *lock)
{
return true;
}
static inline bool signal_pending(void *p)
{
return false;
}
static inline bool is_file_hugepages(struct file *file)
{
return false;
}
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
return 0;
}
static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
unsigned long npages)
{
return true;
}
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
return 0;
}
static inline void vm_acct_memory(long pages)
{
}
static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
struct rb_root_cached *rb)
{
}
static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
struct rb_root_cached *rb)
{
}
static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
{
}
static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
struct rb_root_cached *rb)
{
}
static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
struct rb_root_cached *rb)
{
}
static inline void uprobe_mmap(struct vm_area_struct *vma)
{
}
static inline void uprobe_munmap(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
static inline void i_mmap_lock_write(struct address_space *mapping)
{
}
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
}
static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}
static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
return false;
}
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
}
static inline bool arch_validate_flags(vm_flags_t flags)
{
return true;
}
static inline void vma_close(struct vm_area_struct *vma)
{
}
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
return 0;
}
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
return 0;
}
static inline bool capable(int cap)
{
return true;
}
static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
{
return NULL;
}
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
return true;
}
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
struct anon_vma_name *anon_name2)
{
return true;
}
static inline void might_sleep(void)
{
}
static inline void fput(struct file *file)
{
}
static inline void mpol_put(struct mempolicy *pol)
{
}
static inline void lru_add_drain(void)
{
}
static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
}
static inline void update_hiwater_rss(struct mm_struct *mm)
{
}
static inline void update_hiwater_vm(struct mm_struct *mm)
{
}
static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
{
}
static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap)
{
}
static inline void mapping_unmap_writable(struct address_space *mapping)
{
}
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
{
}
static inline void tlb_finish_mmu(struct mmu_gather *tlb)
{
}
static inline struct file *get_file(struct file *f)
{
return f;
}
static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
return 0;
}
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
struct vm_area_struct *next)
{
}
static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}

55
tools/testing/vma/main.c Normal file
View file

@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shared.h"
/*
* Directly import the VMA implementation here. Our vma_internal.h wrapper
* provides userland-equivalent functionality for everything vma.c uses.
*/
#include "../../../mm/vma_init.c"
#include "../../../mm/vma_exec.c"
#include "../../../mm/vma.c"
/* Tests are included directly so they can test static functions in mm/vma.c. */
#include "tests/merge.c"
#include "tests/mmap.c"
#include "tests/vma.c"
/* Helper functions which utilise static kernel functions. */
struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
{
struct vm_area_struct *vma;
vma = vma_merge_existing_range(vmg);
if (vma)
vma_assert_attached(vma);
return vma;
}
/* Link @vma into @mm's tree, asserting it became attached on success. */
int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	int err = vma_link(mm, vma);

	if (err)
		return err;

	vma_assert_attached(vma);
	return 0;
}
/* Entry point: initialise the shims, run every tests/ *.c suite, report. */
int main(void)
{
	int num_tests = 0, num_fail = 0;

	maple_tree_init();
	vma_state_init();

	run_merge_tests(&num_tests, &num_fail);
	run_mmap_tests(&num_tests, &num_fail);
	run_vma_tests(&num_tests, &num_fail);

	printf("%d tests run, %d passed, %d failed.\n",
	       num_tests, num_tests - num_fail, num_fail);

	return num_fail ? EXIT_FAILURE : EXIT_SUCCESS;
}

131
tools/testing/vma/shared.c Normal file
View file

@ -0,0 +1,131 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shared.h"
bool fail_prealloc;
unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
const struct vm_operations_struct vma_dummy_vm_ops;
struct anon_vma dummy_anon_vma;
struct task_struct __current;
/* Allocate a detached VMA covering [start, end) with the given flags. */
struct vm_area_struct *alloc_vma(struct mm_struct *mm,
		unsigned long start, unsigned long end,
		pgoff_t pgoff, vm_flags_t vm_flags)
{
	struct vm_area_struct *ret;

	ret = vm_area_alloc(mm);
	if (!ret)
		return NULL;

	ret->vm_start = start;
	ret->vm_end = end;
	ret->vm_pgoff = pgoff;
	vm_flags_reset(ret, vm_flags);
	vma_assert_detached(ret);

	return ret;
}
/* Mark @vma detached, then free it. */
void detach_free_vma(struct vm_area_struct *vma)
{
	vma_mark_detached(vma);
	vm_area_free(vma);
}
/*
 * Allocate a VMA and link it into @mm's tree. Returns NULL on allocation or
 * linking failure (the VMA is freed again in the latter case).
 */
struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
		unsigned long start, unsigned long end,
		pgoff_t pgoff, vm_flags_t vm_flags)
{
	struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);

	if (vma == NULL)
		return NULL;

	if (attach_vma(mm, vma)) {
		detach_free_vma(vma);
		return NULL;
	}

	/*
	 * Reset this counter which we use to track whether writes have
	 * begun. Linking to the tree will have caused this to be incremented,
	 * which means we will get a false positive otherwise.
	 */
	vma->vm_lock_seq = UINT_MAX;

	return vma;
}
/* Clear the clone/unlink markers on the shared dummy anon_vma. */
void reset_dummy_anon_vma(void)
{
	dummy_anon_vma.was_cloned = false;
	dummy_anon_vma.was_unlinked = false;
}
/*
 * Detach and free every VMA in @mm, destroy its maple tree and reset shared
 * test state. Returns the number of VMAs removed so tests can assert on it.
 */
int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
{
	struct vm_area_struct *vma;
	int count = 0;

	/* Reset globals a previous test may have left set. */
	fail_prealloc = false;
	reset_dummy_anon_vma();

	vma_iter_set(vmi, 0);
	for_each_vma(*vmi, vma) {
		detach_free_vma(vma);
		count++;
	}

	mtree_destroy(&mm->mm_mt);
	mm->map_count = 0;
	return count;
}
/*
 * Report whether vma_start_write() has been invoked on @vma since the last
 * check, then re-arm the detector.
 *
 * NOTE(review): vm_lock_seq is re-armed to UINT_MAX and read back through a
 * signed int, relying on UINT_MAX converting to -1 so that "seq > -1" is
 * false - implementation-defined but ubiquitous; confirm if porting.
 */
bool vma_write_started(struct vm_area_struct *vma)
{
	int seq = vma->vm_lock_seq;

	/* We reset after each check. */
	vma->vm_lock_seq = UINT_MAX;

	/* The vma_start_write() stub simply increments this value. */
	return seq > -1;
}
/* Wire @anon_vma into @vma via the caller-supplied chain entry @avc. */
void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
		struct anon_vma_chain *avc, struct anon_vma *anon_vma)
{
	vma->anon_vma = anon_vma;
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	avc->anon_vma = vma->anon_vma;
}
/* Provide a simple VMA/anon_vma setup using the shared dummy_anon_vma. */
void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
		struct anon_vma_chain *avc)
{
	__vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
}
/* Userland stand-in for the kernel's current-task accessor. */
struct task_struct *get_current(void)
{
	return &__current;
}
/* Userland stub: report every resource limit as unlimited. */
unsigned long rlimit(unsigned int limit)
{
	return (unsigned long)-1;
}
/* Set @vma's range to [start, end) with file offset @pgoff. */
void vma_set_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		pgoff_t pgoff)
{
	vma->vm_pgoff = pgoff;
	vma->vm_start = start;
	vma->vm_end = end;
}

114
tools/testing/vma/shared.h Normal file
View file

@ -0,0 +1,114 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "generated/bit-length.h"
#include "maple-shared.h"
#include "vma_internal.h"
#include "../../../mm/vma.h"
/* Simple test runner. Assumes local num_[fail, tests] counters. */
#define TEST(name) \
do { \
(*num_tests)++; \
if (!test_##name()) { \
(*num_fail)++; \
fprintf(stderr, "Test " #name " FAILED\n"); \
} \
} while (0)
#define ASSERT_TRUE(_expr) \
do { \
if (!(_expr)) { \
fprintf(stderr, \
"Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
__FILE__, __LINE__, __FUNCTION__, #_expr); \
return false; \
} \
} while (0)
#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
extern bool fail_prealloc;
/* Override vma_iter_prealloc() so we can choose to fail it. */
#define vma_iter_prealloc(vmi, vma) \
(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
extern unsigned long mmap_min_addr;
extern unsigned long dac_mmap_min_addr;
extern unsigned long stack_guard_gap;
extern const struct vm_operations_struct vma_dummy_vm_ops;
extern struct anon_vma dummy_anon_vma;
extern struct task_struct __current;
/*
* Helper function which provides a wrapper around a merge existing VMA
* operation.
*
* Declared in main.c as uses static VMA function.
*/
struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg);
/*
* Helper function to allocate a VMA and link it to the tree.
*
* Declared in main.c as uses static VMA function.
*/
int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma);
/* Helper function providing a dummy vm_ops->close() method.*/
static inline void dummy_close(struct vm_area_struct *)
{
}
/* Helper function to simply allocate a VMA. */
struct vm_area_struct *alloc_vma(struct mm_struct *mm,
unsigned long start, unsigned long end,
pgoff_t pgoff, vm_flags_t vm_flags);
/* Helper function to detach and free a VMA. */
void detach_free_vma(struct vm_area_struct *vma);
/* Helper function to allocate a VMA and link it to the tree. */
struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
unsigned long start, unsigned long end,
pgoff_t pgoff, vm_flags_t vm_flags);
/*
* Helper function to reset the dummy anon_vma to indicate it has not been
* duplicated.
*/
void reset_dummy_anon_vma(void);
/*
* Helper function to remove all VMAs and destroy the maple tree associated with
* a virtual address space. Returns a count of VMAs in the tree.
*/
int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi);
/* Helper function to determine if VMA has had vma_start_write() performed. */
bool vma_write_started(struct vm_area_struct *vma);
void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
struct anon_vma_chain *avc, struct anon_vma *anon_vma);
/* Provide a simple dummy VMA/anon_vma dummy setup for testing. */
void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
struct anon_vma_chain *avc);
/* Helper function to specify a VMA's range. */
void vma_set_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
pgoff_t pgoff);

View file

@ -1,132 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "generated/bit-length.h"
#include "maple-shared.h"
#include "vma_internal.h"
/* Include so header guard set. */
#include "../../../mm/vma.h"
static bool fail_prealloc;
/* Then override vma_iter_prealloc() so we can choose to fail it. */
#define vma_iter_prealloc(vmi, vma) \
(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
/*
* Directly import the VMA implementation here. Our vma_internal.h wrapper
* provides userland-equivalent functionality for everything vma.c uses.
*/
#include "../../../mm/vma_init.c"
#include "../../../mm/vma_exec.c"
#include "../../../mm/vma.c"
const struct vm_operations_struct vma_dummy_vm_ops;
static struct anon_vma dummy_anon_vma;
#define ASSERT_TRUE(_expr) \
do { \
if (!(_expr)) { \
fprintf(stderr, \
"Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
__FILE__, __LINE__, __FUNCTION__, #_expr); \
return false; \
} \
} while (0)
#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
static struct task_struct __current;
struct task_struct *get_current(void)
{
return &__current;
}
unsigned long rlimit(unsigned int limit)
{
return (unsigned long)-1;
}
/* Helper function to simply allocate a VMA. */
static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
unsigned long start,
unsigned long end,
pgoff_t pgoff,
vm_flags_t vm_flags)
{
struct vm_area_struct *vma = vm_area_alloc(mm);
if (vma == NULL)
return NULL;
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
vm_flags_reset(vma, vm_flags);
vma_assert_detached(vma);
return vma;
}
/* Helper function to allocate a VMA and link it to the tree. */
static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
int res;
res = vma_link(mm, vma);
if (!res)
vma_assert_attached(vma);
return res;
}
static void detach_free_vma(struct vm_area_struct *vma)
{
vma_mark_detached(vma);
vm_area_free(vma);
}
/* Helper function to allocate a VMA and link it to the tree. */
static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
unsigned long start,
unsigned long end,
pgoff_t pgoff,
vm_flags_t vm_flags)
{
struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);
if (vma == NULL)
return NULL;
if (attach_vma(mm, vma)) {
detach_free_vma(vma);
return NULL;
}
/*
* Reset this counter which we use to track whether writes have
* begun. Linking to the tree will have caused this to be incremented,
* which means we will get a false positive otherwise.
*/
vma->vm_lock_seq = UINT_MAX;
return vma;
}
/* Helper function which provides a wrapper around a merge new VMA operation. */
static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
{
@ -146,20 +19,6 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
return vma;
}
/*
* Helper function which provides a wrapper around a merge existing VMA
* operation.
*/
static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
{
struct vm_area_struct *vma;
vma = vma_merge_existing_range(vmg);
if (vma)
vma_assert_attached(vma);
return vma;
}
/*
* Helper function which provides a wrapper around the expansion of an existing
* VMA.
@ -173,7 +32,7 @@ static int expand_existing(struct vma_merge_struct *vmg)
* Helper function to reset merge state the associated VMA iterator to a
* specified new range.
*/
static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags)
{
vma_iter_set(vmg->vmi, start);
@ -211,9 +70,8 @@ static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long s
* VMA, link it to the maple tree and return it.
*/
static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
struct vma_merge_struct *vmg,
unsigned long start, unsigned long end,
pgoff_t pgoff, vm_flags_t vm_flags,
struct vma_merge_struct *vmg, unsigned long start,
unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags,
bool *was_merged)
{
struct vm_area_struct *merged;
@ -234,72 +92,6 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
return alloc_and_link_vma(mm, start, end, pgoff, vm_flags);
}
/*
* Helper function to reset the dummy anon_vma to indicate it has not been
* duplicated.
*/
/*
 * Return the shared dummy anon_vma to a pristine state so subsequent
 * assertions about cloning/unlinking start from known-false flags.
 */
static void reset_dummy_anon_vma(void)
{
	dummy_anon_vma.was_unlinked = false;
	dummy_anon_vma.was_cloned = false;
}
/*
* Helper function to remove all VMAs and destroy the maple tree associated with
* a virtual address space. Returns a count of VMAs in the tree.
*/
/*
 * Tear down every VMA in @mm, destroy its maple tree and reset the
 * test-global failure/dummy state. Returns the number of VMAs that
 * were present in the tree.
 */
static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
{
	struct vm_area_struct *curr;
	int nr_vmas = 0;

	/* Reset global test state before the next test runs. */
	fail_prealloc = false;
	reset_dummy_anon_vma();

	vma_iter_set(vmi, 0);
	for_each_vma(*vmi, curr) {
		detach_free_vma(curr);
		nr_vmas++;
	}

	mtree_destroy(&mm->mm_mt);
	mm->map_count = 0;
	return nr_vmas;
}
/* Helper function to determine if VMA has had vma_start_write() performed. */
/* Has vma_start_write() been invoked on @vma since the previous check? */
static bool vma_write_started(struct vm_area_struct *vma)
{
	const int seq = vma->vm_lock_seq;

	/* Reset so each call only observes writes since the last one. */
	vma->vm_lock_seq = UINT_MAX;

	/* The vma_start_write() stub simply increments this value. */
	return seq >= 0;
}
/*
 * Helper function providing a dummy, no-op vm_ops->close() method. Used so
 * tests can give a VMA a close handler, which affects mergeability.
 */
static void dummy_close(struct vm_area_struct *)
{
}
/*
 * Wire @anon_vma into @vma through the caller-provided chain entry @avc,
 * initialising the VMA's anon_vma_chain list in the process.
 */
static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
				     struct anon_vma_chain *avc,
				     struct anon_vma *anon_vma)
{
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->anon_vma = anon_vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
}
/* Attach the shared dummy anon_vma to @vma via the chain entry @avc. */
static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
				   struct anon_vma_chain *avc)
{
	__vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
}
static bool test_simple_merge(void)
{
struct vm_area_struct *vma;
@ -1616,39 +1408,6 @@ static bool test_merge_extend(void)
return true;
}
/*
 * Exercise copy_vma(): moving a VMA to a lower, non-adjacent range must
 * produce a fresh VMA, while moving it next to a compatible neighbour must
 * merge with that neighbour.
 */
static bool test_copy_vma(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
struct mm_struct mm = {};
bool need_locks = false;
VMA_ITERATOR(vmi, &mm, 0);
struct vm_area_struct *vma, *vma_new, *vma_next;
/* Move backwards and do not merge. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
/* A distinct VMA covering the new range should have been created. */
ASSERT_NE(vma_new, vma);
ASSERT_EQ(vma_new->vm_start, 0);
ASSERT_EQ(vma_new->vm_end, 0x2000);
ASSERT_EQ(vma_new->vm_pgoff, 0);
vma_assert_attached(vma_new);
cleanup_mm(&mm, &vmi);
/* Move a VMA into position next to another and merge the two. */
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
vma_assert_attached(vma_new);
/* The moved VMA should have merged into the following one. */
ASSERT_EQ(vma_new, vma_next);
cleanup_mm(&mm, &vmi);
return true;
}
static bool test_expand_only_mode(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
@ -1689,73 +1448,8 @@ static bool test_expand_only_mode(void)
return true;
}
static bool test_mmap_region_basic(void)
static void run_merge_tests(int *num_tests, int *num_fail)
{
struct mm_struct mm = {};
unsigned long addr;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, &mm, 0);
current->mm = &mm;
/* Map at 0x300000, length 0x3000. */
addr = __mmap_region(NULL, 0x300000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x300, NULL);
ASSERT_EQ(addr, 0x300000);
/* Map at 0x250000, length 0x3000. */
addr = __mmap_region(NULL, 0x250000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x250, NULL);
ASSERT_EQ(addr, 0x250000);
/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
addr = __mmap_region(NULL, 0x303000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x303, NULL);
ASSERT_EQ(addr, 0x303000);
/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
addr = __mmap_region(NULL, 0x24d000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x24d, NULL);
ASSERT_EQ(addr, 0x24d000);
ASSERT_EQ(mm.map_count, 2);
for_each_vma(vmi, vma) {
if (vma->vm_start == 0x300000) {
ASSERT_EQ(vma->vm_end, 0x306000);
ASSERT_EQ(vma->vm_pgoff, 0x300);
} else if (vma->vm_start == 0x24d000) {
ASSERT_EQ(vma->vm_end, 0x253000);
ASSERT_EQ(vma->vm_pgoff, 0x24d);
} else {
ASSERT_FALSE(true);
}
}
cleanup_mm(&mm, &vmi);
return true;
}
int main(void)
{
int num_tests = 0, num_fail = 0;
maple_tree_init();
vma_state_init();
#define TEST(name) \
do { \
num_tests++; \
if (!test_##name()) { \
num_fail++; \
fprintf(stderr, "Test " #name " FAILED\n"); \
} \
} while (0)
/* Very simple tests to kick the tyres. */
TEST(simple_merge);
TEST(simple_modify);
@ -1771,15 +1465,5 @@ int main(void)
TEST(dup_anon_vma);
TEST(vmi_prealloc_fail);
TEST(merge_extend);
TEST(copy_vma);
TEST(expand_only_mode);
TEST(mmap_region_basic);
#undef TEST
printf("%d tests run, %d passed, %d failed.\n",
num_tests, num_tests - num_fail, num_fail);
return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

View file

@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Exercise __mmap_region(): establish two standalone mappings, then map
 * ranges adjacent to each and verify each pair merges into a single larger
 * VMA (one merge ahead of an existing VMA, one behind).
 */
static bool test_mmap_region_basic(void)
{
struct mm_struct mm = {};
unsigned long addr;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, &mm, 0);
current->mm = &mm;
/* Map at 0x300000, length 0x3000. */
addr = __mmap_region(NULL, 0x300000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x300, NULL);
ASSERT_EQ(addr, 0x300000);
/* Map at 0x250000, length 0x3000. */
addr = __mmap_region(NULL, 0x250000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x250, NULL);
ASSERT_EQ(addr, 0x250000);
/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
addr = __mmap_region(NULL, 0x303000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x303, NULL);
ASSERT_EQ(addr, 0x303000);
/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
addr = __mmap_region(NULL, 0x24d000, 0x3000,
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
0x24d, NULL);
ASSERT_EQ(addr, 0x24d000);
/* The four mappings should have merged pairwise into two VMAs. */
ASSERT_EQ(mm.map_count, 2);
for_each_vma(vmi, vma) {
if (vma->vm_start == 0x300000) {
ASSERT_EQ(vma->vm_end, 0x306000);
ASSERT_EQ(vma->vm_pgoff, 0x300);
} else if (vma->vm_start == 0x24d000) {
ASSERT_EQ(vma->vm_end, 0x253000);
ASSERT_EQ(vma->vm_pgoff, 0x24d);
} else {
/* Any other VMA is unexpected - fail the test. */
ASSERT_FALSE(true);
}
}
cleanup_mm(&mm, &vmi);
return true;
}
/*
 * Register all mmap-related tests with the harness. The TEST() macro is
 * assumed to update *num_tests / *num_fail - confirm against its definition.
 */
static void run_mmap_tests(int *num_tests, int *num_fail)
{
TEST(mmap_region_basic);
}

View file

@ -0,0 +1,339 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Check that a legacy vm_flags_t value and a bitmap vma_flags_t describe the
 * same flag set: the first bitmap word must equal the legacy value exactly,
 * and every bit beyond the first word must be zero.
 *
 * Returns true if the two representations agree.
 */
static bool compare_legacy_flags(vm_flags_t legacy_flags, vma_flags_t flags)
{
	const unsigned long legacy_val = legacy_flags;
	/* The lower word should contain the precise same value. */
	const unsigned long flags_lower = flags.__vma_flags[0];

#if NUM_VMA_FLAGS > BITS_PER_LONG
	int i;

	/*
	 * All bits in higher flag words should be zero. Round the word count
	 * up so that a trailing, partially-used word is also checked - plain
	 * NUM_VMA_FLAGS / BITS_PER_LONG truncates and would skip it whenever
	 * NUM_VMA_FLAGS is not a multiple of BITS_PER_LONG.
	 */
	for (i = 1; i < (NUM_VMA_FLAGS + BITS_PER_LONG - 1) / BITS_PER_LONG; i++) {
		if (flags.__vma_flags[i] != 0)
			return false;
	}
#endif

	static_assert(sizeof(legacy_flags) == sizeof(unsigned long));
	return legacy_val == flags_lower;
}
/*
 * Exercise copy_vma(): moving a VMA to a lower, non-adjacent range must
 * produce a fresh VMA, while moving it next to a compatible neighbour must
 * merge with that neighbour.
 */
static bool test_copy_vma(void)
{
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
struct mm_struct mm = {};
bool need_locks = false;
VMA_ITERATOR(vmi, &mm, 0);
struct vm_area_struct *vma, *vma_new, *vma_next;
/* Move backwards and do not merge. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
/* A distinct VMA covering the new range should have been created. */
ASSERT_NE(vma_new, vma);
ASSERT_EQ(vma_new->vm_start, 0);
ASSERT_EQ(vma_new->vm_end, 0x2000);
ASSERT_EQ(vma_new->vm_pgoff, 0);
vma_assert_attached(vma_new);
cleanup_mm(&mm, &vmi);
/* Move a VMA into position next to another and merge the two. */
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
vma_assert_attached(vma_new);
/* The moved VMA should have merged into the following one. */
ASSERT_EQ(vma_new, vma_next);
cleanup_mm(&mm, &vmi);
return true;
}
/*
 * Walk every bit of the first bitmap word, setting each bit through every
 * flag-setting API (raw vma_flags_t, VMA, and VMA descriptor - both the
 * per-bit and mask variants) and checking that the bitmap always matches an
 * equivalently-built legacy vm_flags_t value.
 */
static bool test_vma_flags_unchanged(void)
{
vma_flags_t flags = EMPTY_VMA_FLAGS;
vm_flags_t legacy_flags = 0;
int bit;
struct vm_area_struct vma;
struct vm_area_desc desc;
/*
 * NOTE(review): only the flag fields of vma/desc are initialized here -
 * assumes the flag helpers touch nothing else in these structs; confirm.
 */
vma.flags = EMPTY_VMA_FLAGS;
desc.vma_flags = EMPTY_VMA_FLAGS;
for (bit = 0; bit < BITS_PER_LONG; bit++) {
vma_flags_t mask = mk_vma_flags(bit);
/* Accumulate the legacy representation of the same bit set. */
legacy_flags |= (1UL << bit);
/* Individual flags. */
vma_flags_set(&flags, bit);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
/* Via mask. */
vma_flags_set_mask(&flags, mask);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
/* Same for VMA. */
vma_set_flags(&vma, bit);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
vma_set_flags_mask(&vma, mask);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
/* Same for VMA descriptor. */
vma_desc_set_flags(&desc, bit);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
vma_desc_set_flags_mask(&desc, mask);
ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
}
return true;
}
/*
 * Assert that vma_flags_clear_all() zeroes every bit of a fully-populated
 * vma_flags_t, leaving it identical to EMPTY_VMA_FLAGS, both by whole-object
 * comparison and word by word.
 */
static bool test_vma_flags_cleared(void)
{
	const vma_flags_t empty = EMPTY_VMA_FLAGS;
	vma_flags_t flags;
	size_t i;

	/*
	 * Set all bits high. memset with 0xff fills every bit; the previous
	 * fill byte of 1 only set bit 0 of each byte, weakening the test.
	 */
	memset(&flags, 0xff, sizeof(flags));

	/* Try to clear. */
	vma_flags_clear_all(&flags);

	/* Equal to EMPTY_VMA_FLAGS? */
	ASSERT_EQ(memcmp(&empty, &flags, sizeof(flags)), 0);

	/*
	 * Make sure every unsigned long entry in the bitmap array is zero.
	 * Bound by the element count - the previous bound divided a byte
	 * count by BITS_PER_LONG (a bit count), visiting at most 1/8 of the
	 * words and, for a single-word bitmap, none at all.
	 */
	for (i = 0; i < sizeof(flags) / sizeof(flags.__vma_flags[0]); i++) {
		const unsigned long val = flags.__vma_flags[i];

		ASSERT_EQ(val, 0);
	}

	return true;
}
/*
* Assert that VMA flag functions that operate at the system word level function
* correctly.
*/
/*
 * Assert that VMA flag functions that operate at the system word level function
 * correctly: overwriting, setting and clearing the first word must never
 * disturb bits held in higher words (64 and 65 here).
 *
 * NOTE(review): bits 64/65 are used unconditionally, unlike the
 * NUM_VMA_FLAG_BITS > 64 guards used by sibling tests - confirm the bitmap
 * always has more than one word on every configuration this runs on.
 */
static bool test_vma_flags_word(void)
{
vma_flags_t flags = EMPTY_VMA_FLAGS;
/* Expected end state for every sub-test below. */
const vma_flags_t comparison =
mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 64, 65);
/* Set some custom high flags. */
vma_flags_set(&flags, 64, 65);
/* Now overwrite the first word. */
vma_flags_overwrite_word(&flags, VM_READ | VM_WRITE);
/* Ensure they are equal. */
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
flags = EMPTY_VMA_FLAGS;
vma_flags_set(&flags, 64, 65);
/* Do the same with the _once() equivalent. */
vma_flags_overwrite_word_once(&flags, VM_READ | VM_WRITE);
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
flags = EMPTY_VMA_FLAGS;
vma_flags_set(&flags, 64, 65);
/* Make sure we can set a word without disturbing other bits. */
vma_flags_set(&flags, VMA_WRITE_BIT);
vma_flags_set_word(&flags, VM_READ);
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
flags = EMPTY_VMA_FLAGS;
vma_flags_set(&flags, 64, 65);
/* Make sure we can clear a word without disturbing other bits. */
vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
vma_flags_clear_word(&flags, VM_EXEC);
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
return true;
}
/* Ensure that vma_flags_test() and friends works correctly. */
static bool test_vma_flags_test(void)
{
const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
VMA_EXEC_BIT, 64, 65);
struct vm_area_struct vma;
struct vm_area_desc desc;
vma.flags = flags;
desc.vma_flags = flags;
#define do_test(...) \
ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__)); \
ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__))
#define do_test_all_true(...) \
ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__)); \
ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__))
#define do_test_all_false(...) \
ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__)); \
ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__))
/*
* Testing for some flags that are present, some that are not - should
* pass. ANY flags matching should work.
*/
do_test(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
/* However, the ...test_all() variant should NOT pass. */
do_test_all_false(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
/* But should pass for flags present. */
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
/* Also subsets... */
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT);
do_test_all_true(VMA_READ_BIT);
/*
* Check _mask variant. We don't need to test extensively as macro
* helper is the equivalent.
*/
ASSERT_TRUE(vma_flags_test_mask(&flags, flags));
ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags));
/* Single bits. */
do_test(VMA_READ_BIT);
do_test(VMA_WRITE_BIT);
do_test(VMA_EXEC_BIT);
#if NUM_VMA_FLAG_BITS > 64
do_test(64);
do_test(65);
#endif
/* Two bits. */
do_test(VMA_READ_BIT, VMA_WRITE_BIT);
do_test(VMA_READ_BIT, VMA_EXEC_BIT);
do_test(VMA_WRITE_BIT, VMA_EXEC_BIT);
/* Ordering shouldn't matter. */
do_test(VMA_WRITE_BIT, VMA_READ_BIT);
do_test(VMA_EXEC_BIT, VMA_READ_BIT);
do_test(VMA_EXEC_BIT, VMA_WRITE_BIT);
#if NUM_VMA_FLAG_BITS > 64
do_test(VMA_READ_BIT, 64);
do_test(VMA_WRITE_BIT, 64);
do_test(64, VMA_READ_BIT);
do_test(64, VMA_WRITE_BIT);
do_test(VMA_READ_BIT, 65);
do_test(VMA_WRITE_BIT, 65);
do_test(65, VMA_READ_BIT);
do_test(65, VMA_WRITE_BIT);
#endif
/* Three bits. */
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
#if NUM_VMA_FLAG_BITS > 64
/* No need to consider every single permutation. */
do_test(VMA_READ_BIT, VMA_WRITE_BIT, 64);
do_test(VMA_READ_BIT, VMA_WRITE_BIT, 65);
/* Four bits. */
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 65);
/* Five bits. */
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
#endif
#undef do_test
#undef do_test_all_true
#undef do_test_all_false
return true;
}
/* Ensure that vma_flags_clear() and friends works correctly. */
static bool test_vma_flags_clear(void)
{
vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
VMA_EXEC_BIT, 64, 65);
vma_flags_t mask = mk_vma_flags(VMA_EXEC_BIT, 64);
struct vm_area_struct vma;
struct vm_area_desc desc;
vma.flags = flags;
desc.vma_flags = flags;
/* Cursory check of _mask() variant, as the helper macros imply. */
vma_flags_clear_mask(&flags, mask);
vma_flags_clear_mask(&vma.flags, mask);
vma_desc_clear_flags_mask(&desc, mask);
ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64));
ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64));
ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64));
/* Reset. */
vma_flags_set(&flags, VMA_EXEC_BIT, 64);
vma_set_flags(&vma, VMA_EXEC_BIT, 64);
vma_desc_set_flags(&desc, VMA_EXEC_BIT, 64);
/*
* Clear the flags and assert clear worked, then reset flags back to
* include specified flags.
*/
#define do_test_and_reset(...) \
vma_flags_clear(&flags, __VA_ARGS__); \
vma_flags_clear(&vma.flags, __VA_ARGS__); \
vma_desc_clear_flags(&desc, __VA_ARGS__); \
ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__)); \
ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__)); \
ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__)); \
vma_flags_set(&flags, __VA_ARGS__); \
vma_set_flags(&vma, __VA_ARGS__); \
vma_desc_set_flags(&desc, __VA_ARGS__)
/* Single flags. */
do_test_and_reset(VMA_READ_BIT);
do_test_and_reset(VMA_WRITE_BIT);
do_test_and_reset(VMA_EXEC_BIT);
do_test_and_reset(64);
do_test_and_reset(65);
/* Two flags, in different orders. */
do_test_and_reset(VMA_READ_BIT, VMA_WRITE_BIT);
do_test_and_reset(VMA_READ_BIT, VMA_EXEC_BIT);
do_test_and_reset(VMA_READ_BIT, 64);
do_test_and_reset(VMA_READ_BIT, 65);
do_test_and_reset(VMA_WRITE_BIT, VMA_READ_BIT);
do_test_and_reset(VMA_WRITE_BIT, VMA_EXEC_BIT);
do_test_and_reset(VMA_WRITE_BIT, 64);
do_test_and_reset(VMA_WRITE_BIT, 65);
do_test_and_reset(VMA_EXEC_BIT, VMA_READ_BIT);
do_test_and_reset(VMA_EXEC_BIT, VMA_WRITE_BIT);
do_test_and_reset(VMA_EXEC_BIT, 64);
do_test_and_reset(VMA_EXEC_BIT, 65);
do_test_and_reset(64, VMA_READ_BIT);
do_test_and_reset(64, VMA_WRITE_BIT);
do_test_and_reset(64, VMA_EXEC_BIT);
do_test_and_reset(64, 65);
do_test_and_reset(65, VMA_READ_BIT);
do_test_and_reset(65, VMA_WRITE_BIT);
do_test_and_reset(65, VMA_EXEC_BIT);
do_test_and_reset(65, 64);
/* Three flags. */
#undef do_test_some_missing
#undef do_test_and_reset
return true;
}
/*
 * Register all VMA flag/copy tests with the harness. The TEST() macro is
 * assumed to update *num_tests / *num_fail - confirm against its definition.
 */
static void run_vma_tests(int *num_tests, int *num_fail)
{
TEST(copy_vma);
TEST(vma_flags_unchanged);
TEST(vma_flags_cleared);
TEST(vma_flags_word);
TEST(vma_flags_test);
TEST(vma_flags_clear);
}

File diff suppressed because it is too large Load diff