mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:24:47 +01:00
mm.git review status for linus..mm-stable
Total patches: 36 Reviews/patch: 1.77 Reviewed rate: 83% - The 2 patch series "mm/vmscan: fix demotion targets checks in reclaim/demotion" from Bing Jiao fixes a couple of issues in the demotion code - pages were failing demotion and were finding themselves demoted into disallowed nodes. - The 11 patch series "Remove XA_ZERO from error recovery of dup_mmap()" from Liam Howlett fixes a rare maple tree race and performs a number of cleanups. - The 13 patch series "mm: add bitmap VMA flag helpers and convert all mmap_prepare to use them" from Lorenzo Stoakes implements a lot of cleanups following on from the conversion of the VMA flags into a bitmap. - The 5 patch series "support batch checking of references and unmapping for large folios" from Baolin Wang implements batching to greatly improve the performance of reclaiming clean file-backed large folios. - The 3 patch series "selftests/mm: add memory failure selftests" from Miaohe Lin does as claimed. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaZaIEQAKCRDdBJ7gKXxA jj73AQCQDwLoipDiQRGyjB5BDYydymWuDoiB1tlDPHfYAP3b/QD/UQtVlOEXqwM3 naOKs3NQ1pwnfhDaQMirGw2eAnJ1SQY= =6Iif -----END PGP SIGNATURE----- Merge tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull more MM updates from Andrew Morton: - "mm/vmscan: fix demotion targets checks in reclaim/demotion" fixes a couple of issues in the demotion code - pages were failing demotion and were finding themselves demoted into disallowed nodes (Bing Jiao) - "Remove XA_ZERO from error recovery of dup_mmap()" fixes a rare maple tree race and performs a number of cleanups (Liam Howlett) - "mm: add bitmap VMA flag helpers and convert all mmap_prepare to use them" implements a lot of cleanups following on from the conversion of the VMA flags into a bitmap (Lorenzo Stoakes) - "support batch checking of references and unmapping for large folios" implements batching to greatly improve the performance of reclaiming 
clean file-backed large folios (Baolin Wang) - "selftests/mm: add memory failure selftests" does as claimed (Miaohe Lin) * tag 'mm-stable-2026-02-18-19-48' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (36 commits) mm/page_alloc: clear page->private in free_pages_prepare() selftests/mm: add memory failure dirty pagecache test selftests/mm: add memory failure clean pagecache test selftests/mm: add memory failure anonymous page test mm: rmap: support batched unmapping for file large folios arm64: mm: implement the architecture-specific clear_flush_young_ptes() arm64: mm: support batch clearing of the young flag for large folios arm64: mm: factor out the address and ptep alignment into a new helper mm: rmap: support batched checks of the references for large folios tools/testing/vma: add VMA userland tests for VMA flag functions tools/testing/vma: separate out vma_internal.h into logical headers tools/testing/vma: separate VMA userland tests into separate files mm: make vm_area_desc utilise vma_flags_t only mm: update all remaining mmap_prepare users to use vma_flags_t mm: update shmem_[kernel]_file_*() functions to use vma_flags_t mm: update secretmem to use VMA flags on mmap_prepare mm: update hugetlbfs to use VMA flags on mmap_prepare mm: add basic VMA flag operation helper functions tools: bitmap: add missing bitmap_[subset(), andnot()] mm: add mk_vma_flags() bitmap flag macro helper ...
This commit is contained in:
commit
eeccf287a2
82 changed files with 3941 additions and 2521 deletions
|
|
@ -11845,6 +11845,7 @@ F: include/linux/memory-failure.h
|
|||
F: include/trace/events/memory-failure.h
|
||||
F: mm/hwpoison-inject.c
|
||||
F: mm/memory-failure.c
|
||||
F: tools/testing/selftests/mm/memory-failure.c
|
||||
|
||||
HYCON HY46XX TOUCHSCREEN SUPPORT
|
||||
M: Giulio Benetti <giulio.benetti@benettiengineering.com>
|
||||
|
|
|
|||
|
|
@ -1648,10 +1648,10 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
|
|||
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
unsigned int nr, int full);
|
||||
extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep, unsigned int nr);
|
||||
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep, unsigned int nr);
|
||||
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned int nr);
|
||||
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
|
||||
|
|
@ -1823,7 +1823,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
|||
if (likely(!pte_valid_cont(orig_pte)))
|
||||
return __ptep_test_and_clear_young(vma, addr, ptep);
|
||||
|
||||
return contpte_ptep_test_and_clear_young(vma, addr, ptep);
|
||||
return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1);
|
||||
}
|
||||
|
||||
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||
|
|
@ -1835,7 +1835,18 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
|||
if (likely(!pte_valid_cont(orig_pte)))
|
||||
return __ptep_clear_flush_young(vma, addr, ptep);
|
||||
|
||||
return contpte_ptep_clear_flush_young(vma, addr, ptep);
|
||||
return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
|
||||
}
|
||||
|
||||
#define clear_flush_young_ptes clear_flush_young_ptes
|
||||
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
unsigned int nr)
|
||||
{
|
||||
if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
|
||||
return __ptep_clear_flush_young(vma, addr, ptep);
|
||||
|
||||
return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
|
||||
}
|
||||
|
||||
#define wrprotect_ptes wrprotect_ptes
|
||||
|
|
|
|||
|
|
@ -26,6 +26,26 @@ static inline pte_t *contpte_align_down(pte_t *ptep)
|
|||
return PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
|
||||
}
|
||||
|
||||
static inline pte_t *contpte_align_addr_ptep(unsigned long *start,
|
||||
unsigned long *end, pte_t *ptep,
|
||||
unsigned int nr)
|
||||
{
|
||||
/*
|
||||
* Note: caller must ensure these nr PTEs are consecutive (present)
|
||||
* PTEs that map consecutive pages of the same large folio within a
|
||||
* single VMA and a single page table.
|
||||
*/
|
||||
if (pte_cont(__ptep_get(ptep + nr - 1)))
|
||||
*end = ALIGN(*end, CONT_PTE_SIZE);
|
||||
|
||||
if (pte_cont(__ptep_get(ptep))) {
|
||||
*start = ALIGN_DOWN(*start, CONT_PTE_SIZE);
|
||||
ptep = contpte_align_down(ptep);
|
||||
}
|
||||
|
||||
return ptep;
|
||||
}
|
||||
|
||||
static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned int nr)
|
||||
{
|
||||
|
|
@ -488,8 +508,9 @@ pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
|
||||
|
||||
int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
unsigned int nr)
|
||||
{
|
||||
/*
|
||||
* ptep_clear_flush_young() technically requires us to clear the access
|
||||
|
|
@ -498,41 +519,45 @@ int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
|
|||
* contig range when the range is covered by a single folio, we can get
|
||||
* away with clearing young for the whole contig range here, so we avoid
|
||||
* having to unfold.
|
||||
*
|
||||
* The 'nr' means consecutive (present) PTEs that map consecutive pages
|
||||
* of the same large folio in a single VMA and a single page table.
|
||||
*/
|
||||
|
||||
unsigned long end = addr + nr * PAGE_SIZE;
|
||||
int young = 0;
|
||||
int i;
|
||||
|
||||
ptep = contpte_align_down(ptep);
|
||||
addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
|
||||
|
||||
for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
|
||||
ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
|
||||
for (; addr != end; ptep++, addr += PAGE_SIZE)
|
||||
young |= __ptep_test_and_clear_young(vma, addr, ptep);
|
||||
|
||||
return young;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(contpte_ptep_test_and_clear_young);
|
||||
EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);
|
||||
|
||||
int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
unsigned int nr)
|
||||
{
|
||||
int young;
|
||||
|
||||
young = contpte_ptep_test_and_clear_young(vma, addr, ptep);
|
||||
young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
|
||||
|
||||
if (young) {
|
||||
unsigned long end = addr + nr * PAGE_SIZE;
|
||||
|
||||
contpte_align_addr_ptep(&addr, &end, ptep, nr);
|
||||
/*
|
||||
* See comment in __ptep_clear_flush_young(); same rationale for
|
||||
* eliding the trailing DSB applies here.
|
||||
*/
|
||||
addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
|
||||
__flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
|
||||
__flush_tlb_range_nosync(vma->vm_mm, addr, end,
|
||||
PAGE_SIZE, true, 3);
|
||||
}
|
||||
|
||||
return young;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(contpte_ptep_clear_flush_young);
|
||||
EXPORT_SYMBOL_GPL(contpte_clear_flush_young_ptes);
|
||||
|
||||
void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned int nr)
|
||||
|
|
@ -569,14 +594,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
|
|||
unsigned long start = addr;
|
||||
unsigned long end = start + nr * PAGE_SIZE;
|
||||
|
||||
if (pte_cont(__ptep_get(ptep + nr - 1)))
|
||||
end = ALIGN(end, CONT_PTE_SIZE);
|
||||
|
||||
if (pte_cont(__ptep_get(ptep))) {
|
||||
start = ALIGN_DOWN(start, CONT_PTE_SIZE);
|
||||
ptep = contpte_align_down(ptep);
|
||||
}
|
||||
|
||||
ptep = contpte_align_addr_ptep(&start, &end, ptep, nr);
|
||||
__clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
|
|||
encl_size = secs->size + PAGE_SIZE;
|
||||
|
||||
backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
|
||||
VM_NORESERVE);
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(backing)) {
|
||||
ret = PTR_ERR(backing);
|
||||
goto err_out_shrink;
|
||||
|
|
|
|||
|
|
@ -306,7 +306,7 @@ static unsigned zero_mmap_capabilities(struct file *file)
|
|||
/* can't do an in-place private mapping if there's no MMU */
|
||||
static inline int private_mapping_ok(struct vm_area_desc *desc)
|
||||
{
|
||||
return is_nommu_shared_mapping(desc->vm_flags);
|
||||
return is_nommu_shared_vma_flags(&desc->vma_flags);
|
||||
}
|
||||
#else
|
||||
|
||||
|
|
@ -360,7 +360,7 @@ static int mmap_mem_prepare(struct vm_area_desc *desc)
|
|||
|
||||
desc->vm_ops = &mmap_mem_ops;
|
||||
|
||||
/* Remap-pfn-range will mark the range VM_IO. */
|
||||
/* Remap-pfn-range will mark the range with the I/O flag. */
|
||||
mmap_action_remap_full(desc, desc->pgoff);
|
||||
/* We filter remap errors to -EAGAIN. */
|
||||
desc->action.error_hook = mmap_filter_error;
|
||||
|
|
@ -520,7 +520,7 @@ static int mmap_zero_prepare(struct vm_area_desc *desc)
|
|||
#ifndef CONFIG_MMU
|
||||
return -ENOSYS;
|
||||
#endif
|
||||
if (desc->vm_flags & VM_SHARED)
|
||||
if (vma_desc_test_flags(desc, VMA_SHARED_BIT))
|
||||
return shmem_zero_setup_desc(desc);
|
||||
|
||||
desc->action.success_hook = mmap_zero_private_success;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
#include "dax-private.h"
|
||||
#include "bus.h"
|
||||
|
||||
static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
|
||||
static int __check_vma(struct dev_dax *dev_dax, vma_flags_t flags,
|
||||
unsigned long start, unsigned long end, struct file *file,
|
||||
const char *func)
|
||||
{
|
||||
|
|
@ -24,7 +24,7 @@ static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
|
|||
return -ENXIO;
|
||||
|
||||
/* prevent private mappings from being established */
|
||||
if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
|
||||
if (!vma_flags_test(&flags, VMA_MAYSHARE_BIT)) {
|
||||
dev_info_ratelimited(dev,
|
||||
"%s: %s: fail, attempted private mapping\n",
|
||||
current->comm, func);
|
||||
|
|
@ -53,7 +53,7 @@ static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
|
|||
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
|
||||
const char *func)
|
||||
{
|
||||
return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end,
|
||||
return __check_vma(dev_dax, vma->flags, vma->vm_start, vma->vm_end,
|
||||
vma->vm_file, func);
|
||||
}
|
||||
|
||||
|
|
@ -306,14 +306,14 @@ static int dax_mmap_prepare(struct vm_area_desc *desc)
|
|||
* fault time.
|
||||
*/
|
||||
id = dax_read_lock();
|
||||
rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp,
|
||||
rc = __check_vma(dev_dax, desc->vma_flags, desc->start, desc->end, filp,
|
||||
__func__);
|
||||
dax_read_unlock(id);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
desc->vm_ops = &dax_vm_ops;
|
||||
desc->vm_flags |= VM_HUGEPAGE;
|
||||
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -186,15 +186,16 @@ int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj,
|
|||
{
|
||||
struct vfsmount *huge_mnt;
|
||||
struct file *filp;
|
||||
const vma_flags_t flags = mk_vma_flags(VMA_NORESERVE_BIT);
|
||||
|
||||
drm_gem_private_object_init(dev, obj, size);
|
||||
|
||||
huge_mnt = drm_gem_get_huge_mnt(dev);
|
||||
if (huge_mnt)
|
||||
filp = shmem_file_setup_with_mnt(huge_mnt, "drm mm object",
|
||||
size, VM_NORESERVE);
|
||||
size, flags);
|
||||
else
|
||||
filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
|
||||
filp = shmem_file_setup("drm mm object", size, flags);
|
||||
|
||||
if (IS_ERR(filp))
|
||||
return PTR_ERR(filp);
|
||||
|
|
|
|||
|
|
@ -499,7 +499,7 @@ static int __create_shmem(struct drm_i915_private *i915,
|
|||
resource_size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
unsigned long shmem_flags = VM_NORESERVE;
|
||||
const vma_flags_t shmem_flags = mk_vma_flags(VMA_NORESERVE_BIT);
|
||||
struct vfsmount *huge_mnt;
|
||||
struct file *filp;
|
||||
|
||||
|
|
|
|||
|
|
@ -200,7 +200,8 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
|
|||
struct address_space *mapping;
|
||||
gfp_t mask;
|
||||
|
||||
filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
|
||||
filp = shmem_file_setup("i915-shmem-tt", size,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(filp))
|
||||
return PTR_ERR(filp);
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@ struct file *shmem_create_from_data(const char *name, void *data, size_t len)
|
|||
struct file *file;
|
||||
int err;
|
||||
|
||||
file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE);
|
||||
file = shmem_file_setup(name, PAGE_ALIGN(len),
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(file))
|
||||
return file;
|
||||
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ static void ttm_tt_fini_shmem(struct kunit *test)
|
|||
err = ttm_tt_init(tt, bo, 0, caching, 0);
|
||||
KUNIT_ASSERT_EQ(test, err, 0);
|
||||
|
||||
shmem = shmem_file_setup("ttm swap", BO_SIZE, 0);
|
||||
shmem = shmem_file_setup("ttm swap", BO_SIZE, EMPTY_VMA_FLAGS);
|
||||
tt->swap_storage = shmem;
|
||||
|
||||
ttm_tt_fini(tt);
|
||||
|
|
|
|||
|
|
@ -178,5 +178,6 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail);
|
|||
*/
|
||||
struct file *ttm_backup_shmem_create(loff_t size)
|
||||
{
|
||||
return shmem_file_setup("ttm shmem backup", size, 0);
|
||||
return shmem_file_setup("ttm shmem backup", size,
|
||||
EMPTY_VMA_FLAGS);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -330,7 +330,7 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
|
|||
struct page *to_page;
|
||||
int i, ret;
|
||||
|
||||
swap_storage = shmem_file_setup("ttm swap", size, 0);
|
||||
swap_storage = shmem_file_setup("ttm swap", size, EMPTY_VMA_FLAGS);
|
||||
if (IS_ERR(swap_storage)) {
|
||||
pr_err("Failed allocating swap storage\n");
|
||||
return PTR_ERR(swap_storage);
|
||||
|
|
|
|||
2
fs/aio.c
2
fs/aio.c
|
|
@ -394,7 +394,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
|
|||
|
||||
static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
|
||||
{
|
||||
desc->vm_flags |= VM_DONTEXPAND;
|
||||
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
|
||||
desc->vm_ops = &aio_ring_vm_ops;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -473,11 +473,12 @@ static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
if (!IS_DAX(file_inode(desc->file)))
|
||||
return generic_file_readonly_mmap_prepare(desc);
|
||||
|
||||
if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
|
||||
if (vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
|
||||
vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
|
||||
return -EINVAL;
|
||||
|
||||
desc->vm_ops = &erofs_dax_vm_ops;
|
||||
desc->vm_flags |= VM_HUGEPAGE;
|
||||
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -818,13 +818,13 @@ static int ext4_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
* We don't support synchronous mappings for non-DAX files and
|
||||
* for DAX files if underneath dax_device is not synchronous.
|
||||
*/
|
||||
if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file), dax_dev))
|
||||
if (!daxdev_mapping_supported(desc, file_inode(file), dax_dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(file);
|
||||
if (IS_DAX(file_inode(file))) {
|
||||
desc->vm_ops = &ext4_dax_vm_ops;
|
||||
desc->vm_flags |= VM_HUGEPAGE;
|
||||
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
|
||||
} else {
|
||||
desc->vm_ops = &ext4_file_vm_ops;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
loff_t len, vma_len;
|
||||
int ret;
|
||||
struct hstate *h = hstate_file(file);
|
||||
vm_flags_t vm_flags;
|
||||
vma_flags_t vma_flags;
|
||||
|
||||
/*
|
||||
* vma address alignment (but not the pgoff alignment) has
|
||||
|
|
@ -119,7 +119,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
* way when do_mmap unwinds (may be important on powerpc
|
||||
* and ia64).
|
||||
*/
|
||||
desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
|
||||
vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
|
||||
desc->vm_ops = &hugetlb_vm_ops;
|
||||
|
||||
/*
|
||||
|
|
@ -148,23 +148,23 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
|
||||
ret = -ENOMEM;
|
||||
|
||||
vm_flags = desc->vm_flags;
|
||||
vma_flags = desc->vma_flags;
|
||||
/*
|
||||
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
|
||||
* reserving here. Note: only for SHM hugetlbfs file, the inode
|
||||
* flag S_PRIVATE is set.
|
||||
*/
|
||||
if (inode->i_flags & S_PRIVATE)
|
||||
vm_flags |= VM_NORESERVE;
|
||||
vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);
|
||||
|
||||
if (hugetlb_reserve_pages(inode,
|
||||
desc->pgoff >> huge_page_order(h),
|
||||
len >> huge_page_shift(h), desc,
|
||||
vm_flags) < 0)
|
||||
vma_flags) < 0)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
|
||||
if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len)
|
||||
i_size_write(inode, len);
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
|
|
@ -1527,7 +1527,7 @@ static int get_hstate_idx(int page_size_log)
|
|||
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
|
||||
*/
|
||||
struct file *hugetlb_file_setup(const char *name, size_t size,
|
||||
vm_flags_t acctflag, int creat_flags,
|
||||
vma_flags_t acctflag, int creat_flags,
|
||||
int page_size_log)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
|
|
|||
|
|
@ -276,7 +276,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
struct file *file = desc->file;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct ntfs_inode *ni = ntfs_i(inode);
|
||||
bool rw = desc->vm_flags & VM_WRITE;
|
||||
const bool rw = vma_desc_test_flags(desc, VMA_WRITE_BIT);
|
||||
int err;
|
||||
|
||||
/* Avoid any operation if inode is bad. */
|
||||
|
|
|
|||
|
|
@ -411,8 +411,8 @@ static int orangefs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
"orangefs_file_mmap: called on %pD\n", file);
|
||||
|
||||
/* set the sequential readahead hint */
|
||||
desc->vm_flags |= VM_SEQ_READ;
|
||||
desc->vm_flags &= ~VM_RAND_READ;
|
||||
vma_desc_set_flags(desc, VMA_SEQ_READ_BIT);
|
||||
vma_desc_clear_flags(desc, VMA_RAND_READ_BIT);
|
||||
|
||||
file_accessed(file);
|
||||
desc->vm_ops = &orangefs_file_vm_ops;
|
||||
|
|
|
|||
|
|
@ -264,7 +264,7 @@ out:
|
|||
*/
|
||||
static int ramfs_nommu_mmap_prepare(struct vm_area_desc *desc)
|
||||
{
|
||||
if (!is_nommu_shared_mapping(desc->vm_flags))
|
||||
if (!is_nommu_shared_vma_flags(&desc->vma_flags))
|
||||
return -ENOSYS;
|
||||
|
||||
file_accessed(desc->file);
|
||||
|
|
|
|||
|
|
@ -1044,7 +1044,7 @@ static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
|
|||
* Ensure changes are carried directly to the memory being mapped,
|
||||
* do not allow copy-on-write mapping.
|
||||
*/
|
||||
if (!(desc->vm_flags & VM_SHARED)) {
|
||||
if (!vma_desc_test_flags(desc, VMA_SHARED_BIT)) {
|
||||
mutex_unlock(&rdtgroup_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
|
|||
*/
|
||||
static int romfs_mmap_prepare(struct vm_area_desc *desc)
|
||||
{
|
||||
return is_nommu_shared_mapping(desc->vm_flags) ? 0 : -ENOSYS;
|
||||
return is_nommu_shared_vma_flags(&desc->vma_flags) ? 0 : -ENOSYS;
|
||||
}
|
||||
|
||||
static unsigned romfs_mmap_capabilities(struct file *file)
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ xfile_create(
|
|||
if (!xf)
|
||||
return -ENOMEM;
|
||||
|
||||
xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
|
||||
xf->file = shmem_kernel_file_setup(description, isize,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(xf->file)) {
|
||||
error = PTR_ERR(xf->file);
|
||||
goto out_xfile;
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ xmbuf_alloc(
|
|||
if (!btp)
|
||||
return -ENOMEM;
|
||||
|
||||
file = shmem_kernel_file_setup(descr, 0, 0);
|
||||
file = shmem_kernel_file_setup(descr, 0, EMPTY_VMA_FLAGS);
|
||||
if (IS_ERR(file)) {
|
||||
error = PTR_ERR(file);
|
||||
goto out_free_btp;
|
||||
|
|
|
|||
|
|
@ -2010,14 +2010,14 @@ xfs_file_mmap_prepare(
|
|||
* We don't support synchronous mappings for non-DAX files and
|
||||
* for DAX files if underneath dax_device is not synchronous.
|
||||
*/
|
||||
if (!daxdev_mapping_supported(desc->vm_flags, file_inode(file),
|
||||
if (!daxdev_mapping_supported(desc, file_inode(file),
|
||||
target->bt_daxdev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(file);
|
||||
desc->vm_ops = &xfs_file_vm_ops;
|
||||
if (IS_DAX(inode))
|
||||
desc->vm_flags |= VM_HUGEPAGE;
|
||||
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -333,7 +333,8 @@ static int zonefs_file_mmap_prepare(struct vm_area_desc *desc)
|
|||
* ordering between msync() and page cache writeback.
|
||||
*/
|
||||
if (zonefs_inode_is_seq(file_inode(file)) &&
|
||||
(desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
|
||||
vma_desc_test_flags(desc, VMA_SHARED_BIT) &&
|
||||
vma_desc_test_flags(desc, VMA_MAYWRITE_BIT))
|
||||
return -EINVAL;
|
||||
|
||||
file_accessed(file);
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ static inline void set_mems_allowed(nodemask_t nodemask)
|
|||
task_unlock(current);
|
||||
}
|
||||
|
||||
extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid);
|
||||
extern void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask);
|
||||
#else /* !CONFIG_CPUSETS */
|
||||
|
||||
static inline bool cpusets_enabled(void) { return false; }
|
||||
|
|
@ -299,9 +299,9 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
|
||||
static inline void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask)
|
||||
{
|
||||
return true;
|
||||
nodes_copy(*mask, node_states[N_MEMORY]);
|
||||
}
|
||||
#endif /* !CONFIG_CPUSETS */
|
||||
|
||||
|
|
|
|||
|
|
@ -65,11 +65,11 @@ size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
|
|||
/*
|
||||
* Check if given mapping is supported by the file / underlying device.
|
||||
*/
|
||||
static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
|
||||
static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
|
||||
const struct inode *inode,
|
||||
struct dax_device *dax_dev)
|
||||
{
|
||||
if (!(vm_flags & VM_SYNC))
|
||||
if (!vma_desc_test_flags(desc, VMA_SYNC_BIT))
|
||||
return true;
|
||||
if (!IS_DAX(inode))
|
||||
return false;
|
||||
|
|
@ -111,11 +111,11 @@ static inline void set_dax_nomc(struct dax_device *dax_dev)
|
|||
static inline void set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
}
|
||||
static inline bool daxdev_mapping_supported(vm_flags_t vm_flags,
|
||||
static inline bool daxdev_mapping_supported(const struct vm_area_desc *desc,
|
||||
const struct inode *inode,
|
||||
struct dax_device *dax_dev)
|
||||
{
|
||||
return !(vm_flags & VM_SYNC);
|
||||
return !vma_desc_test_flags(desc, VMA_SYNC_BIT);
|
||||
}
|
||||
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
|
|||
struct folio **foliop);
|
||||
#endif /* CONFIG_USERFAULTFD */
|
||||
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
|
||||
struct vm_area_desc *desc, vm_flags_t vm_flags);
|
||||
struct vm_area_desc *desc, vma_flags_t vma_flags);
|
||||
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
|
||||
long freed);
|
||||
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
|
||||
|
|
@ -527,7 +527,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
|
|||
}
|
||||
|
||||
extern const struct vm_operations_struct hugetlb_vm_ops;
|
||||
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
|
||||
struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct,
|
||||
int creat_flags, int page_size_log);
|
||||
|
||||
static inline bool is_file_hugepages(const struct file *file)
|
||||
|
|
@ -543,7 +543,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
|
|||
|
||||
#define is_file_hugepages(file) false
|
||||
static inline struct file *
|
||||
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
|
||||
hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag,
|
||||
int creat_flags, int page_size_log)
|
||||
{
|
||||
return ERR_PTR(-ENOSYS);
|
||||
|
|
|
|||
|
|
@ -11,6 +11,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
|
|||
return !!(vm_flags & VM_HUGETLB);
|
||||
}
|
||||
|
||||
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
|
||||
{
|
||||
return vma_flags_test(flags, VMA_HUGETLB_BIT);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
|
||||
|
|
@ -18,6 +23,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
|
||||
|
|
|
|||
|
|
@ -1758,7 +1758,7 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid);
|
||||
void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask);
|
||||
|
||||
void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg);
|
||||
|
||||
|
|
@ -1829,9 +1829,9 @@ static inline ino_t page_cgroup_ino(struct page *page)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
|
||||
static inline void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg,
|
||||
nodemask_t *mask)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@ struct memory_dev_type *mt_find_alloc_memory_type(int adist,
|
|||
struct list_head *memory_types);
|
||||
void mt_put_memory_types(struct list_head *memory_types);
|
||||
#ifdef CONFIG_MIGRATION
|
||||
int next_demotion_node(int node);
|
||||
int next_demotion_node(int node, const nodemask_t *allowed_mask);
|
||||
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
|
||||
bool node_is_toptier(int node);
|
||||
#else
|
||||
static inline int next_demotion_node(int node)
|
||||
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
|
||||
{
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
|
@ -101,7 +101,7 @@ static inline void clear_node_memory_type(int node, struct memory_dev_type *memt
|
|||
|
||||
}
|
||||
|
||||
static inline int next_demotion_node(int node)
|
||||
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
|
||||
{
|
||||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#ifndef _LINUX_MM_H
|
||||
#define _LINUX_MM_H
|
||||
|
||||
#include <linux/args.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/gfp.h>
|
||||
|
|
@ -551,17 +552,18 @@ enum {
|
|||
/*
|
||||
* Physically remapped pages are special. Tell the
|
||||
* rest of the world about it:
|
||||
* VM_IO tells people not to look at these pages
|
||||
* IO tells people not to look at these pages
|
||||
* (accesses can have side effects).
|
||||
* VM_PFNMAP tells the core MM that the base pages are just
|
||||
* PFNMAP tells the core MM that the base pages are just
|
||||
* raw PFN mappings, and do not have a "struct page" associated
|
||||
* with them.
|
||||
* VM_DONTEXPAND
|
||||
* DONTEXPAND
|
||||
* Disable vma merging and expanding with mremap().
|
||||
* VM_DONTDUMP
|
||||
* DONTDUMP
|
||||
* Omit vma from core dump, even when VM_IO turned off.
|
||||
*/
|
||||
#define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
|
||||
#define VMA_REMAP_FLAGS mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT, \
|
||||
VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)
|
||||
|
||||
/* This mask prevents VMA from being scanned with khugepaged */
|
||||
#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
|
||||
|
|
@ -945,7 +947,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
|
|||
* system word.
|
||||
*/
|
||||
if (NUM_VMA_FLAG_BITS > BITS_PER_LONG) {
|
||||
unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
|
||||
unsigned long *bitmap = vma->flags.__vma_flags;
|
||||
|
||||
bitmap_zero(&bitmap[1], NUM_VMA_FLAG_BITS - BITS_PER_LONG);
|
||||
}
|
||||
|
|
@ -989,8 +991,7 @@ static inline void vm_flags_mod(struct vm_area_struct *vma,
|
|||
__vm_flags_mod(vma, set, clear);
|
||||
}
|
||||
|
||||
static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
|
||||
vma_flag_t bit)
|
||||
static inline bool __vma_atomic_valid_flag(struct vm_area_struct *vma, vma_flag_t bit)
|
||||
{
|
||||
const vm_flags_t mask = BIT((__force int)bit);
|
||||
|
||||
|
|
@ -1005,13 +1006,12 @@ static inline bool __vma_flag_atomic_valid(struct vm_area_struct *vma,
|
|||
* Set VMA flag atomically. Requires only VMA/mmap read lock. Only specific
|
||||
* valid flags are allowed to do this.
|
||||
*/
|
||||
static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
|
||||
vma_flag_t bit)
|
||||
static inline void vma_set_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
|
||||
{
|
||||
unsigned long *bitmap = ACCESS_PRIVATE(&vma->flags, __vma_flags);
|
||||
unsigned long *bitmap = vma->flags.__vma_flags;
|
||||
|
||||
vma_assert_stabilised(vma);
|
||||
if (__vma_flag_atomic_valid(vma, bit))
|
||||
if (__vma_atomic_valid_flag(vma, bit))
|
||||
set_bit((__force int)bit, bitmap);
|
||||
}
|
||||
|
||||
|
|
@ -1022,15 +1022,211 @@ static inline void vma_flag_set_atomic(struct vm_area_struct *vma,
|
|||
* This is necessarily racy, so callers must ensure that serialisation is
|
||||
* achieved through some other means, or that races are permissible.
|
||||
*/
|
||||
static inline bool vma_flag_test_atomic(struct vm_area_struct *vma,
|
||||
vma_flag_t bit)
|
||||
static inline bool vma_test_atomic_flag(struct vm_area_struct *vma, vma_flag_t bit)
|
||||
{
|
||||
if (__vma_flag_atomic_valid(vma, bit))
|
||||
if (__vma_atomic_valid_flag(vma, bit))
|
||||
return test_bit((__force int)bit, &vma->vm_flags);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Set an individual VMA flag in flags, non-atomically. */
|
||||
static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
|
||||
{
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
|
||||
__set_bit((__force int)bit, bitmap);
|
||||
}
|
||||
|
||||
static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
|
||||
{
|
||||
vma_flags_t flags;
|
||||
int i;
|
||||
|
||||
vma_flags_clear_all(&flags);
|
||||
for (i = 0; i < count; i++)
|
||||
vma_flag_set(&flags, bits[i]);
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro which bitwise-or combines the specified input flags into a
|
||||
* vma_flags_t bitmap value. E.g.:
|
||||
*
|
||||
* vma_flags_t flags = mk_vma_flags(VMA_IO_BIT, VMA_PFNMAP_BIT,
|
||||
* VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
|
||||
*
|
||||
* The compiler cleverly optimises away all of the work and this ends up being
|
||||
* equivalent to aggregating the values manually.
|
||||
*/
|
||||
#define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
|
||||
(const vma_flag_t []){__VA_ARGS__})
|
||||
|
||||
/* Test whether any of the to_test flags are set in flags, non-atomically. */
|
||||
static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
|
||||
vma_flags_t to_test)
|
||||
{
|
||||
const unsigned long *bitmap = flags->__vma_flags;
|
||||
const unsigned long *bitmap_to_test = to_test.__vma_flags;
|
||||
|
||||
return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test whether any specified VMA flag is set, e.g.:
|
||||
*
|
||||
* if (vma_flags_test(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
|
||||
*/
|
||||
#define vma_flags_test(flags, ...) \
|
||||
vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Test that ALL of the to_test flags are set, non-atomically. */
|
||||
static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
|
||||
vma_flags_t to_test)
|
||||
{
|
||||
const unsigned long *bitmap = flags->__vma_flags;
|
||||
const unsigned long *bitmap_to_test = to_test.__vma_flags;
|
||||
|
||||
return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test whether ALL specified VMA flags are set, e.g.:
|
||||
*
|
||||
* if (vma_flags_test_all(flags, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
|
||||
*/
|
||||
#define vma_flags_test_all(flags, ...) \
|
||||
vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Set each of the to_set flags in flags, non-atomically. */
|
||||
static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
|
||||
{
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
const unsigned long *bitmap_to_set = to_set.__vma_flags;
|
||||
|
||||
bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set all specified VMA flags, e.g.:
|
||||
*
|
||||
* vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
*/
|
||||
#define vma_flags_set(flags, ...) \
|
||||
vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Clear all of the to-clear flags in flags, non-atomically. */
|
||||
static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
|
||||
{
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
|
||||
|
||||
bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear all specified individual flags, e.g.:
|
||||
*
|
||||
* vma_flags_clear(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
*/
|
||||
#define vma_flags_clear(flags, ...) \
|
||||
vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/*
|
||||
* Helper to test that ALL specified flags are set in a VMA.
|
||||
*
|
||||
* Note: appropriate locks must be held, this function does not acquire them for
|
||||
* you.
|
||||
*/
|
||||
static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
return vma_flags_test_all_mask(&vma->flags, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro for checking that ALL specified flags are set in a VMA, e.g.:
|
||||
*
|
||||
* if (vma_test_all_flags(vma, VMA_READ_BIT, VMA_MAYREAD_BIT)) { ... }
|
||||
*/
|
||||
#define vma_test_all_flags(vma, ...) \
|
||||
vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/*
|
||||
* Helper to set all VMA flags in a VMA.
|
||||
*
|
||||
* Note: appropriate locks must be held, this function does not acquire them for
|
||||
* you.
|
||||
*/
|
||||
static inline void vma_set_flags_mask(struct vm_area_struct *vma,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
vma_flags_set_mask(&vma->flags, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro for specifying VMA flags in a VMA, e.g.:
|
||||
*
|
||||
* vma_set_flags(vma, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
|
||||
* VMA_DONTDUMP_BIT);
|
||||
*
|
||||
* Note: appropriate locks must be held, this function does not acquire them for
|
||||
* you.
|
||||
*/
|
||||
#define vma_set_flags(vma, ...) \
|
||||
vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Helper to test whether any of the specified VMA flags are set in a VMA descriptor. */
|
||||
static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
return vma_flags_test_mask(&desc->vma_flags, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro for testing VMA flags for an input pointer to a struct
|
||||
* vm_area_desc object describing a proposed VMA, e.g.:
|
||||
*
|
||||
* if (vma_desc_test_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT,
|
||||
* VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT)) { ... }
|
||||
*/
|
||||
#define vma_desc_test_flags(desc, ...) \
|
||||
vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Helper to set all VMA flags in a VMA descriptor. */
|
||||
static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
vma_flags_set_mask(&desc->vma_flags, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro for specifying VMA flags for an input pointer to a struct
|
||||
* vm_area_desc object describing a proposed VMA, e.g.:
|
||||
*
|
||||
* vma_desc_set_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
|
||||
* VMA_DONTDUMP_BIT);
|
||||
*/
|
||||
#define vma_desc_set_flags(desc, ...) \
|
||||
vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
/* Helper to clear all VMA flags in a VMA descriptor. */
|
||||
static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
vma_flags_clear_mask(&desc->vma_flags, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper macro for clearing VMA flags for an input pointer to a struct
|
||||
* vm_area_desc object describing a proposed VMA, e.g.:
|
||||
*
|
||||
* vma_desc_clear_flags(desc, VMA_IO_BIT, VMA_PFNMAP_BIT, VMA_DONTEXPAND_BIT,
|
||||
* VMA_DONTDUMP_BIT);
|
||||
*/
|
||||
#define vma_desc_clear_flags(desc, ...) \
|
||||
vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
|
||||
|
||||
static inline void vma_set_anonymous(struct vm_area_struct *vma)
|
||||
{
|
||||
vma->vm_ops = NULL;
|
||||
|
|
@ -1096,15 +1292,20 @@ static inline bool vma_is_accessible(const struct vm_area_struct *vma)
|
|||
return vma->vm_flags & VM_ACCESS_FLAGS;
|
||||
}
|
||||
|
||||
static inline bool is_shared_maywrite(vm_flags_t vm_flags)
|
||||
static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
|
||||
{
|
||||
return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
|
||||
(VM_SHARED | VM_MAYWRITE);
|
||||
}
|
||||
|
||||
static inline bool is_shared_maywrite(const vma_flags_t *flags)
|
||||
{
|
||||
return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
|
||||
}
|
||||
|
||||
static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma)
|
||||
{
|
||||
return is_shared_maywrite(vma->vm_flags);
|
||||
return is_shared_maywrite(&vma->flags);
|
||||
}
|
||||
|
||||
static inline
|
||||
|
|
@ -1732,6 +1933,14 @@ static inline bool is_cow_mapping(vm_flags_t flags)
|
|||
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
}
|
||||
|
||||
static inline bool vma_desc_is_cow_mapping(struct vm_area_desc *desc)
|
||||
{
|
||||
const vma_flags_t *flags = &desc->vma_flags;
|
||||
|
||||
return vma_flags_test(flags, VMA_MAYWRITE_BIT) &&
|
||||
!vma_flags_test(flags, VMA_SHARED_BIT);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_MMU
|
||||
static inline bool is_nommu_shared_mapping(vm_flags_t flags)
|
||||
{
|
||||
|
|
@ -1745,6 +1954,11 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags)
|
|||
*/
|
||||
return flags & (VM_MAYSHARE | VM_MAYOVERLAY);
|
||||
}
|
||||
|
||||
static inline bool is_nommu_shared_vma_flags(const vma_flags_t *flags)
|
||||
{
|
||||
return vma_flags_test(flags, VMA_MAYSHARE_BIT, VMA_MAYOVERLAY_BIT);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
|
|
@ -2627,10 +2841,6 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
|
|||
zap_page_range_single(vma, vma->vm_start,
|
||||
vma->vm_end - vma->vm_start, NULL);
|
||||
}
|
||||
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
|
||||
struct vm_area_struct *start_vma, unsigned long start,
|
||||
unsigned long end, unsigned long tree_end);
|
||||
|
||||
struct mmu_notifier_range;
|
||||
|
||||
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
|
||||
|
|
|
|||
|
|
@ -844,7 +844,7 @@ struct mmap_action {
|
|||
|
||||
/*
|
||||
* If specified, this hook is invoked when an error occurred when
|
||||
* attempting the selection action.
|
||||
* attempting the selected action.
|
||||
*
|
||||
* The hook can return an error code in order to filter the error, but
|
||||
* it is not valid to clear the error here.
|
||||
|
|
@ -866,7 +866,9 @@ struct mmap_action {
|
|||
#define NUM_VMA_FLAG_BITS BITS_PER_LONG
|
||||
typedef struct {
|
||||
DECLARE_BITMAP(__vma_flags, NUM_VMA_FLAG_BITS);
|
||||
} __private vma_flags_t;
|
||||
} vma_flags_t;
|
||||
|
||||
#define EMPTY_VMA_FLAGS ((vma_flags_t){ })
|
||||
|
||||
/*
|
||||
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
|
||||
|
|
@ -885,10 +887,7 @@ struct vm_area_desc {
|
|||
/* Mutable fields. Populated with initial state. */
|
||||
pgoff_t pgoff;
|
||||
struct file *vm_file;
|
||||
union {
|
||||
vm_flags_t vm_flags;
|
||||
vma_flags_t vma_flags;
|
||||
};
|
||||
pgprot_t page_prot;
|
||||
|
||||
/* Write-only fields. */
|
||||
|
|
@ -1059,7 +1058,7 @@ struct vm_area_struct {
|
|||
/* Clears all bits in the VMA flags bitmap, non-atomically. */
|
||||
static inline void vma_flags_clear_all(vma_flags_t *flags)
|
||||
{
|
||||
bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
|
||||
bitmap_zero(flags->__vma_flags, NUM_VMA_FLAG_BITS);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1070,7 +1069,9 @@ static inline void vma_flags_clear_all(vma_flags_t *flags)
|
|||
*/
|
||||
static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
|
||||
{
|
||||
*ACCESS_PRIVATE(flags, __vma_flags) = value;
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
|
||||
bitmap[0] = value;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1081,7 +1082,7 @@ static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long va
|
|||
*/
|
||||
static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
|
||||
{
|
||||
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
|
||||
WRITE_ONCE(*bitmap, value);
|
||||
}
|
||||
|
|
@ -1089,7 +1090,7 @@ static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned lo
|
|||
/* Update the first system word of VMA flags setting bits, non-atomically. */
|
||||
static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
|
||||
{
|
||||
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
|
||||
*bitmap |= value;
|
||||
}
|
||||
|
|
@ -1097,7 +1098,7 @@ static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
|
|||
/* Update the first system word of VMA flags clearing bits, non-atomically. */
|
||||
static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
|
||||
{
|
||||
unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
|
||||
unsigned long *bitmap = flags->__vma_flags;
|
||||
|
||||
*bitmap &= ~value;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -515,16 +515,17 @@ static inline void mmu_notifier_range_init_owner(
|
|||
range->owner = owner;
|
||||
}
|
||||
|
||||
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
|
||||
#define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr) \
|
||||
({ \
|
||||
int __young; \
|
||||
struct vm_area_struct *___vma = __vma; \
|
||||
unsigned long ___address = __address; \
|
||||
__young = ptep_clear_flush_young(___vma, ___address, __ptep); \
|
||||
unsigned int ___nr = __nr; \
|
||||
__young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr); \
|
||||
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
|
||||
___address, \
|
||||
___address + \
|
||||
PAGE_SIZE); \
|
||||
___nr * PAGE_SIZE); \
|
||||
__young; \
|
||||
})
|
||||
|
||||
|
|
@ -650,7 +651,7 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
|
|||
|
||||
#define mmu_notifier_range_update_to_read_only(r) false
|
||||
|
||||
#define ptep_clear_flush_young_notify ptep_clear_flush_young
|
||||
#define clear_flush_young_ptes_notify clear_flush_young_ptes
|
||||
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
|
||||
#define ptep_clear_young_notify ptep_test_and_clear_young
|
||||
#define pmdp_clear_young_notify pmdp_test_and_clear_young
|
||||
|
|
|
|||
|
|
@ -22,25 +22,6 @@
|
|||
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On almost all architectures and configurations, 0 can be used as the
|
||||
* upper ceiling to free_pgtables(): on many architectures it has the same
|
||||
* effect as using TASK_SIZE. However, there is one configuration which
|
||||
* must impose a more careful limit, to avoid freeing kernel pgtables.
|
||||
*/
|
||||
#ifndef USER_PGTABLES_CEILING
|
||||
#define USER_PGTABLES_CEILING 0UL
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This defines the first usable user address. Platforms
|
||||
* can override its value with custom FIRST_USER_ADDRESS
|
||||
* defined in their respective <asm/pgtable.h>.
|
||||
*/
|
||||
#ifndef FIRST_USER_ADDRESS
|
||||
#define FIRST_USER_ADDRESS 0UL
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This defines the generic helper for accessing PMD page
|
||||
* table page. Although platforms can still override this
|
||||
|
|
@ -1087,6 +1068,41 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef clear_flush_young_ptes
|
||||
/**
|
||||
* clear_flush_young_ptes - Mark PTEs that map consecutive pages of the same
|
||||
* folio as old and flush the TLB.
|
||||
* @vma: The virtual memory area the pages are mapped into.
|
||||
* @addr: Address the first page is mapped at.
|
||||
* @ptep: Page table pointer for the first entry.
|
||||
* @nr: Number of entries to clear access bit.
|
||||
*
|
||||
* May be overridden by the architecture; otherwise, implemented as a simple
|
||||
* loop over ptep_clear_flush_young().
|
||||
*
|
||||
* Note that PTE bits in the PTE range besides the PFN can differ. For example,
|
||||
* some PTEs might be write-protected.
|
||||
*
|
||||
* Context: The caller holds the page table lock. The PTEs map consecutive
|
||||
* pages that belong to the same folio. The PTEs are all in the same PMD.
|
||||
*/
|
||||
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep, unsigned int nr)
|
||||
{
|
||||
int young = 0;
|
||||
|
||||
for (;;) {
|
||||
young |= ptep_clear_flush_young(vma, addr, ptep);
|
||||
if (--nr == 0)
|
||||
break;
|
||||
ptep++;
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
|
||||
return young;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On some architectures hardware does not set page access bit when accessing
|
||||
* memory page, it is responsibility of software setting this bit. It brings
|
||||
|
|
@ -1629,6 +1645,25 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
|
|||
|
||||
#endif /* CONFIG_MMU */
|
||||
|
||||
/*
|
||||
* On almost all architectures and configurations, 0 can be used as the
|
||||
* upper ceiling to free_pgtables(): on many architectures it has the same
|
||||
* effect as using TASK_SIZE. However, there is one configuration which
|
||||
* must impose a more careful limit, to avoid freeing kernel pgtables.
|
||||
*/
|
||||
#ifndef USER_PGTABLES_CEILING
|
||||
#define USER_PGTABLES_CEILING 0UL
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This defines the first usable user address. Platforms
|
||||
* can override its value with custom FIRST_USER_ADDRESS
|
||||
* defined in their respective <asm/pgtable.h>.
|
||||
*/
|
||||
#ifndef FIRST_USER_ADDRESS
|
||||
#define FIRST_USER_ADDRESS 0UL
|
||||
#endif
|
||||
|
||||
/*
|
||||
* No-op macros that just return the current protection value. Defined here
|
||||
* because these macros can be used even if CONFIG_MMU is not defined.
|
||||
|
|
|
|||
|
|
@ -102,12 +102,10 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
|
|||
extern const struct fs_parameter_spec shmem_fs_parameters[];
|
||||
extern void shmem_init(void);
|
||||
extern int shmem_init_fs_context(struct fs_context *fc);
|
||||
extern struct file *shmem_file_setup(const char *name,
|
||||
loff_t size, unsigned long flags);
|
||||
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
|
||||
unsigned long flags);
|
||||
struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags);
|
||||
struct file *shmem_kernel_file_setup(const char *name, loff_t size, vma_flags_t vma_flags);
|
||||
extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
|
||||
const char *name, loff_t size, unsigned long flags);
|
||||
const char *name, loff_t size, vma_flags_t flags);
|
||||
int shmem_zero_setup(struct vm_area_struct *vma);
|
||||
int shmem_zero_setup_desc(struct vm_area_desc *desc);
|
||||
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
|
||||
|
|
|
|||
12
ipc/shm.c
12
ipc/shm.c
|
|
@ -707,9 +707,10 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
|
|||
int error;
|
||||
struct shmid_kernel *shp;
|
||||
size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
const bool has_no_reserve = shmflg & SHM_NORESERVE;
|
||||
vma_flags_t acctflag = EMPTY_VMA_FLAGS;
|
||||
struct file *file;
|
||||
char name[13];
|
||||
vm_flags_t acctflag = 0;
|
||||
|
||||
if (size < SHMMIN || size > ns->shm_ctlmax)
|
||||
return -EINVAL;
|
||||
|
|
@ -749,8 +750,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
|
|||
hugesize = ALIGN(size, huge_page_size(hs));
|
||||
|
||||
/* hugetlb_file_setup applies strict accounting */
|
||||
if (shmflg & SHM_NORESERVE)
|
||||
acctflag = VM_NORESERVE;
|
||||
if (has_no_reserve)
|
||||
vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
|
||||
file = hugetlb_file_setup(name, hugesize, acctflag,
|
||||
HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
|
||||
} else {
|
||||
|
|
@ -758,9 +759,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
|
|||
* Do not allow no accounting for OVERCOMMIT_NEVER, even
|
||||
* if it's asked for.
|
||||
*/
|
||||
if ((shmflg & SHM_NORESERVE) &&
|
||||
sysctl_overcommit_memory != OVERCOMMIT_NEVER)
|
||||
acctflag = VM_NORESERVE;
|
||||
if (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
|
||||
vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
|
||||
file = shmem_kernel_file_setup(name, size, acctflag);
|
||||
}
|
||||
error = PTR_ERR(file);
|
||||
|
|
|
|||
|
|
@ -4145,40 +4145,58 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask)
|
|||
return allowed;
|
||||
}
|
||||
|
||||
bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
|
||||
/**
|
||||
* cpuset_nodes_allowed - return effective_mems mask from a cgroup cpuset.
|
||||
* @cgroup: pointer to struct cgroup.
|
||||
* @mask: pointer to struct nodemask_t to be returned.
|
||||
*
|
||||
* Returns effective_mems mask from a cgroup cpuset if it is cgroup v2 and
|
||||
* has cpuset subsys. Otherwise, returns node_states[N_MEMORY].
|
||||
*
|
||||
* This function intentionally avoids taking the cpuset_mutex or callback_lock
|
||||
* when accessing effective_mems. This is because the obtained effective_mems
|
||||
* is stale immediately after the query anyway (e.g., effective_mems is updated
|
||||
* immediately after releasing the lock but before returning).
|
||||
*
|
||||
* As a result, returned @mask may be empty because cs->effective_mems can be
|
||||
* rebound during this call. Besides, nodes in @mask are not guaranteed to be
|
||||
* online due to node hotplug. Callers should check the mask for validity on
|
||||
* return based on its subsequent use.
|
||||
**/
|
||||
void cpuset_nodes_allowed(struct cgroup *cgroup, nodemask_t *mask)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cpuset *cs;
|
||||
bool allowed;
|
||||
|
||||
/*
|
||||
* In v1, mem_cgroup and cpuset are unlikely in the same hierarchy
|
||||
* and mems_allowed is likely to be empty even if we could get to it,
|
||||
* so return true to avoid taking a global lock on the empty check.
|
||||
* so return directly to avoid taking a global lock on the empty check.
|
||||
*/
|
||||
if (!cpuset_v2())
|
||||
return true;
|
||||
if (!cgroup || !cpuset_v2()) {
|
||||
nodes_copy(*mask, node_states[N_MEMORY]);
|
||||
return;
|
||||
}
|
||||
|
||||
css = cgroup_get_e_css(cgroup, &cpuset_cgrp_subsys);
|
||||
if (!css)
|
||||
return true;
|
||||
if (!css) {
|
||||
nodes_copy(*mask, node_states[N_MEMORY]);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The reference taken via cgroup_get_e_css is sufficient to
|
||||
* protect css, but it does not imply safe accesses to effective_mems.
|
||||
*
|
||||
* Normally, accessing effective_mems would require the cpuset_mutex
|
||||
* or callback_lock - but node_isset is atomic and the reference
|
||||
* taken via cgroup_get_e_css is sufficient to protect css.
|
||||
*
|
||||
* Since this interface is intended for use by migration paths, we
|
||||
* relax locking here to avoid taking global locks - while accepting
|
||||
* there may be rare scenarios where the result may be inaccurate.
|
||||
*
|
||||
* Reclaim and migration are subject to these same race conditions, and
|
||||
* cannot make strong isolation guarantees, so this is acceptable.
|
||||
* or callback_lock - but the correctness of this information is stale
|
||||
* immediately after the query anyway. We do not acquire the lock
|
||||
* during this process to save lock contention in exchange for racing
|
||||
* against mems_allowed rebinds.
|
||||
*/
|
||||
cs = container_of(css, struct cpuset, css);
|
||||
allowed = node_isset(nid, cs->effective_mems);
|
||||
nodes_copy(*mask, cs->effective_mems);
|
||||
css_put(css);
|
||||
return allowed;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ static int relay_mmap_prepare_buf(struct rchan_buf *buf,
|
|||
return -EINVAL;
|
||||
|
||||
desc->vm_ops = &relay_file_mmap_ops;
|
||||
desc->vm_flags |= VM_DONTEXPAND;
|
||||
vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
|
||||
desc->private_data = buf;
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -4012,7 +4012,7 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
|
|||
|
||||
int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc)
|
||||
{
|
||||
if (is_shared_maywrite(desc->vm_flags))
|
||||
if (is_shared_maywrite(&desc->vma_flags))
|
||||
return -EINVAL;
|
||||
return generic_file_mmap_prepare(desc);
|
||||
}
|
||||
|
|
|
|||
22
mm/hugetlb.c
22
mm/hugetlb.c
|
|
@ -1193,16 +1193,16 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
|
|||
|
||||
static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
|
||||
{
|
||||
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
|
||||
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
|
||||
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
|
||||
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
|
||||
|
||||
desc->private_data = map;
|
||||
}
|
||||
|
||||
static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
|
||||
{
|
||||
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
|
||||
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
|
||||
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
|
||||
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
|
||||
|
||||
desc->private_data = (void *)((unsigned long)desc->private_data | flags);
|
||||
}
|
||||
|
|
@ -1216,7 +1216,7 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
|
|||
|
||||
static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
|
||||
{
|
||||
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
|
||||
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
|
||||
|
||||
return ((unsigned long)desc->private_data) & flag;
|
||||
}
|
||||
|
|
@ -6571,7 +6571,7 @@ next:
|
|||
long hugetlb_reserve_pages(struct inode *inode,
|
||||
long from, long to,
|
||||
struct vm_area_desc *desc,
|
||||
vm_flags_t vm_flags)
|
||||
vma_flags_t vma_flags)
|
||||
{
|
||||
long chg = -1, add = -1, spool_resv, gbl_resv;
|
||||
struct hstate *h = hstate_inode(inode);
|
||||
|
|
@ -6592,7 +6592,7 @@ long hugetlb_reserve_pages(struct inode *inode,
|
|||
* attempt will be made for VM_NORESERVE to allocate a page
|
||||
* without using reserves
|
||||
*/
|
||||
if (vm_flags & VM_NORESERVE)
|
||||
if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
|
|
@ -6601,7 +6601,7 @@ long hugetlb_reserve_pages(struct inode *inode,
|
|||
* to reserve the full area even if read-only as mprotect() may be
|
||||
* called to make the mapping read-write. Assume !desc is a shm mapping
|
||||
*/
|
||||
if (!desc || desc->vm_flags & VM_MAYSHARE) {
|
||||
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
|
||||
/*
|
||||
* resv_map can not be NULL as hugetlb_reserve_pages is only
|
||||
* called for inodes for which resv_maps were created (see
|
||||
|
|
@ -6635,7 +6635,7 @@ long hugetlb_reserve_pages(struct inode *inode,
|
|||
if (err < 0)
|
||||
goto out_err;
|
||||
|
||||
if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
|
||||
if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) {
|
||||
/* For private mappings, the hugetlb_cgroup uncharge info hangs
|
||||
* off the resv_map.
|
||||
*/
|
||||
|
|
@ -6672,7 +6672,7 @@ long hugetlb_reserve_pages(struct inode *inode,
|
|||
* consumed reservations are stored in the map. Hence, nothing
|
||||
* else has to be done for private mappings here
|
||||
*/
|
||||
if (!desc || desc->vm_flags & VM_MAYSHARE) {
|
||||
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
|
||||
add = region_add(resv_map, from, to, regions_needed, h, h_cg);
|
||||
|
||||
if (unlikely(add < 0)) {
|
||||
|
|
@ -6736,7 +6736,7 @@ out_uncharge_cgroup:
|
|||
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
|
||||
chg * pages_per_huge_page(h), h_cg);
|
||||
out_err:
|
||||
if (!desc || desc->vm_flags & VM_MAYSHARE)
|
||||
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT))
|
||||
/* Only call region_abort if the region_chg succeeded but the
|
||||
* region_add failed or didn't run.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma)
|
|||
}
|
||||
}
|
||||
|
||||
/* unmap_vmas is in mm/memory.c */
|
||||
void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
static inline void get_anon_vma(struct anon_vma *anon_vma)
|
||||
|
|
@ -509,9 +512,8 @@ bool __folio_end_writeback(struct folio *folio);
|
|||
void deactivate_file_folio(struct folio *folio);
|
||||
void folio_activate(struct folio *folio);
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
||||
struct vm_area_struct *start_vma, unsigned long floor,
|
||||
unsigned long ceiling, bool mm_wr_locked);
|
||||
void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc);
|
||||
|
||||
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
|
||||
|
||||
struct zap_details;
|
||||
|
|
@ -1044,7 +1046,7 @@ extern long populate_vma_page_range(struct vm_area_struct *vma,
|
|||
unsigned long start, unsigned long end, int *locked);
|
||||
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end, bool write, int *locked);
|
||||
bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags,
|
||||
bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked,
|
||||
unsigned long bytes);
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -1732,7 +1732,7 @@ static bool file_backed_vma_is_retractable(struct vm_area_struct *vma)
|
|||
* obtained on guard region installation after the flag is set, so this
|
||||
* check being performed under this lock excludes races.
|
||||
*/
|
||||
if (vma_flag_test_atomic(vma, VMA_MAYBE_GUARD_BIT))
|
||||
if (vma_test_atomic_flag(vma, VMA_MAYBE_GUARD_BIT))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -1140,7 +1140,7 @@ static long madvise_guard_install(struct madvise_behavior *madv_behavior)
|
|||
* acquire an mmap/VMA write lock to read it. All remaining readers may
|
||||
* or may not see the flag set, but we don't care.
|
||||
*/
|
||||
vma_flag_set_atomic(vma, VMA_MAYBE_GUARD_BIT);
|
||||
vma_set_atomic_flag(vma, VMA_MAYBE_GUARD_BIT);
|
||||
|
||||
/*
|
||||
* If anonymous and we are establishing page tables the VMA ought to
|
||||
|
|
|
|||
|
|
@ -5649,9 +5649,21 @@ subsys_initcall(mem_cgroup_swap_init);
|
|||
|
||||
#endif /* CONFIG_SWAP */
|
||||
|
||||
bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
|
||||
void mem_cgroup_node_filter_allowed(struct mem_cgroup *memcg, nodemask_t *mask)
|
||||
{
|
||||
return memcg ? cpuset_node_allowed(memcg->css.cgroup, nid) : true;
|
||||
nodemask_t allowed;
|
||||
|
||||
if (!memcg)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Since this interface is intended for use by migration paths, and
|
||||
* reclaim and migration are subject to race conditions such as changes
|
||||
* in effective_mems and hot-unpluging of nodes, inaccurate allowed
|
||||
* mask is acceptable.
|
||||
*/
|
||||
cpuset_nodes_allowed(memcg->css.cgroup, &allowed);
|
||||
nodes_and(*mask, *mask, allowed);
|
||||
}
|
||||
|
||||
void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg)
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
|
|||
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
|
||||
idx >>= huge_page_order(h);
|
||||
|
||||
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, 0);
|
||||
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
|
||||
if (nr_resv < 0)
|
||||
return ERR_PTR(nr_resv);
|
||||
|
||||
|
|
@ -463,12 +463,12 @@ struct file *memfd_alloc_file(const char *name, unsigned int flags)
|
|||
int err = 0;
|
||||
|
||||
if (flags & MFD_HUGETLB) {
|
||||
file = hugetlb_file_setup(name, 0, VM_NORESERVE,
|
||||
file = hugetlb_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT),
|
||||
HUGETLB_ANONHUGE_INODE,
|
||||
(flags >> MFD_HUGE_SHIFT) &
|
||||
MFD_HUGE_MASK);
|
||||
} else {
|
||||
file = shmem_file_setup(name, 0, VM_NORESERVE);
|
||||
file = shmem_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
}
|
||||
if (IS_ERR(file))
|
||||
return file;
|
||||
|
|
|
|||
|
|
@ -320,16 +320,17 @@ void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
|
|||
/**
|
||||
* next_demotion_node() - Get the next node in the demotion path
|
||||
* @node: The starting node to lookup the next node
|
||||
* @allowed_mask: The pointer to allowed node mask
|
||||
*
|
||||
* Return: node id for next memory node in the demotion path hierarchy
|
||||
* from @node; NUMA_NO_NODE if @node is terminal. This does not keep
|
||||
* @node online or guarantee that it *continues* to be the next demotion
|
||||
* target.
|
||||
*/
|
||||
int next_demotion_node(int node)
|
||||
int next_demotion_node(int node, const nodemask_t *allowed_mask)
|
||||
{
|
||||
struct demotion_nodes *nd;
|
||||
int target;
|
||||
nodemask_t mask;
|
||||
|
||||
if (!node_demotion)
|
||||
return NUMA_NO_NODE;
|
||||
|
|
@ -344,6 +345,10 @@ int next_demotion_node(int node)
|
|||
* node_demotion[] reads need to be consistent.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
/* Filter out nodes that are not in allowed_mask. */
|
||||
nodes_and(mask, nd->preferred, *allowed_mask);
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* If there are multiple target nodes, just select one
|
||||
* target node randomly.
|
||||
|
|
@ -356,10 +361,16 @@ int next_demotion_node(int node)
|
|||
* caching issue, which seems more complicated. So selecting
|
||||
* target node randomly seems better until now.
|
||||
*/
|
||||
target = node_random(&nd->preferred);
|
||||
rcu_read_unlock();
|
||||
if (!nodes_empty(mask))
|
||||
return node_random(&mask);
|
||||
|
||||
return target;
|
||||
/*
|
||||
* Preferred nodes are not in allowed_mask. Flip bits in
|
||||
* allowed_mask as used node mask. Then, use it to get the
|
||||
* closest demotion target.
|
||||
*/
|
||||
nodes_complement(mask, *allowed_mask);
|
||||
return find_next_best_node(node, &mask);
|
||||
}
|
||||
|
||||
static void disable_all_demotion_targets(void)
|
||||
|
|
|
|||
101
mm/memory.c
101
mm/memory.c
|
|
@ -370,11 +370,32 @@ void free_pgd_range(struct mmu_gather *tlb,
|
|||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
||||
struct vm_area_struct *vma, unsigned long floor,
|
||||
unsigned long ceiling, bool mm_wr_locked)
|
||||
/**
|
||||
* free_pgtables() - Free a range of page tables
|
||||
* @tlb: The mmu gather
|
||||
* @unmap: The unmap_desc
|
||||
*
|
||||
* Note: pg_start and pg_end are provided to indicate the absolute range of the
|
||||
* page tables that should be removed. This can differ from the vma mappings on
|
||||
* some archs that may have mappings that need to be removed outside the vmas.
|
||||
* Note that the prev->vm_end and next->vm_start are often used.
|
||||
*
|
||||
* The vma_end differs from the pg_end when a dup_mmap() failed and the tree has
|
||||
* unrelated data to the mm_struct being torn down.
|
||||
*/
|
||||
void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap)
|
||||
{
|
||||
struct unlink_vma_file_batch vb;
|
||||
struct ma_state *mas = unmap->mas;
|
||||
struct vm_area_struct *vma = unmap->first;
|
||||
|
||||
/*
|
||||
* Note: USER_PGTABLES_CEILING may be passed as the value of pg_end and
|
||||
* may be 0. Underflow is expected in this case. Otherwise the
|
||||
* pagetable end is exclusive. vma_end is exclusive. The last vma
|
||||
* address should never be larger than the pagetable end.
|
||||
*/
|
||||
WARN_ON_ONCE(unmap->vma_end - 1 > unmap->pg_end - 1);
|
||||
|
||||
tlb_free_vmas(tlb);
|
||||
|
||||
|
|
@ -382,19 +403,13 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
|||
unsigned long addr = vma->vm_start;
|
||||
struct vm_area_struct *next;
|
||||
|
||||
/*
|
||||
* Note: USER_PGTABLES_CEILING may be passed as ceiling and may
|
||||
* be 0. This will underflow and is okay.
|
||||
*/
|
||||
next = mas_find(mas, ceiling - 1);
|
||||
if (unlikely(xa_is_zero(next)))
|
||||
next = NULL;
|
||||
next = mas_find(mas, unmap->tree_end - 1);
|
||||
|
||||
/*
|
||||
* Hide vma from rmap and truncate_pagecache before freeing
|
||||
* pgtables
|
||||
*/
|
||||
if (mm_wr_locked)
|
||||
if (unmap->mm_wr_locked)
|
||||
vma_start_write(vma);
|
||||
unlink_anon_vmas(vma);
|
||||
|
||||
|
|
@ -406,18 +421,16 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
|||
*/
|
||||
while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
|
||||
vma = next;
|
||||
next = mas_find(mas, ceiling - 1);
|
||||
if (unlikely(xa_is_zero(next)))
|
||||
next = NULL;
|
||||
if (mm_wr_locked)
|
||||
next = mas_find(mas, unmap->tree_end - 1);
|
||||
if (unmap->mm_wr_locked)
|
||||
vma_start_write(vma);
|
||||
unlink_anon_vmas(vma);
|
||||
unlink_file_vma_batch_add(&vb, vma);
|
||||
}
|
||||
unlink_file_vma_batch_final(&vb);
|
||||
|
||||
free_pgd_range(tlb, addr, vma->vm_end,
|
||||
floor, next ? next->vm_start : ceiling);
|
||||
free_pgd_range(tlb, addr, vma->vm_end, unmap->pg_start,
|
||||
next ? next->vm_start : unmap->pg_end);
|
||||
vma = next;
|
||||
} while (vma);
|
||||
}
|
||||
|
|
@ -2124,11 +2137,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
|
|||
/**
|
||||
* unmap_vmas - unmap a range of memory covered by a list of vma's
|
||||
* @tlb: address of the caller's struct mmu_gather
|
||||
* @mas: the maple state
|
||||
* @vma: the starting vma
|
||||
* @start_addr: virtual address at which to start unmapping
|
||||
* @end_addr: virtual address at which to end unmapping
|
||||
* @tree_end: The maximum index to check
|
||||
* @unmap: The unmap_desc
|
||||
*
|
||||
* Unmap all pages in the vma list.
|
||||
*
|
||||
|
|
@ -2141,10 +2150,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
|
|||
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
|
||||
* drops the lock and schedules.
|
||||
*/
|
||||
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
|
||||
struct vm_area_struct *vma, unsigned long start_addr,
|
||||
unsigned long end_addr, unsigned long tree_end)
|
||||
void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct mmu_notifier_range range;
|
||||
struct zap_details details = {
|
||||
.zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
|
||||
|
|
@ -2152,17 +2160,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
|
|||
.even_cows = true,
|
||||
};
|
||||
|
||||
vma = unmap->first;
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
|
||||
start_addr, end_addr);
|
||||
unmap->vma_start, unmap->vma_end);
|
||||
mmu_notifier_invalidate_range_start(&range);
|
||||
do {
|
||||
unsigned long start = start_addr;
|
||||
unsigned long end = end_addr;
|
||||
unsigned long start = unmap->vma_start;
|
||||
unsigned long end = unmap->vma_end;
|
||||
hugetlb_zap_begin(vma, &start, &end);
|
||||
unmap_single_vma(tlb, vma, start, end, &details);
|
||||
hugetlb_zap_end(vma, &details);
|
||||
vma = mas_find(mas, tree_end - 1);
|
||||
} while (vma && likely(!xa_is_zero(vma)));
|
||||
vma = mas_find(unmap->mas, unmap->tree_end - 1);
|
||||
} while (vma);
|
||||
mmu_notifier_invalidate_range_end(&range);
|
||||
}
|
||||
|
||||
|
|
@ -2948,7 +2957,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr,
|
||||
static int get_remap_pgoff(bool is_cow, unsigned long addr,
|
||||
unsigned long end, unsigned long vm_start, unsigned long vm_end,
|
||||
unsigned long pfn, pgoff_t *vm_pgoff_p)
|
||||
{
|
||||
|
|
@ -2958,7 +2967,7 @@ static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr,
|
|||
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
|
||||
* See vm_normal_page() for details.
|
||||
*/
|
||||
if (is_cow_mapping(vm_flags)) {
|
||||
if (is_cow) {
|
||||
if (addr != vm_start || end != vm_end)
|
||||
return -EINVAL;
|
||||
*vm_pgoff_p = pfn;
|
||||
|
|
@ -2979,7 +2988,7 @@ static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long ad
|
|||
if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
|
||||
return -EINVAL;
|
||||
|
||||
VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS);
|
||||
VM_WARN_ON_ONCE(!vma_test_all_flags_mask(vma, VMA_REMAP_FLAGS));
|
||||
|
||||
BUG_ON(addr >= end);
|
||||
pfn -= addr >> PAGE_SHIFT;
|
||||
|
|
@ -3103,9 +3112,9 @@ void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
|
|||
* check it again on complete and will fail there if specified addr is
|
||||
* invalid.
|
||||
*/
|
||||
get_remap_pgoff(desc->vm_flags, desc->start, desc->end,
|
||||
get_remap_pgoff(vma_desc_is_cow_mapping(desc), desc->start, desc->end,
|
||||
desc->start, desc->end, pfn, &desc->pgoff);
|
||||
desc->vm_flags |= VM_REMAP_FLAGS;
|
||||
vma_desc_set_flags_mask(desc, VMA_REMAP_FLAGS);
|
||||
}
|
||||
|
||||
static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long addr,
|
||||
|
|
@ -3114,13 +3123,12 @@ static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long
|
|||
unsigned long end = addr + PAGE_ALIGN(size);
|
||||
int err;
|
||||
|
||||
err = get_remap_pgoff(vma->vm_flags, addr, end,
|
||||
vma->vm_start, vma->vm_end,
|
||||
pfn, &vma->vm_pgoff);
|
||||
err = get_remap_pgoff(is_cow_mapping(vma->vm_flags), addr, end,
|
||||
vma->vm_start, vma->vm_end, pfn, &vma->vm_pgoff);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
vm_flags_set(vma, VM_REMAP_FLAGS);
|
||||
vma_set_flags_mask(vma, VMA_REMAP_FLAGS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -7316,7 +7324,7 @@ void folio_zero_user(struct folio *folio, unsigned long addr_hint)
|
|||
const unsigned long base_addr = ALIGN_DOWN(addr_hint, folio_size(folio));
|
||||
const long fault_idx = (addr_hint - base_addr) / PAGE_SIZE;
|
||||
const struct range pg = DEFINE_RANGE(0, folio_nr_pages(folio) - 1);
|
||||
const int radius = FOLIO_ZERO_LOCALITY_RADIUS;
|
||||
const long radius = FOLIO_ZERO_LOCALITY_RADIUS;
|
||||
struct range r[3];
|
||||
int i;
|
||||
|
||||
|
|
@ -7324,20 +7332,19 @@ void folio_zero_user(struct folio *folio, unsigned long addr_hint)
|
|||
* Faulting page and its immediate neighbourhood. Will be cleared at the
|
||||
* end to keep its cachelines hot.
|
||||
*/
|
||||
r[2] = DEFINE_RANGE(clamp_t(s64, fault_idx - radius, pg.start, pg.end),
|
||||
clamp_t(s64, fault_idx + radius, pg.start, pg.end));
|
||||
r[2] = DEFINE_RANGE(fault_idx - radius < (long)pg.start ? pg.start : fault_idx - radius,
|
||||
fault_idx + radius > (long)pg.end ? pg.end : fault_idx + radius);
|
||||
|
||||
|
||||
/* Region to the left of the fault */
|
||||
r[1] = DEFINE_RANGE(pg.start,
|
||||
clamp_t(s64, r[2].start - 1, pg.start - 1, r[2].start));
|
||||
r[1] = DEFINE_RANGE(pg.start, r[2].start - 1);
|
||||
|
||||
/* Region to the right of the fault: always valid for the common fault_idx=0 case. */
|
||||
r[0] = DEFINE_RANGE(clamp_t(s64, r[2].end + 1, r[2].end, pg.end + 1),
|
||||
pg.end);
|
||||
r[0] = DEFINE_RANGE(r[2].end + 1, pg.end);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(r); i++) {
|
||||
const unsigned long addr = base_addr + r[i].start * PAGE_SIZE;
|
||||
const unsigned int nr_pages = range_len(&r[i]);
|
||||
const long nr_pages = (long)range_len(&r[i]);
|
||||
struct page *page = folio_page(folio, r[i].start);
|
||||
|
||||
if (nr_pages > 0)
|
||||
|
|
|
|||
108
mm/mmap.c
108
mm/mmap.c
|
|
@ -108,7 +108,8 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
|
|||
if (IS_ERR_VALUE(mapped_addr))
|
||||
return mapped_addr;
|
||||
|
||||
return mlock_future_ok(current->mm, current->mm->def_flags, len)
|
||||
return mlock_future_ok(current->mm,
|
||||
current->mm->def_flags & VM_LOCKED, len)
|
||||
? 0 : -EAGAIN;
|
||||
}
|
||||
|
||||
|
|
@ -225,12 +226,12 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
|
|||
return hint;
|
||||
}
|
||||
|
||||
bool mlock_future_ok(const struct mm_struct *mm, vm_flags_t vm_flags,
|
||||
bool mlock_future_ok(const struct mm_struct *mm, bool is_vma_locked,
|
||||
unsigned long bytes)
|
||||
{
|
||||
unsigned long locked_pages, limit_pages;
|
||||
|
||||
if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
|
||||
if (!is_vma_locked || capable(CAP_IPC_LOCK))
|
||||
return true;
|
||||
|
||||
locked_pages = bytes >> PAGE_SHIFT;
|
||||
|
|
@ -416,7 +417,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
|
|||
if (!can_do_mlock())
|
||||
return -EPERM;
|
||||
|
||||
if (!mlock_future_ok(mm, vm_flags, len))
|
||||
if (!mlock_future_ok(mm, vm_flags & VM_LOCKED, len))
|
||||
return -EAGAIN;
|
||||
|
||||
if (file) {
|
||||
|
|
@ -594,7 +595,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
|
|||
* taken when vm_ops->mmap() is called
|
||||
*/
|
||||
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
|
||||
VM_NORESERVE,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT),
|
||||
HUGETLB_ANONHUGE_INODE,
|
||||
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
|
||||
if (IS_ERR(file))
|
||||
|
|
@ -1247,6 +1248,29 @@ limits_failed:
|
|||
}
|
||||
EXPORT_SYMBOL(vm_brk_flags);
|
||||
|
||||
static
|
||||
unsigned long tear_down_vmas(struct mm_struct *mm, struct vma_iterator *vmi,
|
||||
struct vm_area_struct *vma, unsigned long end)
|
||||
{
|
||||
unsigned long nr_accounted = 0;
|
||||
int count = 0;
|
||||
|
||||
mmap_assert_write_locked(mm);
|
||||
vma_iter_set(vmi, vma->vm_end);
|
||||
do {
|
||||
if (vma->vm_flags & VM_ACCOUNT)
|
||||
nr_accounted += vma_pages(vma);
|
||||
vma_mark_detached(vma);
|
||||
remove_vma(vma);
|
||||
count++;
|
||||
cond_resched();
|
||||
vma = vma_next(vmi);
|
||||
} while (vma && vma->vm_end <= end);
|
||||
|
||||
VM_WARN_ON_ONCE(count != mm->map_count);
|
||||
return nr_accounted;
|
||||
}
|
||||
|
||||
/* Release all mmaps. */
|
||||
void exit_mmap(struct mm_struct *mm)
|
||||
{
|
||||
|
|
@ -1254,7 +1278,7 @@ void exit_mmap(struct mm_struct *mm)
|
|||
struct vm_area_struct *vma;
|
||||
unsigned long nr_accounted = 0;
|
||||
VMA_ITERATOR(vmi, mm, 0);
|
||||
int count = 0;
|
||||
struct unmap_desc unmap;
|
||||
|
||||
/* mm's last user has gone, and its about to be pulled down */
|
||||
mmu_notifier_release(mm);
|
||||
|
|
@ -1263,18 +1287,19 @@ void exit_mmap(struct mm_struct *mm)
|
|||
arch_exit_mmap(mm);
|
||||
|
||||
vma = vma_next(&vmi);
|
||||
if (!vma || unlikely(xa_is_zero(vma))) {
|
||||
if (!vma) {
|
||||
/* Can happen if dup_mmap() received an OOM */
|
||||
mmap_read_unlock(mm);
|
||||
mmap_write_lock(mm);
|
||||
goto destroy;
|
||||
}
|
||||
|
||||
unmap_all_init(&unmap, &vmi, vma);
|
||||
flush_cache_mm(mm);
|
||||
tlb_gather_mmu_fullmm(&tlb, mm);
|
||||
/* update_hiwater_rss(mm) here? but nobody should be looking */
|
||||
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
|
||||
unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX);
|
||||
unmap_vmas(&tlb, &unmap);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
/*
|
||||
|
|
@ -1283,10 +1308,10 @@ void exit_mmap(struct mm_struct *mm)
|
|||
*/
|
||||
mm_flags_set(MMF_OOM_SKIP, mm);
|
||||
mmap_write_lock(mm);
|
||||
unmap.mm_wr_locked = true;
|
||||
mt_clear_in_rcu(&mm->mm_mt);
|
||||
vma_iter_set(&vmi, vma->vm_end);
|
||||
free_pgtables(&tlb, &vmi.mas, vma, FIRST_USER_ADDRESS,
|
||||
USER_PGTABLES_CEILING, true);
|
||||
unmap_pgtable_init(&unmap, &vmi);
|
||||
free_pgtables(&tlb, &unmap);
|
||||
tlb_finish_mmu(&tlb);
|
||||
|
||||
/*
|
||||
|
|
@ -1294,22 +1319,11 @@ void exit_mmap(struct mm_struct *mm)
|
|||
* enabled, without holding any MM locks besides the unreachable
|
||||
* mmap_write_lock.
|
||||
*/
|
||||
vma_iter_set(&vmi, vma->vm_end);
|
||||
do {
|
||||
if (vma->vm_flags & VM_ACCOUNT)
|
||||
nr_accounted += vma_pages(vma);
|
||||
vma_mark_detached(vma);
|
||||
remove_vma(vma);
|
||||
count++;
|
||||
cond_resched();
|
||||
vma = vma_next(&vmi);
|
||||
} while (vma && likely(!xa_is_zero(vma)));
|
||||
nr_accounted = tear_down_vmas(mm, &vmi, vma, ULONG_MAX);
|
||||
|
||||
BUG_ON(count != mm->map_count);
|
||||
|
||||
trace_exit_mmap(mm);
|
||||
destroy:
|
||||
__mt_destroy(&mm->mm_mt);
|
||||
trace_exit_mmap(mm);
|
||||
mmap_write_unlock(mm);
|
||||
vm_unacct_memory(nr_accounted);
|
||||
}
|
||||
|
|
@ -1840,20 +1854,46 @@ loop_out:
|
|||
ksm_fork(mm, oldmm);
|
||||
khugepaged_fork(mm, oldmm);
|
||||
} else {
|
||||
unsigned long end;
|
||||
|
||||
/*
|
||||
* The entire maple tree has already been duplicated. If the
|
||||
* mmap duplication fails, mark the failure point with
|
||||
* XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered,
|
||||
* stop releasing VMAs that have not been duplicated after this
|
||||
* point.
|
||||
* The entire maple tree has already been duplicated, but
|
||||
* replacing the vmas failed at mpnt (which could be NULL if
|
||||
* all were allocated but the last vma was not fully set up).
|
||||
* Use the start address of the failure point to clean up the
|
||||
* partially initialized tree.
|
||||
*/
|
||||
if (mpnt) {
|
||||
mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
|
||||
mas_store(&vmi.mas, XA_ZERO_ENTRY);
|
||||
/* Avoid OOM iterating a broken tree */
|
||||
mm_flags_set(MMF_OOM_SKIP, mm);
|
||||
if (!mm->map_count) {
|
||||
/* zero vmas were written to the new tree. */
|
||||
end = 0;
|
||||
} else if (mpnt) {
|
||||
/* partial tree failure */
|
||||
end = mpnt->vm_start;
|
||||
} else {
|
||||
/* All vmas were written to the new tree */
|
||||
end = ULONG_MAX;
|
||||
}
|
||||
|
||||
/* Hide mm from oom killer because the memory is being freed */
|
||||
mm_flags_set(MMF_OOM_SKIP, mm);
|
||||
if (end) {
|
||||
vma_iter_set(&vmi, 0);
|
||||
tmp = vma_next(&vmi);
|
||||
UNMAP_STATE(unmap, &vmi, /* first = */ tmp,
|
||||
/* vma_start = */ 0, /* vma_end = */ end,
|
||||
/* prev = */ NULL, /* next = */ NULL);
|
||||
|
||||
/*
|
||||
* Don't iterate over vmas beyond the failure point for
|
||||
* both unmap_vma() and free_pgtables().
|
||||
*/
|
||||
unmap.tree_end = end;
|
||||
flush_cache_mm(mm);
|
||||
unmap_region(&unmap);
|
||||
charge = tear_down_vmas(mm, &vmi, tmp, end);
|
||||
vm_unacct_memory(charge);
|
||||
}
|
||||
__mt_destroy(&mm->mm_mt);
|
||||
/*
|
||||
* The mm_struct is going to exit, but the locks will be dropped
|
||||
* first. Set the mm_struct as unstable is advisable as it is
|
||||
|
|
|
|||
|
|
@ -1740,7 +1740,7 @@ static int check_prep_vma(struct vma_remap_struct *vrm)
|
|||
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
|
||||
return -EFAULT;
|
||||
|
||||
if (!mlock_future_ok(mm, vma->vm_flags, vrm->delta))
|
||||
if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, vrm->delta))
|
||||
return -EAGAIN;
|
||||
|
||||
if (!may_expand_vm(mm, vma->vm_flags, vrm->delta >> PAGE_SHIFT))
|
||||
|
|
|
|||
|
|
@ -1429,6 +1429,7 @@ __always_inline bool __free_pages_prepare(struct page *page,
|
|||
|
||||
page_cpupid_reset_last(page);
|
||||
page->flags.f &= ~PAGE_FLAGS_CHECK_AT_PREP;
|
||||
page->private = 0;
|
||||
reset_page_owner(page, order);
|
||||
page_table_check_free(page, order);
|
||||
pgalloc_tag_sub(page, 1 << order);
|
||||
|
|
|
|||
38
mm/rmap.c
38
mm/rmap.c
|
|
@ -913,9 +913,11 @@ static bool folio_referenced_one(struct folio *folio,
|
|||
struct folio_referenced_arg *pra = arg;
|
||||
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
|
||||
int ptes = 0, referenced = 0;
|
||||
unsigned int nr;
|
||||
|
||||
while (page_vma_mapped_walk(&pvmw)) {
|
||||
address = pvmw.address;
|
||||
nr = 1;
|
||||
|
||||
if (vma->vm_flags & VM_LOCKED) {
|
||||
ptes++;
|
||||
|
|
@ -960,9 +962,21 @@ static bool folio_referenced_one(struct folio *folio,
|
|||
if (lru_gen_look_around(&pvmw))
|
||||
referenced++;
|
||||
} else if (pvmw.pte) {
|
||||
if (ptep_clear_flush_young_notify(vma, address,
|
||||
pvmw.pte))
|
||||
if (folio_test_large(folio)) {
|
||||
unsigned long end_addr = pmd_addr_end(address, vma->vm_end);
|
||||
unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT;
|
||||
pte_t pteval = ptep_get(pvmw.pte);
|
||||
|
||||
nr = folio_pte_batch(folio, pvmw.pte,
|
||||
pteval, max_nr);
|
||||
}
|
||||
|
||||
ptes += nr;
|
||||
if (clear_flush_young_ptes_notify(vma, address, pvmw.pte, nr))
|
||||
referenced++;
|
||||
/* Skip the batched PTEs */
|
||||
pvmw.pte += nr - 1;
|
||||
pvmw.address += (nr - 1) * PAGE_SIZE;
|
||||
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
|
||||
if (pmdp_clear_flush_young_notify(vma, address,
|
||||
pvmw.pmd))
|
||||
|
|
@ -972,7 +986,15 @@ static bool folio_referenced_one(struct folio *folio,
|
|||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
pra->mapcount--;
|
||||
pra->mapcount -= nr;
|
||||
/*
|
||||
* If we are sure that we batched the entire folio,
|
||||
* we can just optimize and stop right here.
|
||||
*/
|
||||
if (ptes == pvmw.nr_pages) {
|
||||
page_vma_mapped_walk_done(&pvmw);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (referenced)
|
||||
|
|
@ -1923,12 +1945,16 @@ static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
|
|||
end_addr = pmd_addr_end(addr, vma->vm_end);
|
||||
max_nr = (end_addr - addr) >> PAGE_SHIFT;
|
||||
|
||||
/* We only support lazyfree batching for now ... */
|
||||
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
|
||||
/* We only support lazyfree or file folios batching for now ... */
|
||||
if (folio_test_anon(folio) && folio_test_swapbacked(folio))
|
||||
return 1;
|
||||
|
||||
if (pte_unused(pte))
|
||||
return 1;
|
||||
|
||||
if (userfaultfd_wp(vma))
|
||||
return 1;
|
||||
|
||||
return folio_pte_batch(folio, pvmw->pte, pte, max_nr);
|
||||
}
|
||||
|
||||
|
|
@ -2291,7 +2317,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
|
|||
*
|
||||
* See Documentation/mm/mmu_notifier.rst
|
||||
*/
|
||||
dec_mm_counter(mm, mm_counter_file(folio));
|
||||
add_mm_counter(mm, mm_counter_file(folio), -nr_pages);
|
||||
}
|
||||
discard:
|
||||
if (unlikely(folio_test_hugetlb(folio))) {
|
||||
|
|
|
|||
|
|
@ -122,13 +122,12 @@ static int secretmem_mmap_prepare(struct vm_area_desc *desc)
|
|||
{
|
||||
const unsigned long len = vma_desc_size(desc);
|
||||
|
||||
if ((desc->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
|
||||
if (!vma_desc_test_flags(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT))
|
||||
return -EINVAL;
|
||||
|
||||
if (!mlock_future_ok(desc->mm, desc->vm_flags | VM_LOCKED, len))
|
||||
vma_desc_set_flags(desc, VMA_LOCKED_BIT, VMA_DONTDUMP_BIT);
|
||||
if (!mlock_future_ok(desc->mm, /*is_vma_locked=*/ true, len))
|
||||
return -EAGAIN;
|
||||
|
||||
desc->vm_flags |= VM_LOCKED | VM_DONTDUMP;
|
||||
desc->vm_ops = &secretmem_vm_ops;
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
57
mm/shmem.c
57
mm/shmem.c
|
|
@ -3064,7 +3064,7 @@ static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
|
|||
static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
|
||||
struct super_block *sb,
|
||||
struct inode *dir, umode_t mode,
|
||||
dev_t dev, unsigned long flags)
|
||||
dev_t dev, vma_flags_t flags)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct shmem_inode_info *info;
|
||||
|
|
@ -3092,7 +3092,8 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
|
|||
spin_lock_init(&info->lock);
|
||||
atomic_set(&info->stop_eviction, 0);
|
||||
info->seals = F_SEAL_SEAL;
|
||||
info->flags = (flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
|
||||
info->flags = vma_flags_test(&flags, VMA_NORESERVE_BIT)
|
||||
? SHMEM_F_NORESERVE : 0;
|
||||
info->i_crtime = inode_get_mtime(inode);
|
||||
info->fsflags = (dir == NULL) ? 0 :
|
||||
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
|
||||
|
|
@ -3145,7 +3146,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
|
|||
#ifdef CONFIG_TMPFS_QUOTA
|
||||
static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
|
||||
struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t dev, unsigned long flags)
|
||||
umode_t mode, dev_t dev, vma_flags_t flags)
|
||||
{
|
||||
int err;
|
||||
struct inode *inode;
|
||||
|
|
@ -3171,9 +3172,9 @@ errout:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
#else
|
||||
static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
|
||||
static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
|
||||
struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t dev, unsigned long flags)
|
||||
umode_t mode, dev_t dev, vma_flags_t flags)
|
||||
{
|
||||
return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
|
||||
}
|
||||
|
|
@ -3880,7 +3881,8 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
|
|||
if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
|
||||
return -EINVAL;
|
||||
|
||||
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
|
||||
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
|
|
@ -3915,7 +3917,8 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
|
|||
struct inode *inode;
|
||||
int error;
|
||||
|
||||
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
|
||||
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(inode)) {
|
||||
error = PTR_ERR(inode);
|
||||
goto err_out;
|
||||
|
|
@ -4112,7 +4115,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||
return -ENAMETOOLONG;
|
||||
|
||||
inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
|
||||
VM_NORESERVE);
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
|
|
@ -5113,7 +5116,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||
#endif /* CONFIG_TMPFS_QUOTA */
|
||||
|
||||
inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL,
|
||||
S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
|
||||
S_IFDIR | sbinfo->mode, 0,
|
||||
mk_vma_flags(VMA_NORESERVE_BIT));
|
||||
if (IS_ERR(inode)) {
|
||||
error = PTR_ERR(inode);
|
||||
goto failed;
|
||||
|
|
@ -5814,7 +5818,7 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
|
|||
|
||||
static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
|
||||
struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t dev, unsigned long flags)
|
||||
umode_t mode, dev_t dev, vma_flags_t flags)
|
||||
{
|
||||
struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
|
||||
return inode ? inode : ERR_PTR(-ENOSPC);
|
||||
|
|
@ -5825,10 +5829,11 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
|
|||
/* common code */
|
||||
|
||||
static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
|
||||
loff_t size, unsigned long vm_flags,
|
||||
loff_t size, vma_flags_t flags,
|
||||
unsigned int i_flags)
|
||||
{
|
||||
unsigned long flags = (vm_flags & VM_NORESERVE) ? SHMEM_F_NORESERVE : 0;
|
||||
const unsigned long shmem_flags =
|
||||
vma_flags_test(&flags, VMA_NORESERVE_BIT) ? SHMEM_F_NORESERVE : 0;
|
||||
struct inode *inode;
|
||||
struct file *res;
|
||||
|
||||
|
|
@ -5841,13 +5846,13 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
|
|||
if (is_idmapped_mnt(mnt))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (shmem_acct_size(flags, size))
|
||||
if (shmem_acct_size(shmem_flags, size))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
|
||||
S_IFREG | S_IRWXUGO, 0, vm_flags);
|
||||
S_IFREG | S_IRWXUGO, 0, flags);
|
||||
if (IS_ERR(inode)) {
|
||||
shmem_unacct_size(flags, size);
|
||||
shmem_unacct_size(shmem_flags, size);
|
||||
return ERR_CAST(inode);
|
||||
}
|
||||
inode->i_flags |= i_flags;
|
||||
|
|
@ -5870,9 +5875,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name,
|
|||
* checks are provided at the key or shm level rather than the inode.
|
||||
* @name: name for dentry (to be seen in /proc/<pid>/maps)
|
||||
* @size: size to be set for the file
|
||||
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
|
||||
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
|
||||
*/
|
||||
struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
|
||||
struct file *shmem_kernel_file_setup(const char *name, loff_t size,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
|
||||
}
|
||||
|
|
@ -5882,9 +5888,9 @@ EXPORT_SYMBOL_GPL(shmem_kernel_file_setup);
|
|||
* shmem_file_setup - get an unlinked file living in tmpfs
|
||||
* @name: name for dentry (to be seen in /proc/<pid>/maps)
|
||||
* @size: size to be set for the file
|
||||
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
|
||||
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
|
||||
*/
|
||||
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
|
||||
struct file *shmem_file_setup(const char *name, loff_t size, vma_flags_t flags)
|
||||
{
|
||||
return __shmem_file_setup(shm_mnt, name, size, flags, 0);
|
||||
}
|
||||
|
|
@ -5895,16 +5901,17 @@ EXPORT_SYMBOL_GPL(shmem_file_setup);
|
|||
* @mnt: the tmpfs mount where the file will be created
|
||||
* @name: name for dentry (to be seen in /proc/<pid>/maps)
|
||||
* @size: size to be set for the file
|
||||
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
|
||||
* @flags: VMA_NORESERVE_BIT suppresses pre-accounting of the entire object size
|
||||
*/
|
||||
struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
|
||||
loff_t size, unsigned long flags)
|
||||
loff_t size, vma_flags_t flags)
|
||||
{
|
||||
return __shmem_file_setup(mnt, name, size, flags, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
|
||||
|
||||
static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags)
|
||||
static struct file *__shmem_zero_setup(unsigned long start, unsigned long end,
|
||||
vma_flags_t flags)
|
||||
{
|
||||
loff_t size = end - start;
|
||||
|
||||
|
|
@ -5914,7 +5921,7 @@ static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, v
|
|||
* accessible to the user through its mapping, use S_PRIVATE flag to
|
||||
* bypass file security, in the same way as shmem_kernel_file_setup().
|
||||
*/
|
||||
return shmem_kernel_file_setup("dev/zero", size, vm_flags);
|
||||
return shmem_kernel_file_setup("dev/zero", size, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -5924,7 +5931,7 @@ static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, v
|
|||
*/
|
||||
int shmem_zero_setup(struct vm_area_struct *vma)
|
||||
{
|
||||
struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags);
|
||||
struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->flags);
|
||||
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
|
@ -5945,7 +5952,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
|
|||
*/
|
||||
int shmem_zero_setup_desc(struct vm_area_desc *desc)
|
||||
{
|
||||
struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags);
|
||||
struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vma_flags);
|
||||
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
|
|
|||
|
|
@ -1154,7 +1154,7 @@ int __compat_vma_mmap(const struct file_operations *f_op,
|
|||
|
||||
.pgoff = vma->vm_pgoff,
|
||||
.vm_file = vma->vm_file,
|
||||
.vm_flags = vma->vm_flags,
|
||||
.vma_flags = vma->flags,
|
||||
.page_prot = vma->vm_page_prot,
|
||||
|
||||
.action.type = MMAP_NOTHING, /* Default */
|
||||
|
|
|
|||
63
mm/vma.c
63
mm/vma.c
|
|
@ -15,7 +15,10 @@ struct mmap_state {
|
|||
unsigned long end;
|
||||
pgoff_t pgoff;
|
||||
unsigned long pglen;
|
||||
union {
|
||||
vm_flags_t vm_flags;
|
||||
vma_flags_t vma_flags;
|
||||
};
|
||||
struct file *file;
|
||||
pgprot_t page_prot;
|
||||
|
||||
|
|
@ -472,19 +475,16 @@ void remove_vma(struct vm_area_struct *vma)
|
|||
*
|
||||
* Called with the mm semaphore held.
|
||||
*/
|
||||
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
|
||||
struct vm_area_struct *prev, struct vm_area_struct *next)
|
||||
void unmap_region(struct unmap_desc *unmap)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
struct mm_struct *mm = unmap->first->vm_mm;
|
||||
struct mmu_gather tlb;
|
||||
|
||||
tlb_gather_mmu(&tlb, mm);
|
||||
update_hiwater_rss(mm);
|
||||
unmap_vmas(&tlb, mas, vma, vma->vm_start, vma->vm_end, vma->vm_end);
|
||||
mas_set(mas, vma->vm_end);
|
||||
free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
|
||||
next ? next->vm_start : USER_PGTABLES_CEILING,
|
||||
/* mm_wr_locked = */ true);
|
||||
unmap_vmas(&tlb, unmap);
|
||||
mas_set(unmap->mas, unmap->tree_reset);
|
||||
free_pgtables(&tlb, unmap);
|
||||
tlb_finish_mmu(&tlb);
|
||||
}
|
||||
|
||||
|
|
@ -1256,26 +1256,32 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
|||
static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
|
||||
struct ma_state *mas_detach, bool mm_wr_locked)
|
||||
{
|
||||
struct mmu_gather tlb;
|
||||
|
||||
if (!vms->clear_ptes) /* Nothing to do */
|
||||
return;
|
||||
|
||||
struct unmap_desc unmap = {
|
||||
.mas = mas_detach,
|
||||
.first = vms->vma,
|
||||
/* start and end may be different if there is no prev or next vma. */
|
||||
.pg_start = vms->unmap_start,
|
||||
.pg_end = vms->unmap_end,
|
||||
.vma_start = vms->start,
|
||||
.vma_end = vms->end,
|
||||
/*
|
||||
* The tree limits and reset differ from the normal case since it's a
|
||||
* side-tree
|
||||
*/
|
||||
.tree_reset = 1,
|
||||
.tree_end = vms->vma_count,
|
||||
/*
|
||||
* We can free page tables without write-locking mmap_lock because VMAs
|
||||
* were isolated before we downgraded mmap_lock.
|
||||
*/
|
||||
mas_set(mas_detach, 1);
|
||||
tlb_gather_mmu(&tlb, vms->vma->vm_mm);
|
||||
update_hiwater_rss(vms->vma->vm_mm);
|
||||
unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end,
|
||||
vms->vma_count);
|
||||
.mm_wr_locked = mm_wr_locked,
|
||||
};
|
||||
|
||||
if (!vms->clear_ptes) /* Nothing to do */
|
||||
return;
|
||||
|
||||
mas_set(mas_detach, 1);
|
||||
/* start and end may be different if there is no prev or next vma. */
|
||||
free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
|
||||
vms->unmap_end, mm_wr_locked);
|
||||
tlb_finish_mmu(&tlb);
|
||||
unmap_region(&unmap);
|
||||
vms->clear_ptes = false;
|
||||
}
|
||||
|
||||
|
|
@ -2366,7 +2372,7 @@ static void set_desc_from_map(struct vm_area_desc *desc,
|
|||
|
||||
desc->pgoff = map->pgoff;
|
||||
desc->vm_file = map->file;
|
||||
desc->vm_flags = map->vm_flags;
|
||||
desc->vma_flags = map->vma_flags;
|
||||
desc->page_prot = map->page_prot;
|
||||
}
|
||||
|
||||
|
|
@ -2461,13 +2467,14 @@ static int __mmap_new_file_vma(struct mmap_state *map,
|
|||
|
||||
error = mmap_file(vma->vm_file, vma);
|
||||
if (error) {
|
||||
UNMAP_STATE(unmap, vmi, vma, vma->vm_start, vma->vm_end,
|
||||
map->prev, map->next);
|
||||
fput(vma->vm_file);
|
||||
vma->vm_file = NULL;
|
||||
|
||||
vma_iter_set(vmi, vma->vm_end);
|
||||
/* Undo any partial mapping done by a device driver. */
|
||||
unmap_region(&vmi->mas, vma, map->prev, map->next);
|
||||
|
||||
unmap_region(&unmap);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
|
@ -2646,7 +2653,7 @@ static int call_mmap_prepare(struct mmap_state *map,
|
|||
map->file_doesnt_need_get = true;
|
||||
map->file = desc->vm_file;
|
||||
}
|
||||
map->vm_flags = desc->vm_flags;
|
||||
map->vma_flags = desc->vma_flags;
|
||||
map->page_prot = desc->page_prot;
|
||||
/* User-defined fields. */
|
||||
map->vm_ops = desc->vm_ops;
|
||||
|
|
@ -2819,7 +2826,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
|||
return -EINVAL;
|
||||
|
||||
/* Map writable and ensure this isn't a sealed memfd. */
|
||||
if (file && is_shared_maywrite(vm_flags)) {
|
||||
if (file && is_shared_maywrite_vm_flags(vm_flags)) {
|
||||
int error = mapping_map_writable(file->f_mapping);
|
||||
|
||||
if (error)
|
||||
|
|
@ -3049,7 +3056,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
|
|||
return -ENOMEM;
|
||||
|
||||
/* mlock limit tests */
|
||||
if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT))
|
||||
if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, grow << PAGE_SHIFT))
|
||||
return -ENOMEM;
|
||||
|
||||
/* Check to ensure the stack will not grow into a hugetlb-only region */
|
||||
|
|
|
|||
73
mm/vma.h
73
mm/vma.h
|
|
@ -155,6 +155,72 @@ struct vma_merge_struct {
|
|||
|
||||
};
|
||||
|
||||
struct unmap_desc {
|
||||
struct ma_state *mas; /* the maple state point to the first vma */
|
||||
struct vm_area_struct *first; /* The first vma */
|
||||
unsigned long pg_start; /* The first pagetable address to free (floor) */
|
||||
unsigned long pg_end; /* The last pagetable address to free (ceiling) */
|
||||
unsigned long vma_start; /* The min vma address */
|
||||
unsigned long vma_end; /* The max vma address */
|
||||
unsigned long tree_end; /* Maximum for the vma tree search */
|
||||
unsigned long tree_reset; /* Where to reset the vma tree walk */
|
||||
bool mm_wr_locked; /* If the mmap write lock is held */
|
||||
};
|
||||
|
||||
/*
|
||||
* unmap_all_init() - Initialize unmap_desc to remove all vmas, point the
|
||||
* pg_start and pg_end to a safe location.
|
||||
*/
|
||||
static inline void unmap_all_init(struct unmap_desc *unmap,
|
||||
struct vma_iterator *vmi, struct vm_area_struct *vma)
|
||||
{
|
||||
unmap->mas = &vmi->mas;
|
||||
unmap->first = vma;
|
||||
unmap->pg_start = FIRST_USER_ADDRESS;
|
||||
unmap->pg_end = USER_PGTABLES_CEILING;
|
||||
unmap->vma_start = 0;
|
||||
unmap->vma_end = ULONG_MAX;
|
||||
unmap->tree_end = ULONG_MAX;
|
||||
unmap->tree_reset = vma->vm_end;
|
||||
unmap->mm_wr_locked = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* unmap_pgtable_init() - Initialize unmap_desc to remove all page tables within
|
||||
* the user range.
|
||||
*
|
||||
* ARM can have mappings outside of vmas.
|
||||
* See: e2cdef8c847b4 ("[PATCH] freepgt: free_pgtables from FIRST_USER_ADDRESS")
|
||||
*
|
||||
* ARM LPAE uses page table mappings beyond the USER_PGTABLES_CEILING
|
||||
* See: CONFIG_ARM_LPAE in arch/arm/include/asm/pgtable.h
|
||||
*/
|
||||
static inline void unmap_pgtable_init(struct unmap_desc *unmap,
|
||||
struct vma_iterator *vmi)
|
||||
{
|
||||
vma_iter_set(vmi, unmap->tree_reset);
|
||||
unmap->vma_start = FIRST_USER_ADDRESS;
|
||||
unmap->vma_end = USER_PGTABLES_CEILING;
|
||||
unmap->tree_end = USER_PGTABLES_CEILING;
|
||||
}
|
||||
|
||||
/*
 * UNMAP_STATE() - Declare and initialise a struct unmap_desc called @name.
 *
 * @_vmi:       vma iterator whose maple state is used for the walk
 * @_vma:       first vma to unmap; the tree walk is reset to its vm_end
 * @_vma_start: lowest vma address
 * @_vma_end:   highest vma address
 * @_prev:      vma before the range or NULL; bounds the page table floor
 * @_next:      vma after the range or NULL; bounds the page table ceiling and
 *              the tree search
 *
 * The descriptor is marked as holding the mmap write lock.
 *
 * Every argument is parenthesised so that expression arguments expand with
 * the intended precedence.  @_vma, @_prev and @_next are still evaluated more
 * than once, so they must not have side effects.
 */
#define UNMAP_STATE(name, _vmi, _vma, _vma_start, _vma_end, _prev, _next) \
	struct unmap_desc name = { \
		.mas = &(_vmi)->mas, \
		.first = (_vma), \
		.pg_start = (_prev) ? \
			((struct vm_area_struct *)(_prev))->vm_end : \
			FIRST_USER_ADDRESS, \
		.pg_end = (_next) ? \
			((struct vm_area_struct *)(_next))->vm_start : \
			USER_PGTABLES_CEILING, \
		.vma_start = (_vma_start), \
		.vma_end = (_vma_end), \
		.tree_end = (_next) ? \
			((struct vm_area_struct *)(_next))->vm_start : \
			USER_PGTABLES_CEILING, \
		.tree_reset = (_vma)->vm_end, \
		.mm_wr_locked = true, \
	}
|
||||
|
||||
static inline bool vmg_nomem(struct vma_merge_struct *vmg)
|
||||
{
|
||||
return vmg->state == VMA_MERGE_ERROR_NOMEM;
|
||||
|
|
@ -243,8 +309,7 @@ static inline void set_vma_from_desc(struct vm_area_struct *vma,
|
|||
vma->vm_pgoff = desc->pgoff;
|
||||
if (desc->vm_file != vma->vm_file)
|
||||
vma_set_file(vma, desc->vm_file);
|
||||
if (desc->vm_flags != vma->vm_flags)
|
||||
vm_flags_set(vma, desc->vm_flags);
|
||||
vma->flags = desc->vma_flags;
|
||||
vma->vm_page_prot = desc->page_prot;
|
||||
|
||||
/* User-defined fields. */
|
||||
|
|
@ -262,9 +327,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
|
|||
bool unlock);
|
||||
|
||||
void remove_vma(struct vm_area_struct *vma);
|
||||
|
||||
void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
|
||||
struct vm_area_struct *prev, struct vm_area_struct *next);
|
||||
void unmap_region(struct unmap_desc *unmap);
|
||||
|
||||
/**
|
||||
* vma_modify_flags() - Perform any necessary split/merge in preparation for
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@
|
|||
#include <linux/swap.h>
|
||||
#include <linux/uprobes.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/current.h>
|
||||
#include <asm/tlb.h>
|
||||
|
|
|
|||
33
mm/vmscan.c
33
mm/vmscan.c
|
|
@ -343,19 +343,21 @@ static void flush_reclaim_state(struct scan_control *sc)
|
|||
static bool can_demote(int nid, struct scan_control *sc,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
int demotion_nid;
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
nodemask_t allowed_mask;
|
||||
|
||||
if (!numa_demotion_enabled)
|
||||
if (!pgdat || !numa_demotion_enabled)
|
||||
return false;
|
||||
if (sc && sc->no_demotion)
|
||||
return false;
|
||||
|
||||
demotion_nid = next_demotion_node(nid);
|
||||
if (demotion_nid == NUMA_NO_NODE)
|
||||
node_get_allowed_targets(pgdat, &allowed_mask);
|
||||
if (nodes_empty(allowed_mask))
|
||||
return false;
|
||||
|
||||
/* If demotion node isn't in the cgroup's mems_allowed, fall back */
|
||||
return mem_cgroup_node_allowed(memcg, demotion_nid);
|
||||
/* Filter out nodes that are not in cgroup's mems_allowed. */
|
||||
mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
|
||||
return !nodes_empty(allowed_mask);
|
||||
}
|
||||
|
||||
static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
|
||||
|
|
@ -1017,9 +1019,10 @@ static struct folio *alloc_demote_folio(struct folio *src,
|
|||
* Folios which are not demoted are left on @demote_folios.
|
||||
*/
|
||||
static unsigned int demote_folio_list(struct list_head *demote_folios,
|
||||
struct pglist_data *pgdat)
|
||||
struct pglist_data *pgdat,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
int target_nid = next_demotion_node(pgdat->node_id);
|
||||
int target_nid;
|
||||
unsigned int nr_succeeded;
|
||||
nodemask_t allowed_mask;
|
||||
|
||||
|
|
@ -1031,7 +1034,6 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
|
|||
*/
|
||||
.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
|
||||
__GFP_NOMEMALLOC | GFP_NOWAIT,
|
||||
.nid = target_nid,
|
||||
.nmask = &allowed_mask,
|
||||
.reason = MR_DEMOTION,
|
||||
};
|
||||
|
|
@ -1039,10 +1041,17 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
|
|||
if (list_empty(demote_folios))
|
||||
return 0;
|
||||
|
||||
if (target_nid == NUMA_NO_NODE)
|
||||
node_get_allowed_targets(pgdat, &allowed_mask);
|
||||
mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
|
||||
if (nodes_empty(allowed_mask))
|
||||
return 0;
|
||||
|
||||
node_get_allowed_targets(pgdat, &allowed_mask);
|
||||
target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
|
||||
if (target_nid == NUMA_NO_NODE)
|
||||
/* No lower-tier nodes or nodes were hot-unplugged. */
|
||||
return 0;
|
||||
|
||||
mtc.nid = target_nid;
|
||||
|
||||
/* Demotion ignores all cpuset and mempolicy settings */
|
||||
migrate_pages(demote_folios, alloc_demote_folio, NULL,
|
||||
|
|
@ -1564,7 +1573,7 @@ keep:
|
|||
/* 'folio_list' is always empty here */
|
||||
|
||||
/* Migrate folios selected for demotion */
|
||||
nr_demoted = demote_folio_list(&demote_folios, pgdat);
|
||||
nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
|
||||
nr_reclaimed += nr_demoted;
|
||||
stat->nr_demoted += nr_demoted;
|
||||
/* Folios that could not be demoted are still in @demote_folios */
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
|
|||
0, enckey);
|
||||
|
||||
/* save aligned data to file */
|
||||
file = shmem_kernel_file_setup("", enclen, 0);
|
||||
file = shmem_kernel_file_setup("", enclen, EMPTY_VMA_FLAGS);
|
||||
if (IS_ERR(file)) {
|
||||
ret = PTR_ERR(file);
|
||||
goto err_enckey;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@ void __bitmap_set(unsigned long *map, unsigned int start, int len);
|
|||
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
|
||||
bool __bitmap_intersects(const unsigned long *bitmap1,
|
||||
const unsigned long *bitmap2, unsigned int bits);
|
||||
bool __bitmap_subset(const unsigned long *bitmap1,
|
||||
const unsigned long *bitmap2, unsigned int nbits);
|
||||
bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
|
||||
const unsigned long *bitmap2, unsigned int nbits);
|
||||
|
||||
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
|
||||
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
|
||||
|
|
@ -81,6 +85,15 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
|
|||
__bitmap_or(dst, src1, src2, nbits);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
|
||||
const unsigned long *src2, unsigned int nbits)
|
||||
{
|
||||
if (small_const_nbits(nbits))
|
||||
return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
|
||||
return __bitmap_andnot(dst, src1, src2, nbits);
|
||||
}
|
||||
|
||||
static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused)
|
||||
{
|
||||
return malloc(bitmap_size(nbits));
|
||||
|
|
@ -157,6 +170,15 @@ static inline bool bitmap_intersects(const unsigned long *src1,
|
|||
return __bitmap_intersects(src1, src2, nbits);
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
|
||||
{
|
||||
if (small_const_nbits(nbits))
|
||||
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
|
||||
else
|
||||
return __bitmap_subset(src1, src2, nbits);
|
||||
}
|
||||
|
||||
static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
|
||||
{
|
||||
if (__builtin_constant_p(nbits) && nbits == 1)
|
||||
|
|
|
|||
|
|
@ -140,3 +140,32 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len)
|
|||
*p &= ~mask_to_clear;
|
||||
}
|
||||
}
|
||||
|
||||
bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
|
||||
const unsigned long *bitmap2, unsigned int bits)
|
||||
{
|
||||
unsigned int k;
|
||||
unsigned int lim = bits/BITS_PER_LONG;
|
||||
unsigned long result = 0;
|
||||
|
||||
for (k = 0; k < lim; k++)
|
||||
result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
|
||||
if (bits % BITS_PER_LONG)
|
||||
result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
|
||||
BITMAP_LAST_WORD_MASK(bits));
|
||||
return result != 0;
|
||||
}
|
||||
|
||||
bool __bitmap_subset(const unsigned long *bitmap1,
|
||||
const unsigned long *bitmap2, unsigned int bits)
|
||||
{
|
||||
unsigned int k, lim = bits/BITS_PER_LONG;
|
||||
for (k = 0; k < lim; ++k)
|
||||
if (bitmap1[k] & ~bitmap2[k])
|
||||
return false;
|
||||
|
||||
if (bits % BITS_PER_LONG)
|
||||
if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
1
tools/testing/selftests/mm/.gitignore
vendored
1
tools/testing/selftests/mm/.gitignore
vendored
|
|
@ -12,6 +12,7 @@ map_hugetlb
|
|||
map_populate
|
||||
thuge-gen
|
||||
compaction_test
|
||||
memory-failure
|
||||
migration
|
||||
mlock2-tests
|
||||
mrelease_test
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ TEST_GEN_FILES += map_populate
|
|||
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64))
|
||||
TEST_GEN_FILES += memfd_secret
|
||||
endif
|
||||
TEST_GEN_FILES += memory-failure
|
||||
TEST_GEN_FILES += migration
|
||||
TEST_GEN_FILES += mkdirty
|
||||
TEST_GEN_FILES += mlock-random-test
|
||||
|
|
@ -154,6 +155,7 @@ TEST_PROGS += ksft_ksm_numa.sh
|
|||
TEST_PROGS += ksft_madv_guard.sh
|
||||
TEST_PROGS += ksft_madv_populate.sh
|
||||
TEST_PROGS += ksft_memfd_secret.sh
|
||||
TEST_PROGS += ksft_memory_failure.sh
|
||||
TEST_PROGS += ksft_migration.sh
|
||||
TEST_PROGS += ksft_mkdirty.sh
|
||||
TEST_PROGS += ksft_mlock.sh
|
||||
|
|
|
|||
|
|
@ -11,3 +11,5 @@ CONFIG_ANON_VMA_NAME=y
|
|||
CONFIG_FTRACE=y
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_UPROBES=y
|
||||
CONFIG_MEMORY_FAILURE=y
|
||||
CONFIG_HWPOISON_INJECT=m
|
||||
|
|
|
|||
4
tools/testing/selftests/mm/ksft_memory_failure.sh
Executable file
4
tools/testing/selftests/mm/ksft_memory_failure.sh
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh -e
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
./run_vmtests.sh -t memory-failure
|
||||
359
tools/testing/selftests/mm/memory-failure.c
Normal file
359
tools/testing/selftests/mm/memory-failure.c
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Memory-failure functional tests.
|
||||
*
|
||||
* Author(s): Miaohe Lin <linmiaohe@huawei.com>
|
||||
*/
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/string.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <linux/magic.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "vm_util.h"
|
||||
|
||||
enum inject_type {
|
||||
MADV_HARD,
|
||||
MADV_SOFT,
|
||||
};
|
||||
|
||||
enum result_type {
|
||||
MADV_HARD_ANON,
|
||||
MADV_HARD_CLEAN_PAGECACHE,
|
||||
MADV_HARD_DIRTY_PAGECACHE,
|
||||
MADV_SOFT_ANON,
|
||||
MADV_SOFT_CLEAN_PAGECACHE,
|
||||
MADV_SOFT_DIRTY_PAGECACHE,
|
||||
};
|
||||
|
||||
static jmp_buf signal_jmp_buf;
|
||||
static siginfo_t siginfo;
|
||||
const char *pagemap_proc = "/proc/self/pagemap";
|
||||
const char *kpageflags_proc = "/proc/kpageflags";
|
||||
|
||||
FIXTURE(memory_failure)
|
||||
{
|
||||
unsigned long page_size;
|
||||
unsigned long corrupted_size;
|
||||
unsigned long pfn;
|
||||
int pagemap_fd;
|
||||
int kpageflags_fd;
|
||||
bool triggered;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT(memory_failure)
|
||||
{
|
||||
enum inject_type type;
|
||||
int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr);
|
||||
};
|
||||
|
||||
static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
|
||||
{
|
||||
return madvise(vaddr, self->page_size, MADV_HWPOISON);
|
||||
}
|
||||
|
||||
FIXTURE_VARIANT_ADD(memory_failure, madv_hard)
|
||||
{
|
||||
.type = MADV_HARD,
|
||||
.inject = madv_hard_inject,
|
||||
};
|
||||
|
||||
static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
|
||||
{
|
||||
return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE);
|
||||
}
|
||||
|
||||
FIXTURE_VARIANT_ADD(memory_failure, madv_soft)
|
||||
{
|
||||
.type = MADV_SOFT,
|
||||
.inject = madv_soft_inject,
|
||||
};
|
||||
|
||||
static void sigbus_action(int signo, siginfo_t *si, void *args)
|
||||
{
|
||||
memcpy(&siginfo, si, sizeof(siginfo_t));
|
||||
siglongjmp(signal_jmp_buf, 1);
|
||||
}
|
||||
|
||||
static int setup_sighandler(void)
|
||||
{
|
||||
struct sigaction sa = {
|
||||
.sa_sigaction = sigbus_action,
|
||||
.sa_flags = SA_SIGINFO,
|
||||
};
|
||||
|
||||
return sigaction(SIGBUS, &sa, NULL);
|
||||
}
|
||||
|
||||
FIXTURE_SETUP(memory_failure)
|
||||
{
|
||||
memset(self, 0, sizeof(*self));
|
||||
|
||||
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
|
||||
|
||||
memset(&siginfo, 0, sizeof(siginfo));
|
||||
if (setup_sighandler())
|
||||
SKIP(return, "setup sighandler failed.\n");
|
||||
|
||||
self->pagemap_fd = open(pagemap_proc, O_RDONLY);
|
||||
if (self->pagemap_fd == -1)
|
||||
SKIP(return, "open %s failed.\n", pagemap_proc);
|
||||
|
||||
self->kpageflags_fd = open(kpageflags_proc, O_RDONLY);
|
||||
if (self->kpageflags_fd == -1)
|
||||
SKIP(return, "open %s failed.\n", kpageflags_proc);
|
||||
}
|
||||
|
||||
static void teardown_sighandler(void)
|
||||
{
|
||||
struct sigaction sa = {
|
||||
.sa_handler = SIG_DFL,
|
||||
.sa_flags = SA_SIGINFO,
|
||||
};
|
||||
|
||||
sigaction(SIGBUS, &sa, NULL);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(memory_failure)
|
||||
{
|
||||
close(self->kpageflags_fd);
|
||||
close(self->pagemap_fd);
|
||||
teardown_sighandler();
|
||||
}
|
||||
|
||||
static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
|
||||
void *vaddr)
|
||||
{
|
||||
self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr);
|
||||
ASSERT_NE(self->pfn, -1UL);
|
||||
|
||||
ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0);
|
||||
}
|
||||
|
||||
/*
 * check_memory() - verify that a buffer still holds the 0xce fill pattern.
 * @vaddr: start of the buffer to inspect
 * @size:  number of bytes to inspect
 *
 * Returns true when every one of the @size bytes equals 0xce and false on the
 * first mismatch.  An empty range trivially matches.
 */
static bool check_memory(void *vaddr, unsigned long size)
{
	/* Walk with a byte pointer: arithmetic on void * is a GNU extension. */
	const unsigned char *p = vaddr;
	unsigned char pattern[64];

	memset(pattern, 0xce, sizeof(pattern));
	while (size) {
		/* The last chunk may be shorter than the pattern buffer. */
		unsigned long chunk = size < sizeof(pattern) ?
				      size : sizeof(pattern);

		if (memcmp(p, pattern, chunk))
			return false;
		p += chunk;
		size -= chunk;
	}

	return true;
}
|
||||
|
||||
static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
|
||||
void *vaddr, enum result_type type, int setjmp)
|
||||
{
|
||||
unsigned long size;
|
||||
uint64_t pfn_flags;
|
||||
|
||||
switch (type) {
|
||||
case MADV_SOFT_ANON:
|
||||
case MADV_HARD_CLEAN_PAGECACHE:
|
||||
case MADV_SOFT_CLEAN_PAGECACHE:
|
||||
case MADV_SOFT_DIRTY_PAGECACHE:
|
||||
/* It is not expected to receive a SIGBUS signal. */
|
||||
ASSERT_EQ(setjmp, 0);
|
||||
|
||||
/* The page content should remain unchanged. */
|
||||
ASSERT_TRUE(check_memory(vaddr, self->page_size));
|
||||
|
||||
/* The backing pfn of addr should have changed. */
|
||||
ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn);
|
||||
break;
|
||||
case MADV_HARD_ANON:
|
||||
case MADV_HARD_DIRTY_PAGECACHE:
|
||||
/* The SIGBUS signal should have been received. */
|
||||
ASSERT_EQ(setjmp, 1);
|
||||
|
||||
/* Check if siginfo contains correct SIGBUS context. */
|
||||
ASSERT_EQ(siginfo.si_signo, SIGBUS);
|
||||
ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR);
|
||||
ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size);
|
||||
ASSERT_EQ(siginfo.si_addr, vaddr);
|
||||
|
||||
/* XXX Check backing pte is hwpoison entry when supported. */
|
||||
ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr));
|
||||
break;
|
||||
default:
|
||||
SKIP(return, "unexpected inject type %d.\n", type);
|
||||
}
|
||||
|
||||
/* Check if the value of HardwareCorrupted has increased. */
|
||||
ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
|
||||
ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024);
|
||||
|
||||
/* Check if HWPoison flag is set. */
|
||||
ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
|
||||
ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
|
||||
}
|
||||
|
||||
static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
|
||||
void *vaddr)
|
||||
{
|
||||
unsigned long size;
|
||||
uint64_t pfn_flags;
|
||||
|
||||
ASSERT_EQ(unpoison_memory(self->pfn), 0);
|
||||
|
||||
/* Check if HWPoison flag is cleared. */
|
||||
ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
|
||||
ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
|
||||
|
||||
/* Check if the value of HardwareCorrupted has decreased. */
|
||||
ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
|
||||
ASSERT_EQ(size, self->corrupted_size);
|
||||
}
|
||||
|
||||
TEST_F(memory_failure, anon)
|
||||
{
|
||||
char *addr;
|
||||
int ret;
|
||||
|
||||
addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (addr == MAP_FAILED)
|
||||
SKIP(return, "mmap failed, not enough memory.\n");
|
||||
memset(addr, 0xce, self->page_size);
|
||||
|
||||
prepare(_metadata, self, addr);
|
||||
|
||||
ret = sigsetjmp(signal_jmp_buf, 1);
|
||||
if (!self->triggered) {
|
||||
self->triggered = true;
|
||||
ASSERT_EQ(variant->inject(self, addr), 0);
|
||||
FORCE_READ(*addr);
|
||||
}
|
||||
|
||||
if (variant->type == MADV_HARD)
|
||||
check(_metadata, self, addr, MADV_HARD_ANON, ret);
|
||||
else
|
||||
check(_metadata, self, addr, MADV_SOFT_ANON, ret);
|
||||
|
||||
cleanup(_metadata, self, addr);
|
||||
|
||||
ASSERT_EQ(munmap(addr, self->page_size), 0);
|
||||
}
|
||||
|
||||
/*
 * prepare_file() - create an already-unlinked test file of a given length.
 * @fname: path at which the file is created (and immediately unlinked)
 * @size:  length passed to ftruncate()
 *
 * Returns the open descriptor, or a negative value when the file could not be
 * created or could not be resized.
 */
static int prepare_file(const char *fname, unsigned long size)
{
	int fd;

	fd = open(fname, O_RDWR | O_CREAT, 0664);
	if (fd < 0)
		return fd;

	unlink(fname);

	/*
	 * Callers map @size bytes of the file and write to the mapping, so a
	 * failed resize has to be reported rather than handing back an fd
	 * for a file of the wrong length.
	 */
	if (ftruncate(fd, size)) {
		close(fd);
		return -1;
	}

	return fd;
}
|
||||
|
||||
/*
 * Borrowed from mm/gup_longterm.c.
 *
 * Returns the f_type reported by fstatfs() for @fd, or 0 if fstatfs() fails.
 * The call is retried for as long as it fails with EINTR.
 */
static int get_fs_type(int fd)
{
	struct statfs sfs;

	for (;;) {
		if (!fstatfs(fd, &sfs))
			return (int)sfs.f_type;
		if (errno != EINTR)
			return 0;
	}
}
|
||||
|
||||
TEST_F(memory_failure, clean_pagecache)
|
||||
{
|
||||
int fd;
|
||||
char *addr;
|
||||
int ret;
|
||||
int fs_type;
|
||||
|
||||
fd = prepare_file("./clean-page-cache-test-file", self->page_size);
|
||||
if (fd < 0)
|
||||
SKIP(return, "failed to open test file.\n");
|
||||
fs_type = get_fs_type(fd);
|
||||
if (!fs_type || fs_type == TMPFS_MAGIC)
|
||||
SKIP(return, "unsupported filesystem :%x\n", fs_type);
|
||||
|
||||
addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, fd, 0);
|
||||
if (addr == MAP_FAILED)
|
||||
SKIP(return, "mmap failed, not enough memory.\n");
|
||||
memset(addr, 0xce, self->page_size);
|
||||
fsync(fd);
|
||||
|
||||
prepare(_metadata, self, addr);
|
||||
|
||||
ret = sigsetjmp(signal_jmp_buf, 1);
|
||||
if (!self->triggered) {
|
||||
self->triggered = true;
|
||||
ASSERT_EQ(variant->inject(self, addr), 0);
|
||||
FORCE_READ(*addr);
|
||||
}
|
||||
|
||||
if (variant->type == MADV_HARD)
|
||||
check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret);
|
||||
else
|
||||
check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret);
|
||||
|
||||
cleanup(_metadata, self, addr);
|
||||
|
||||
ASSERT_EQ(munmap(addr, self->page_size), 0);
|
||||
|
||||
ASSERT_EQ(close(fd), 0);
|
||||
}
|
||||
|
||||
TEST_F(memory_failure, dirty_pagecache)
|
||||
{
|
||||
int fd;
|
||||
char *addr;
|
||||
int ret;
|
||||
int fs_type;
|
||||
|
||||
fd = prepare_file("./dirty-page-cache-test-file", self->page_size);
|
||||
if (fd < 0)
|
||||
SKIP(return, "failed to open test file.\n");
|
||||
fs_type = get_fs_type(fd);
|
||||
if (!fs_type || fs_type == TMPFS_MAGIC)
|
||||
SKIP(return, "unsupported filesystem :%x\n", fs_type);
|
||||
|
||||
addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, fd, 0);
|
||||
if (addr == MAP_FAILED)
|
||||
SKIP(return, "mmap failed, not enough memory.\n");
|
||||
memset(addr, 0xce, self->page_size);
|
||||
|
||||
prepare(_metadata, self, addr);
|
||||
|
||||
ret = sigsetjmp(signal_jmp_buf, 1);
|
||||
if (!self->triggered) {
|
||||
self->triggered = true;
|
||||
ASSERT_EQ(variant->inject(self, addr), 0);
|
||||
FORCE_READ(*addr);
|
||||
}
|
||||
|
||||
if (variant->type == MADV_HARD)
|
||||
check(_metadata, self, addr, MADV_HARD_DIRTY_PAGECACHE, ret);
|
||||
else
|
||||
check(_metadata, self, addr, MADV_SOFT_DIRTY_PAGECACHE, ret);
|
||||
|
||||
cleanup(_metadata, self, addr);
|
||||
|
||||
ASSERT_EQ(munmap(addr, self->page_size), 0);
|
||||
|
||||
ASSERT_EQ(close(fd), 0);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
|
@ -91,6 +91,8 @@ separated by spaces:
|
|||
test VMA merge cases behave as expected
|
||||
- rmap
|
||||
test rmap behaves as expected
|
||||
- memory-failure
|
||||
test memory-failure behaves as expected
|
||||
|
||||
example: ./run_vmtests.sh -t "hmm mmap ksm"
|
||||
EOF
|
||||
|
|
@ -527,6 +529,25 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
|
|||
|
||||
CATEGORY="rmap" run_test ./rmap
|
||||
|
||||
# Try to load hwpoison_inject if not present.
|
||||
HWPOISON_DIR=/sys/kernel/debug/hwpoison/
|
||||
if [ ! -d "$HWPOISON_DIR" ]; then
|
||||
if ! modprobe -q -R hwpoison_inject; then
|
||||
echo "Module hwpoison_inject not found, skipping..."
|
||||
else
|
||||
modprobe hwpoison_inject > /dev/null 2>&1
|
||||
LOADED_MOD=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "$HWPOISON_DIR" ]; then
|
||||
CATEGORY="memory-failure" run_test ./memory-failure
|
||||
fi
|
||||
|
||||
if [ -n "${LOADED_MOD}" ]; then
|
||||
modprobe -r hwpoison_inject > /dev/null 2>&1
|
||||
fi
|
||||
|
||||
if [ "${HAVE_HUGEPAGES}" = 1 ]; then
|
||||
echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -723,3 +723,44 @@ int ksm_stop(void)
|
|||
close(ksm_fd);
|
||||
return ret == 1 ? 0 : -errno;
|
||||
}
|
||||
|
||||
/*
 * get_hardware_corrupted_size() - read "HardwareCorrupted:" from /proc/meminfo.
 * @val: receives the number parsed from the "HardwareCorrupted: <n> kB" line
 *
 * Returns 0 when the line was found and parsed, -1 when /proc/meminfo could
 * not be opened or contains no such line (in which case *val is untouched).
 */
int get_hardware_corrupted_size(unsigned long *val)
{
	unsigned long kb;
	size_t cap = 0;
	char *buf = NULL;
	int status = -1;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (!meminfo)
		return -1;

	/* Stop reading as soon as the first matching line has been parsed. */
	while (status && getline(&buf, &cap, meminfo) > 0) {
		if (sscanf(buf, "HardwareCorrupted: %12lu kB", &kb) != 1)
			continue;
		*val = kb;
		status = 0;
	}

	free(buf);
	fclose(meminfo);
	return status;
}
|
||||
|
||||
/*
 * unpoison_memory() - request unpoisoning of a page frame.
 * @pfn: page frame number, written as "0x<hex>\n" to the hwpoison debugfs
 *       unpoison-pfn file
 *
 * Returns 0 when the whole request was written, a negative errno value when
 * the file could not be opened or the write failed, and -EIO when the write
 * was short.
 */
int unpoison_memory(unsigned long pfn)
{
	int unpoison_fd, len, err = 0;
	char buf[32];
	ssize_t ret;

	unpoison_fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY);
	if (unpoison_fd < 0)
		return -errno;

	len = snprintf(buf, sizeof(buf), "0x%lx\n", pfn);
	ret = write(unpoison_fd, buf, len);
	/* Latch the error before close() gets a chance to overwrite errno. */
	if (ret < 0)
		err = -errno;
	else if (ret != len)
		err = -EIO;
	close(unpoison_fd);

	return err;
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#define KPF_COMPOUND_HEAD BIT_ULL(15)
|
||||
#define KPF_COMPOUND_TAIL BIT_ULL(16)
|
||||
#define KPF_HWPOISON BIT_ULL(19)
|
||||
#define KPF_THP BIT_ULL(22)
|
||||
/*
|
||||
* Ignore the checkpatch warning, we must read from x but don't want to do
|
||||
|
|
@ -154,6 +155,8 @@ long ksm_get_full_scans(void);
|
|||
int ksm_use_zero_pages(void);
|
||||
int ksm_start(void);
|
||||
int ksm_stop(void);
|
||||
int get_hardware_corrupted_size(unsigned long *val);
|
||||
int unpoison_memory(unsigned long pfn);
|
||||
|
||||
/*
|
||||
* On ppc64 this will only work with radix 2M hugepage size
|
||||
|
|
|
|||
|
|
@ -6,10 +6,13 @@ default: vma
|
|||
|
||||
include ../shared/shared.mk
|
||||
|
||||
OFILES = $(SHARED_OFILES) vma.o maple-shim.o
|
||||
OFILES = $(SHARED_OFILES) main.o shared.o maple-shim.o
|
||||
TARGETS = vma
|
||||
|
||||
vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
|
||||
# These can be varied to test different sizes.
|
||||
CFLAGS += -DNUM_VMA_FLAG_BITS=128 -DNUM_MM_FLAG_BITS=128
|
||||
|
||||
main.o: main.c shared.c shared.h vma_internal.h tests/merge.c tests/mmap.c tests/vma.c ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h include/custom.h include/dup.h include/stubs.h
|
||||
|
||||
vma: $(OFILES)
|
||||
$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
|
||||
|
|
|
|||
119
tools/testing/vma/include/custom.h
Normal file
119
tools/testing/vma/include/custom.h
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0+ */
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Contains declarations that exist in the kernel which have been CUSTOMISED for
|
||||
* testing purposes to facilitate userland VMA testing.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern unsigned long mmap_min_addr;
|
||||
extern unsigned long dac_mmap_min_addr;
|
||||
#else
|
||||
#define mmap_min_addr 0UL
|
||||
#define dac_mmap_min_addr 0UL
|
||||
#endif
|
||||
|
||||
#define VM_WARN_ON(_expr) (WARN_ON(_expr))
|
||||
#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
|
||||
#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
|
||||
#define VM_BUG_ON(_expr) (BUG_ON(_expr))
|
||||
#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
|
||||
|
||||
/* We hardcode this for now. */
|
||||
#define sysctl_max_map_count 0x1000000UL
|
||||
|
||||
#define TASK_SIZE ((1ul << 47)-PAGE_SIZE)
|
||||
|
||||
/*
|
||||
* The shared stubs do not implement this; it amounts to an fprintf(stderr, ...)
|
||||
* either way :)
|
||||
*/
|
||||
#define pr_warn_once pr_err
|
||||
|
||||
#define pgtable_supports_soft_dirty() 1
|
||||
|
||||
struct anon_vma {
|
||||
struct anon_vma *root;
|
||||
struct rb_root_cached rb_root;
|
||||
|
||||
/* Test fields. */
|
||||
bool was_cloned;
|
||||
bool was_unlinked;
|
||||
};
|
||||
|
||||
static inline void unlink_anon_vmas(struct vm_area_struct *vma)
|
||||
{
|
||||
/* For testing purposes, indicate that the anon_vma was unlinked. */
|
||||
vma->anon_vma->was_unlinked = true;
|
||||
}
|
||||
|
||||
static inline void vma_start_write(struct vm_area_struct *vma)
|
||||
{
|
||||
/* Used to indicate to tests that a write operation has begun. */
|
||||
vma->vm_lock_seq++;
|
||||
}
|
||||
|
||||
static inline __must_check
|
||||
int vma_start_write_killable(struct vm_area_struct *vma)
|
||||
{
|
||||
/* Used to indicate to tests that a write operation has begun. */
|
||||
vma->vm_lock_seq++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
|
||||
enum vma_operation operation)
|
||||
{
|
||||
/* For testing purposes. We indicate that an anon_vma has been cloned. */
|
||||
if (src->anon_vma != NULL) {
|
||||
dst->anon_vma = src->anon_vma;
|
||||
dst->anon_vma->was_cloned = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int __anon_vma_prepare(struct vm_area_struct *vma)
|
||||
{
|
||||
struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
|
||||
|
||||
if (!anon_vma)
|
||||
return -ENOMEM;
|
||||
|
||||
anon_vma->root = anon_vma;
|
||||
vma->anon_vma = anon_vma;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int anon_vma_prepare(struct vm_area_struct *vma)
|
||||
{
|
||||
if (likely(vma->anon_vma))
|
||||
return 0;
|
||||
|
||||
return __anon_vma_prepare(vma);
|
||||
}
|
||||
|
||||
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
|
||||
{
|
||||
if (reset_refcnt)
|
||||
refcount_set(&vma->vm_refcnt, 0);
|
||||
}
|
||||
|
||||
static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits)
|
||||
{
|
||||
vma_flags_t flags;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* For testing purposes: allow invalid bit specification so we can
|
||||
* easily test.
|
||||
*/
|
||||
vma_flags_clear_all(&flags);
|
||||
for (i = 0; i < count; i++)
|
||||
if (bits[i] < NUM_VMA_FLAG_BITS)
|
||||
vma_flag_set(&flags, bits[i]);
|
||||
return flags;
|
||||
}
|
||||
1320
tools/testing/vma/include/dup.h
Normal file
1320
tools/testing/vma/include/dup.h
Normal file
File diff suppressed because it is too large
Load diff
428
tools/testing/vma/include/stubs.h
Normal file
428
tools/testing/vma/include/stubs.h
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0+ */
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Contains declarations that are STUBBED, that is that are rendered no-ops, in
|
||||
* order to facilitate userland VMA testing.
|
||||
*/
|
||||
|
||||
/* Forward declarations. */
|
||||
struct mm_struct;
|
||||
struct vm_area_struct;
|
||||
struct vm_area_desc;
|
||||
struct pagetable_move_control;
|
||||
struct mmap_action;
|
||||
struct file;
|
||||
struct anon_vma;
|
||||
struct anon_vma_chain;
|
||||
struct address_space;
|
||||
struct unmap_desc;
|
||||
|
||||
#define __bitwise
|
||||
#define __randomize_layout
|
||||
|
||||
#define FIRST_USER_ADDRESS 0UL
|
||||
#define USER_PGTABLES_CEILING 0UL
|
||||
|
||||
#define vma_policy(vma) NULL
|
||||
|
||||
#define down_write_nest_lock(sem, nest_lock)
|
||||
|
||||
#define data_race(expr) expr
|
||||
|
||||
#define ASSERT_EXCLUSIVE_WRITER(x)
|
||||
|
||||
struct vm_userfaultfd_ctx {};
|
||||
struct mempolicy {};
|
||||
struct mmu_gather {};
|
||||
struct mutex {};
|
||||
struct vm_fault {};
|
||||
|
||||
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
|
||||
struct list_head *uf)
|
||||
{
|
||||
}
|
||||
|
||||
static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void free_pgd_range(struct mmu_gather *tlb,
|
||||
unsigned long addr, unsigned long end,
|
||||
unsigned long floor, unsigned long ceiling)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int ksm_execve(struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void ksm_exit(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void vma_numab_state_init(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void vma_numab_state_free(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void free_anon_vma_name(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mmap_action_prepare(struct mmap_action *action,
|
||||
struct vm_area_desc *desc)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int mmap_action_complete(struct mmap_action *action,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool shmem_file(struct file *file)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
|
||||
const struct file *file, vm_flags_t vm_flags)
|
||||
{
|
||||
return vm_flags;
|
||||
}
|
||||
|
||||
static inline void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn, unsigned long size, pgprot_t pgprot)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Stub: performs no unmapping and always reports success (0).
 *
 * All parameters are named so the definition is also accepted by compilers
 * that predate C23, which reject unnamed parameters in function definitions.
 */
static inline int do_munmap(struct mm_struct *mm, unsigned long start,
			    size_t len, struct list_head *uf)
{
	return 0;
}
|
||||
|
||||
/* Currently stubbed but we may later wish to un-stub. */
|
||||
static inline void vm_acct_memory(long pages);
|
||||
|
||||
static inline void mmap_assert_locked(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void i_mmap_unlock_write(struct address_space *mapping)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
struct list_head *unmaps)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void mmap_write_downgrade(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mmap_read_unlock(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mmap_write_unlock(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int mmap_write_lock_killable(struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool can_modify_mm(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void arch_unmap(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
|
||||
vm_flags_t vm_flags)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool mapping_can_writeback(struct address_space *mapping)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void mmap_assert_write_locked(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mutex_lock(struct mutex *lock)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mutex_unlock(struct mutex *lock)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool mutex_is_locked(struct mutex *lock)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool signal_pending(void *p)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_file_hugepages(struct file *file)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
|
||||
unsigned long npages)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int shmem_zero_setup(struct vm_area_struct *vma)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline void vm_acct_memory(long pages)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
|
||||
struct rb_root_cached *rb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
|
||||
struct rb_root_cached *rb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
|
||||
struct rb_root_cached *rb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
|
||||
struct rb_root_cached *rb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void uprobe_mmap(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void uprobe_munmap(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void i_mmap_lock_write(struct address_space *mapping)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void ksm_add_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void perf_event_mmap(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool vma_is_dax(struct vm_area_struct *vma)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool arch_validate_flags(vm_flags_t flags)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void vma_close(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int is_hugepage_only_range(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long len)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool capable(int cap)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
|
||||
struct vm_userfaultfd_ctx vm_ctx)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
|
||||
struct anon_vma_name *anon_name2)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void might_sleep(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void fput(struct file *file)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mpol_put(struct mempolicy *pol)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void lru_add_drain(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void update_hiwater_rss(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void update_hiwater_vm(struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *unmap)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mapping_unmap_writable(struct address_space *mapping)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void tlb_finish_mmu(struct mmu_gather *tlb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct file *get_file(struct file *f)
|
||||
{
|
||||
return f;
|
||||
}
|
||||
|
||||
static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
struct vm_area_struct *next)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub: does nothing. Parameters are named so the definition is also accepted
 * by compilers that predate C23.
 */
static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
{
}
|
||||
55
tools/testing/vma/main.c
Normal file
55
tools/testing/vma/main.c
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shared.h"
|
||||
/*
|
||||
* Directly import the VMA implementation here. Our vma_internal.h wrapper
|
||||
* provides userland-equivalent functionality for everything vma.c uses.
|
||||
*/
|
||||
#include "../../../mm/vma_init.c"
|
||||
#include "../../../mm/vma_exec.c"
|
||||
#include "../../../mm/vma.c"
|
||||
|
||||
/* Tests are included directly so they can test static functions in mm/vma.c. */
|
||||
#include "tests/merge.c"
|
||||
#include "tests/mmap.c"
|
||||
#include "tests/vma.c"
|
||||
|
||||
/* Helper functions which utilise static kernel functions. */
|
||||
|
||||
/*
 * Run vma_merge_existing_range() on @vmg and, when it yields a VMA, assert
 * that the VMA is attached. Returns whatever the merge returned (may be NULL).
 */
struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
{
	struct vm_area_struct *merged = vma_merge_existing_range(vmg);

	if (merged == NULL)
		return NULL;

	vma_assert_attached(merged);
	return merged;
}
|
||||
|
||||
/*
 * Link @vma into @mm via vma_link() and, on success, assert that it is now
 * attached. Returns 0 on success or the non-zero vma_link() result.
 */
int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	const int err = vma_link(mm, vma);

	if (err)
		return err;

	vma_assert_attached(vma);
	return 0;
}
|
||||
|
||||
/* Main test runner which invokes the tests/ *.c runners. */
|
||||
/*
 * Initialise the maple tree and VMA state, run every test group, print a
 * summary and exit with failure if any test failed.
 */
int main(void)
{
	int total = 0;
	int failed = 0;

	maple_tree_init();
	vma_state_init();

	/* Each runner adds to the shared totals through the pointers. */
	run_merge_tests(&total, &failed);
	run_mmap_tests(&total, &failed);
	run_vma_tests(&total, &failed);

	printf("%d tests run, %d passed, %d failed.\n",
	       total, total - failed, failed);

	if (failed != 0)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
131
tools/testing/vma/shared.c
Normal file
131
tools/testing/vma/shared.c
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shared.h"
|
||||
|
||||
|
||||
bool fail_prealloc;
|
||||
unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
|
||||
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
|
||||
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
|
||||
|
||||
const struct vm_operations_struct vma_dummy_vm_ops;
|
||||
struct anon_vma dummy_anon_vma;
|
||||
struct task_struct __current;
|
||||
|
||||
struct vm_area_struct *alloc_vma(struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff, vm_flags_t vm_flags)
|
||||
{
|
||||
struct vm_area_struct *vma = vm_area_alloc(mm);
|
||||
|
||||
if (vma == NULL)
|
||||
return NULL;
|
||||
|
||||
vma->vm_start = start;
|
||||
vma->vm_end = end;
|
||||
vma->vm_pgoff = pgoff;
|
||||
vm_flags_reset(vma, vm_flags);
|
||||
vma_assert_detached(vma);
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
void detach_free_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
vma_mark_detached(vma);
|
||||
vm_area_free(vma);
|
||||
}
|
||||
|
||||
struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff, vm_flags_t vm_flags)
|
||||
{
|
||||
struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);
|
||||
|
||||
if (vma == NULL)
|
||||
return NULL;
|
||||
|
||||
if (attach_vma(mm, vma)) {
|
||||
detach_free_vma(vma);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset this counter which we use to track whether writes have
|
||||
* begun. Linking to the tree will have caused this to be incremented,
|
||||
* which means we will get a false positive otherwise.
|
||||
*/
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
void reset_dummy_anon_vma(void)
|
||||
{
|
||||
dummy_anon_vma.was_cloned = false;
|
||||
dummy_anon_vma.was_unlinked = false;
|
||||
}
|
||||
|
||||
int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
int count = 0;
|
||||
|
||||
fail_prealloc = false;
|
||||
reset_dummy_anon_vma();
|
||||
|
||||
vma_iter_set(vmi, 0);
|
||||
for_each_vma(*vmi, vma) {
|
||||
detach_free_vma(vma);
|
||||
count++;
|
||||
}
|
||||
|
||||
mtree_destroy(&mm->mm_mt);
|
||||
mm->map_count = 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
bool vma_write_started(struct vm_area_struct *vma)
|
||||
{
|
||||
int seq = vma->vm_lock_seq;
|
||||
|
||||
/* We reset after each check. */
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
/* The vma_start_write() stub simply increments this value. */
|
||||
return seq > -1;
|
||||
}
|
||||
|
||||
void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc, struct anon_vma *anon_vma)
|
||||
{
|
||||
vma->anon_vma = anon_vma;
|
||||
INIT_LIST_HEAD(&vma->anon_vma_chain);
|
||||
list_add(&avc->same_vma, &vma->anon_vma_chain);
|
||||
avc->anon_vma = vma->anon_vma;
|
||||
}
|
||||
|
||||
void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc)
|
||||
{
|
||||
__vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
|
||||
}
|
||||
|
||||
struct task_struct *get_current(void)
|
||||
{
|
||||
return &__current;
|
||||
}
|
||||
|
||||
/*
 * Stub: ignores @limit and returns the largest unsigned long value for every
 * resource.
 */
unsigned long rlimit(unsigned int limit)
{
	return ~0UL;
}
|
||||
|
||||
void vma_set_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff)
|
||||
{
|
||||
vma->vm_start = start;
|
||||
vma->vm_end = end;
|
||||
vma->vm_pgoff = pgoff;
|
||||
}
|
||||
114
tools/testing/vma/shared.h
Normal file
114
tools/testing/vma/shared.h
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "generated/bit-length.h"
|
||||
#include "maple-shared.h"
|
||||
#include "vma_internal.h"
|
||||
#include "../../../mm/vma.h"
|
||||
|
||||
/* Simple test runner. Assumes num_tests and num_fail pointers to the counters are in scope. */
|
||||
#define TEST(name) \
|
||||
do { \
|
||||
(*num_tests)++; \
|
||||
if (!test_##name()) { \
|
||||
(*num_fail)++; \
|
||||
fprintf(stderr, "Test " #name " FAILED\n"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_TRUE(_expr) \
|
||||
do { \
|
||||
if (!(_expr)) { \
|
||||
fprintf(stderr, \
|
||||
"Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
|
||||
__FILE__, __LINE__, __FUNCTION__, #_expr); \
|
||||
return false; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
|
||||
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
|
||||
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
|
||||
|
||||
/*
 * True if every bit in _flags is also set in _val. The arguments are
 * parenthesised so compound expressions such as A | B expand correctly;
 * note that _flags is evaluated twice.
 */
#define IS_SET(_val, _flags) (((_val) & (_flags)) == (_flags))
|
||||
|
||||
extern bool fail_prealloc;
|
||||
|
||||
/* Override vma_iter_prealloc() so we can choose to fail it. */
|
||||
#define vma_iter_prealloc(vmi, vma) \
|
||||
(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
|
||||
|
||||
#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
|
||||
|
||||
extern unsigned long mmap_min_addr;
|
||||
extern unsigned long dac_mmap_min_addr;
|
||||
extern unsigned long stack_guard_gap;
|
||||
|
||||
extern const struct vm_operations_struct vma_dummy_vm_ops;
|
||||
extern struct anon_vma dummy_anon_vma;
|
||||
extern struct task_struct __current;
|
||||
|
||||
/*
|
||||
* Helper function which provides a wrapper around a merge existing VMA
|
||||
* operation.
|
||||
*
|
||||
* Defined in main.c as it uses a static VMA function.
|
||||
*/
|
||||
struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg);
|
||||
|
||||
/*
|
||||
* Helper function to link an already-allocated VMA into the tree.
|
||||
*
|
||||
* Defined in main.c as it uses a static VMA function.
|
||||
*/
|
||||
int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma);
|
||||
|
||||
/* Helper function providing a dummy vm_ops->close() method.*/
|
||||
/*
 * Does nothing. The parameter is named so the definition is also accepted by
 * compilers that predate C23.
 */
static inline void dummy_close(struct vm_area_struct *vma)
{
}
|
||||
|
||||
/* Helper function to simply allocate a VMA. */
|
||||
struct vm_area_struct *alloc_vma(struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff, vm_flags_t vm_flags);
|
||||
|
||||
/* Helper function to detach and free a VMA. */
|
||||
void detach_free_vma(struct vm_area_struct *vma);
|
||||
|
||||
/* Helper function to allocate a VMA and link it to the tree. */
|
||||
struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff, vm_flags_t vm_flags);
|
||||
|
||||
/*
|
||||
* Helper function to reset the dummy anon_vma to indicate it has not been
|
||||
* duplicated.
|
||||
*/
|
||||
void reset_dummy_anon_vma(void);
|
||||
|
||||
/*
|
||||
* Helper function to remove all VMAs and destroy the maple tree associated with
|
||||
* a virtual address space. Returns a count of VMAs in the tree.
|
||||
*/
|
||||
int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi);
|
||||
|
||||
/* Helper function to determine if VMA has had vma_start_write() performed. */
|
||||
bool vma_write_started(struct vm_area_struct *vma);
|
||||
|
||||
void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc, struct anon_vma *anon_vma);
|
||||
|
||||
/* Provide a simple dummy VMA/anon_vma setup for testing. */
|
||||
void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc);
|
||||
|
||||
/* Helper function to specify a VMA's range. */
|
||||
void vma_set_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff);
|
||||
|
|
@ -1,132 +1,5 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "generated/bit-length.h"
|
||||
|
||||
#include "maple-shared.h"
|
||||
#include "vma_internal.h"
|
||||
|
||||
/* Include so header guard set. */
|
||||
#include "../../../mm/vma.h"
|
||||
|
||||
static bool fail_prealloc;
|
||||
|
||||
/* Then override vma_iter_prealloc() so we can choose to fail it. */
|
||||
#define vma_iter_prealloc(vmi, vma) \
|
||||
(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
|
||||
|
||||
#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
|
||||
|
||||
unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
|
||||
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
|
||||
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Directly import the VMA implementation here. Our vma_internal.h wrapper
|
||||
* provides userland-equivalent functionality for everything vma.c uses.
|
||||
*/
|
||||
#include "../../../mm/vma_init.c"
|
||||
#include "../../../mm/vma_exec.c"
|
||||
#include "../../../mm/vma.c"
|
||||
|
||||
const struct vm_operations_struct vma_dummy_vm_ops;
|
||||
static struct anon_vma dummy_anon_vma;
|
||||
|
||||
#define ASSERT_TRUE(_expr) \
|
||||
do { \
|
||||
if (!(_expr)) { \
|
||||
fprintf(stderr, \
|
||||
"Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \
|
||||
__FILE__, __LINE__, __FUNCTION__, #_expr); \
|
||||
return false; \
|
||||
} \
|
||||
} while (0)
|
||||
#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr))
|
||||
#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2))
|
||||
#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2))
|
||||
|
||||
#define IS_SET(_val, _flags) ((_val & _flags) == _flags)
|
||||
|
||||
static struct task_struct __current;
|
||||
|
||||
struct task_struct *get_current(void)
|
||||
{
|
||||
return &__current;
|
||||
}
|
||||
|
||||
unsigned long rlimit(unsigned int limit)
|
||||
{
|
||||
return (unsigned long)-1;
|
||||
}
|
||||
|
||||
/* Helper function to simply allocate a VMA. */
|
||||
static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
pgoff_t pgoff,
|
||||
vm_flags_t vm_flags)
|
||||
{
|
||||
struct vm_area_struct *vma = vm_area_alloc(mm);
|
||||
|
||||
if (vma == NULL)
|
||||
return NULL;
|
||||
|
||||
vma->vm_start = start;
|
||||
vma->vm_end = end;
|
||||
vma->vm_pgoff = pgoff;
|
||||
vm_flags_reset(vma, vm_flags);
|
||||
vma_assert_detached(vma);
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
/* Helper function to allocate a VMA and link it to the tree. */
|
||||
static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
|
||||
{
|
||||
int res;
|
||||
|
||||
res = vma_link(mm, vma);
|
||||
if (!res)
|
||||
vma_assert_attached(vma);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void detach_free_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
vma_mark_detached(vma);
|
||||
vm_area_free(vma);
|
||||
}
|
||||
|
||||
/* Helper function to allocate a VMA and link it to the tree. */
|
||||
static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
pgoff_t pgoff,
|
||||
vm_flags_t vm_flags)
|
||||
{
|
||||
struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags);
|
||||
|
||||
if (vma == NULL)
|
||||
return NULL;
|
||||
|
||||
if (attach_vma(mm, vma)) {
|
||||
detach_free_vma(vma);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset this counter which we use to track whether writes have
|
||||
* begun. Linking to the tree will have caused this to be incremented,
|
||||
* which means we will get a false positive otherwise.
|
||||
*/
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
/* Helper function which provides a wrapper around a merge new VMA operation. */
|
||||
static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
|
||||
{
|
||||
|
|
@ -146,20 +19,6 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
|
|||
return vma;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function which provides a wrapper around a merge existing VMA
|
||||
* operation.
|
||||
*/
|
||||
static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
vma = vma_merge_existing_range(vmg);
|
||||
if (vma)
|
||||
vma_assert_attached(vma);
|
||||
return vma;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function which provides a wrapper around the expansion of an existing
|
||||
* VMA.
|
||||
|
|
@ -173,7 +32,7 @@ static int expand_existing(struct vma_merge_struct *vmg)
|
|||
* Helper function to reset merge state the associated VMA iterator to a
|
||||
* specified new range.
|
||||
*/
|
||||
static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
|
||||
void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
|
||||
unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags)
|
||||
{
|
||||
vma_iter_set(vmg->vmi, start);
|
||||
|
|
@ -211,9 +70,8 @@ static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long s
|
|||
* VMA, link it to the maple tree and return it.
|
||||
*/
|
||||
static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
|
||||
struct vma_merge_struct *vmg,
|
||||
unsigned long start, unsigned long end,
|
||||
pgoff_t pgoff, vm_flags_t vm_flags,
|
||||
struct vma_merge_struct *vmg, unsigned long start,
|
||||
unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags,
|
||||
bool *was_merged)
|
||||
{
|
||||
struct vm_area_struct *merged;
|
||||
|
|
@ -234,72 +92,6 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
|
|||
return alloc_and_link_vma(mm, start, end, pgoff, vm_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function to reset the dummy anon_vma to indicate it has not been
|
||||
* duplicated.
|
||||
*/
|
||||
static void reset_dummy_anon_vma(void)
|
||||
{
|
||||
dummy_anon_vma.was_cloned = false;
|
||||
dummy_anon_vma.was_unlinked = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function to remove all VMAs and destroy the maple tree associated with
|
||||
* a virtual address space. Returns a count of VMAs in the tree.
|
||||
*/
|
||||
static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
int count = 0;
|
||||
|
||||
fail_prealloc = false;
|
||||
reset_dummy_anon_vma();
|
||||
|
||||
vma_iter_set(vmi, 0);
|
||||
for_each_vma(*vmi, vma) {
|
||||
detach_free_vma(vma);
|
||||
count++;
|
||||
}
|
||||
|
||||
mtree_destroy(&mm->mm_mt);
|
||||
mm->map_count = 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Helper function to determine if VMA has had vma_start_write() performed. */
|
||||
static bool vma_write_started(struct vm_area_struct *vma)
|
||||
{
|
||||
int seq = vma->vm_lock_seq;
|
||||
|
||||
/* We reset after each check. */
|
||||
vma->vm_lock_seq = UINT_MAX;
|
||||
|
||||
/* The vma_start_write() stub simply increments this value. */
|
||||
return seq > -1;
|
||||
}
|
||||
|
||||
/*
 * Dummy vm_ops->close() method: intentionally does nothing.
 *
 * The parameter is named (although unused) because omitting a parameter name
 * in a function definition is only valid from C23 onwards.
 */
static void dummy_close(struct vm_area_struct *vma)
{
}
|
||||
|
||||
static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc,
|
||||
struct anon_vma *anon_vma)
|
||||
{
|
||||
vma->anon_vma = anon_vma;
|
||||
INIT_LIST_HEAD(&vma->anon_vma_chain);
|
||||
list_add(&avc->same_vma, &vma->anon_vma_chain);
|
||||
avc->anon_vma = vma->anon_vma;
|
||||
}
|
||||
|
||||
static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
|
||||
struct anon_vma_chain *avc)
|
||||
{
|
||||
__vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
|
||||
}
|
||||
|
||||
static bool test_simple_merge(void)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
|
@ -1616,39 +1408,6 @@ static bool test_merge_extend(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool test_copy_vma(void)
|
||||
{
|
||||
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
|
||||
struct mm_struct mm = {};
|
||||
bool need_locks = false;
|
||||
VMA_ITERATOR(vmi, &mm, 0);
|
||||
struct vm_area_struct *vma, *vma_new, *vma_next;
|
||||
|
||||
/* Move backwards and do not merge. */
|
||||
|
||||
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
|
||||
vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
|
||||
ASSERT_NE(vma_new, vma);
|
||||
ASSERT_EQ(vma_new->vm_start, 0);
|
||||
ASSERT_EQ(vma_new->vm_end, 0x2000);
|
||||
ASSERT_EQ(vma_new->vm_pgoff, 0);
|
||||
vma_assert_attached(vma_new);
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
|
||||
/* Move a VMA into position next to another and merge the two. */
|
||||
|
||||
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
|
||||
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
|
||||
vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
|
||||
vma_assert_attached(vma_new);
|
||||
|
||||
ASSERT_EQ(vma_new, vma_next);
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool test_expand_only_mode(void)
|
||||
{
|
||||
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
|
||||
|
|
@ -1689,73 +1448,8 @@ static bool test_expand_only_mode(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool test_mmap_region_basic(void)
|
||||
static void run_merge_tests(int *num_tests, int *num_fail)
|
||||
{
|
||||
struct mm_struct mm = {};
|
||||
unsigned long addr;
|
||||
struct vm_area_struct *vma;
|
||||
VMA_ITERATOR(vmi, &mm, 0);
|
||||
|
||||
current->mm = &mm;
|
||||
|
||||
/* Map at 0x300000, length 0x3000. */
|
||||
addr = __mmap_region(NULL, 0x300000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x300, NULL);
|
||||
ASSERT_EQ(addr, 0x300000);
|
||||
|
||||
/* Map at 0x250000, length 0x3000. */
|
||||
addr = __mmap_region(NULL, 0x250000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x250, NULL);
|
||||
ASSERT_EQ(addr, 0x250000);
|
||||
|
||||
/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
|
||||
addr = __mmap_region(NULL, 0x303000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x303, NULL);
|
||||
ASSERT_EQ(addr, 0x303000);
|
||||
|
||||
/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
|
||||
addr = __mmap_region(NULL, 0x24d000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x24d, NULL);
|
||||
ASSERT_EQ(addr, 0x24d000);
|
||||
|
||||
ASSERT_EQ(mm.map_count, 2);
|
||||
|
||||
for_each_vma(vmi, vma) {
|
||||
if (vma->vm_start == 0x300000) {
|
||||
ASSERT_EQ(vma->vm_end, 0x306000);
|
||||
ASSERT_EQ(vma->vm_pgoff, 0x300);
|
||||
} else if (vma->vm_start == 0x24d000) {
|
||||
ASSERT_EQ(vma->vm_end, 0x253000);
|
||||
ASSERT_EQ(vma->vm_pgoff, 0x24d);
|
||||
} else {
|
||||
ASSERT_FALSE(true);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int num_tests = 0, num_fail = 0;
|
||||
|
||||
maple_tree_init();
|
||||
vma_state_init();
|
||||
|
||||
#define TEST(name) \
|
||||
do { \
|
||||
num_tests++; \
|
||||
if (!test_##name()) { \
|
||||
num_fail++; \
|
||||
fprintf(stderr, "Test " #name " FAILED\n"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Very simple tests to kick the tyres. */
|
||||
TEST(simple_merge);
|
||||
TEST(simple_modify);
|
||||
|
|
@ -1771,15 +1465,5 @@ int main(void)
|
|||
TEST(dup_anon_vma);
|
||||
TEST(vmi_prealloc_fail);
|
||||
TEST(merge_extend);
|
||||
TEST(copy_vma);
|
||||
TEST(expand_only_mode);
|
||||
|
||||
TEST(mmap_region_basic);
|
||||
|
||||
#undef TEST
|
||||
|
||||
printf("%d tests run, %d passed, %d failed.\n",
|
||||
num_tests, num_tests - num_fail, num_fail);
|
||||
|
||||
return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
57
tools/testing/vma/tests/mmap.c
Normal file
57
tools/testing/vma/tests/mmap.c
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
static bool test_mmap_region_basic(void)
|
||||
{
|
||||
struct mm_struct mm = {};
|
||||
unsigned long addr;
|
||||
struct vm_area_struct *vma;
|
||||
VMA_ITERATOR(vmi, &mm, 0);
|
||||
|
||||
current->mm = &mm;
|
||||
|
||||
/* Map at 0x300000, length 0x3000. */
|
||||
addr = __mmap_region(NULL, 0x300000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x300, NULL);
|
||||
ASSERT_EQ(addr, 0x300000);
|
||||
|
||||
/* Map at 0x250000, length 0x3000. */
|
||||
addr = __mmap_region(NULL, 0x250000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x250, NULL);
|
||||
ASSERT_EQ(addr, 0x250000);
|
||||
|
||||
/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
|
||||
addr = __mmap_region(NULL, 0x303000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x303, NULL);
|
||||
ASSERT_EQ(addr, 0x303000);
|
||||
|
||||
/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
|
||||
addr = __mmap_region(NULL, 0x24d000, 0x3000,
|
||||
VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
|
||||
0x24d, NULL);
|
||||
ASSERT_EQ(addr, 0x24d000);
|
||||
|
||||
ASSERT_EQ(mm.map_count, 2);
|
||||
|
||||
for_each_vma(vmi, vma) {
|
||||
if (vma->vm_start == 0x300000) {
|
||||
ASSERT_EQ(vma->vm_end, 0x306000);
|
||||
ASSERT_EQ(vma->vm_pgoff, 0x300);
|
||||
} else if (vma->vm_start == 0x24d000) {
|
||||
ASSERT_EQ(vma->vm_end, 0x253000);
|
||||
ASSERT_EQ(vma->vm_pgoff, 0x24d);
|
||||
} else {
|
||||
ASSERT_FALSE(true);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void run_mmap_tests(int *num_tests, int *num_fail)
|
||||
{
|
||||
TEST(mmap_region_basic);
|
||||
}
|
||||
339
tools/testing/vma/tests/vma.c
Normal file
339
tools/testing/vma/tests/vma.c
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/*
 * Check that a bitmap VMA flags value is equivalent to a legacy vm_flags_t
 * value.
 *
 * The first word of @flags must equal @legacy_flags exactly and every further
 * word of the bitmap must be zero.
 *
 * Returns true if both conditions hold, false otherwise.
 */
static bool compare_legacy_flags(vm_flags_t legacy_flags, vma_flags_t flags)
{
	const unsigned long legacy_val = legacy_flags;
	/* The lower word should contain the precise same value. */
	const unsigned long flags_lower = flags.__vma_flags[0];
	/*
	 * Derive the word count from the bitmap itself rather than from a
	 * preprocessor constant: an undefined or misspelt macro name in an #if
	 * silently evaluates to 0, which would compile the check below away.
	 */
	const unsigned long nr_words =
		sizeof(flags.__vma_flags) / sizeof(flags.__vma_flags[0]);
	unsigned long i;

	static_assert(sizeof(legacy_flags) == sizeof(unsigned long));

	/* All bits in higher flag words should be zero. */
	for (i = 1; i < nr_words; i++) {
		if (flags.__vma_flags[i] != 0)
			return false;
	}

	return legacy_val == flags_lower;
}
|
||||
|
||||
static bool test_copy_vma(void)
|
||||
{
|
||||
vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
|
||||
struct mm_struct mm = {};
|
||||
bool need_locks = false;
|
||||
VMA_ITERATOR(vmi, &mm, 0);
|
||||
struct vm_area_struct *vma, *vma_new, *vma_next;
|
||||
|
||||
/* Move backwards and do not merge. */
|
||||
|
||||
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags);
|
||||
vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
|
||||
ASSERT_NE(vma_new, vma);
|
||||
ASSERT_EQ(vma_new->vm_start, 0);
|
||||
ASSERT_EQ(vma_new->vm_end, 0x2000);
|
||||
ASSERT_EQ(vma_new->vm_pgoff, 0);
|
||||
vma_assert_attached(vma_new);
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
|
||||
/* Move a VMA into position next to another and merge the two. */
|
||||
|
||||
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags);
|
||||
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags);
|
||||
vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
|
||||
vma_assert_attached(vma_new);
|
||||
|
||||
ASSERT_EQ(vma_new, vma_next);
|
||||
|
||||
cleanup_mm(&mm, &vmi);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool test_vma_flags_unchanged(void)
|
||||
{
|
||||
vma_flags_t flags = EMPTY_VMA_FLAGS;
|
||||
vm_flags_t legacy_flags = 0;
|
||||
int bit;
|
||||
struct vm_area_struct vma;
|
||||
struct vm_area_desc desc;
|
||||
|
||||
|
||||
vma.flags = EMPTY_VMA_FLAGS;
|
||||
desc.vma_flags = EMPTY_VMA_FLAGS;
|
||||
|
||||
for (bit = 0; bit < BITS_PER_LONG; bit++) {
|
||||
vma_flags_t mask = mk_vma_flags(bit);
|
||||
|
||||
legacy_flags |= (1UL << bit);
|
||||
|
||||
/* Individual flags. */
|
||||
vma_flags_set(&flags, bit);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
|
||||
|
||||
/* Via mask. */
|
||||
vma_flags_set_mask(&flags, mask);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, flags));
|
||||
|
||||
/* Same for VMA. */
|
||||
vma_set_flags(&vma, bit);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
|
||||
vma_set_flags_mask(&vma, mask);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, vma.flags));
|
||||
|
||||
/* Same for VMA descriptor. */
|
||||
vma_desc_set_flags(&desc, bit);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
|
||||
vma_desc_set_flags_mask(&desc, mask);
|
||||
ASSERT_TRUE(compare_legacy_flags(legacy_flags, desc.vma_flags));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool test_vma_flags_cleared(void)
|
||||
{
|
||||
const vma_flags_t empty = EMPTY_VMA_FLAGS;
|
||||
vma_flags_t flags;
|
||||
int i;
|
||||
|
||||
/* Set all bits high. */
|
||||
memset(&flags, 1, sizeof(flags));
|
||||
/* Try to clear. */
|
||||
vma_flags_clear_all(&flags);
|
||||
/* Equal to EMPTY_VMA_FLAGS? */
|
||||
ASSERT_EQ(memcmp(&empty, &flags, sizeof(flags)), 0);
|
||||
/* Make sure every unsigned long entry in bitmap array zero. */
|
||||
for (i = 0; i < sizeof(flags) / BITS_PER_LONG; i++) {
|
||||
const unsigned long val = flags.__vma_flags[i];
|
||||
|
||||
ASSERT_EQ(val, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Assert that VMA flag functions that operate at the system word level function
|
||||
* correctly.
|
||||
*/
|
||||
static bool test_vma_flags_word(void)
|
||||
{
|
||||
vma_flags_t flags = EMPTY_VMA_FLAGS;
|
||||
const vma_flags_t comparison =
|
||||
mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT, 64, 65);
|
||||
|
||||
/* Set some custom high flags. */
|
||||
vma_flags_set(&flags, 64, 65);
|
||||
/* Now overwrite the first word. */
|
||||
vma_flags_overwrite_word(&flags, VM_READ | VM_WRITE);
|
||||
/* Ensure they are equal. */
|
||||
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
|
||||
|
||||
flags = EMPTY_VMA_FLAGS;
|
||||
vma_flags_set(&flags, 64, 65);
|
||||
|
||||
/* Do the same with the _once() equivalent. */
|
||||
vma_flags_overwrite_word_once(&flags, VM_READ | VM_WRITE);
|
||||
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
|
||||
|
||||
flags = EMPTY_VMA_FLAGS;
|
||||
vma_flags_set(&flags, 64, 65);
|
||||
|
||||
/* Make sure we can set a word without disturbing other bits. */
|
||||
vma_flags_set(&flags, VMA_WRITE_BIT);
|
||||
vma_flags_set_word(&flags, VM_READ);
|
||||
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
|
||||
|
||||
flags = EMPTY_VMA_FLAGS;
|
||||
vma_flags_set(&flags, 64, 65);
|
||||
|
||||
/* Make sure we can clear a word without disturbing other bits. */
|
||||
vma_flags_set(&flags, VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
vma_flags_clear_word(&flags, VM_EXEC);
|
||||
ASSERT_EQ(memcmp(&flags, &comparison, sizeof(flags)), 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Ensure that vma_flags_test() and friends works correctly. */
|
||||
static bool test_vma_flags_test(void)
|
||||
{
|
||||
const vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
|
||||
VMA_EXEC_BIT, 64, 65);
|
||||
struct vm_area_struct vma;
|
||||
struct vm_area_desc desc;
|
||||
|
||||
vma.flags = flags;
|
||||
desc.vma_flags = flags;
|
||||
|
||||
#define do_test(...) \
|
||||
ASSERT_TRUE(vma_flags_test(&flags, __VA_ARGS__)); \
|
||||
ASSERT_TRUE(vma_desc_test_flags(&desc, __VA_ARGS__))
|
||||
|
||||
#define do_test_all_true(...) \
|
||||
ASSERT_TRUE(vma_flags_test_all(&flags, __VA_ARGS__)); \
|
||||
ASSERT_TRUE(vma_test_all_flags(&vma, __VA_ARGS__))
|
||||
|
||||
#define do_test_all_false(...) \
|
||||
ASSERT_FALSE(vma_flags_test_all(&flags, __VA_ARGS__)); \
|
||||
ASSERT_FALSE(vma_test_all_flags(&vma, __VA_ARGS__))
|
||||
|
||||
/*
|
||||
* Testing for some flags that are present, some that are not - should
|
||||
* pass. ANY flags matching should work.
|
||||
*/
|
||||
do_test(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
|
||||
/* However, the ...test_all() variant should NOT pass. */
|
||||
do_test_all_false(VMA_READ_BIT, VMA_MAYREAD_BIT, VMA_SEQ_READ_BIT);
|
||||
/* But should pass for flags present. */
|
||||
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
|
||||
/* Also subsets... */
|
||||
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
|
||||
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
do_test_all_true(VMA_READ_BIT, VMA_WRITE_BIT);
|
||||
do_test_all_true(VMA_READ_BIT);
|
||||
/*
|
||||
* Check _mask variant. We don't need to test extensively as macro
|
||||
* helper is the equivalent.
|
||||
*/
|
||||
ASSERT_TRUE(vma_flags_test_mask(&flags, flags));
|
||||
ASSERT_TRUE(vma_flags_test_all_mask(&flags, flags));
|
||||
|
||||
/* Single bits. */
|
||||
do_test(VMA_READ_BIT);
|
||||
do_test(VMA_WRITE_BIT);
|
||||
do_test(VMA_EXEC_BIT);
|
||||
#if NUM_VMA_FLAG_BITS > 64
|
||||
do_test(64);
|
||||
do_test(65);
|
||||
#endif
|
||||
|
||||
/* Two bits. */
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT);
|
||||
do_test(VMA_READ_BIT, VMA_EXEC_BIT);
|
||||
do_test(VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
/* Ordering shouldn't matter. */
|
||||
do_test(VMA_WRITE_BIT, VMA_READ_BIT);
|
||||
do_test(VMA_EXEC_BIT, VMA_READ_BIT);
|
||||
do_test(VMA_EXEC_BIT, VMA_WRITE_BIT);
|
||||
#if NUM_VMA_FLAG_BITS > 64
|
||||
do_test(VMA_READ_BIT, 64);
|
||||
do_test(VMA_WRITE_BIT, 64);
|
||||
do_test(64, VMA_READ_BIT);
|
||||
do_test(64, VMA_WRITE_BIT);
|
||||
do_test(VMA_READ_BIT, 65);
|
||||
do_test(VMA_WRITE_BIT, 65);
|
||||
do_test(65, VMA_READ_BIT);
|
||||
do_test(65, VMA_WRITE_BIT);
|
||||
#endif
|
||||
/* Three bits. */
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
#if NUM_VMA_FLAG_BITS > 64
|
||||
/* No need to consider every single permutation. */
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, 64);
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, 65);
|
||||
|
||||
/* Four bits. */
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64);
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 65);
|
||||
|
||||
/* Five bits. */
|
||||
do_test(VMA_READ_BIT, VMA_WRITE_BIT, VMA_EXEC_BIT, 64, 65);
|
||||
#endif
|
||||
|
||||
#undef do_test
|
||||
#undef do_test_all_true
|
||||
#undef do_test_all_false
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Ensure that vma_flags_clear() and friends works correctly. */
|
||||
static bool test_vma_flags_clear(void)
|
||||
{
|
||||
vma_flags_t flags = mk_vma_flags(VMA_READ_BIT, VMA_WRITE_BIT,
|
||||
VMA_EXEC_BIT, 64, 65);
|
||||
vma_flags_t mask = mk_vma_flags(VMA_EXEC_BIT, 64);
|
||||
struct vm_area_struct vma;
|
||||
struct vm_area_desc desc;
|
||||
|
||||
vma.flags = flags;
|
||||
desc.vma_flags = flags;
|
||||
|
||||
/* Cursory check of _mask() variant, as the helper macros imply. */
|
||||
vma_flags_clear_mask(&flags, mask);
|
||||
vma_flags_clear_mask(&vma.flags, mask);
|
||||
vma_desc_clear_flags_mask(&desc, mask);
|
||||
ASSERT_FALSE(vma_flags_test(&flags, VMA_EXEC_BIT, 64));
|
||||
ASSERT_FALSE(vma_flags_test(&vma.flags, VMA_EXEC_BIT, 64));
|
||||
ASSERT_FALSE(vma_desc_test_flags(&desc, VMA_EXEC_BIT, 64));
|
||||
/* Reset. */
|
||||
vma_flags_set(&flags, VMA_EXEC_BIT, 64);
|
||||
vma_set_flags(&vma, VMA_EXEC_BIT, 64);
|
||||
vma_desc_set_flags(&desc, VMA_EXEC_BIT, 64);
|
||||
|
||||
/*
|
||||
* Clear the flags and assert clear worked, then reset flags back to
|
||||
* include specified flags.
|
||||
*/
|
||||
#define do_test_and_reset(...) \
|
||||
vma_flags_clear(&flags, __VA_ARGS__); \
|
||||
vma_flags_clear(&vma.flags, __VA_ARGS__); \
|
||||
vma_desc_clear_flags(&desc, __VA_ARGS__); \
|
||||
ASSERT_FALSE(vma_flags_test(&flags, __VA_ARGS__)); \
|
||||
ASSERT_FALSE(vma_flags_test(&vma.flags, __VA_ARGS__)); \
|
||||
ASSERT_FALSE(vma_desc_test_flags(&desc, __VA_ARGS__)); \
|
||||
vma_flags_set(&flags, __VA_ARGS__); \
|
||||
vma_set_flags(&vma, __VA_ARGS__); \
|
||||
vma_desc_set_flags(&desc, __VA_ARGS__)
|
||||
|
||||
/* Single flags. */
|
||||
do_test_and_reset(VMA_READ_BIT);
|
||||
do_test_and_reset(VMA_WRITE_BIT);
|
||||
do_test_and_reset(VMA_EXEC_BIT);
|
||||
do_test_and_reset(64);
|
||||
do_test_and_reset(65);
|
||||
|
||||
/* Two flags, in different orders. */
|
||||
do_test_and_reset(VMA_READ_BIT, VMA_WRITE_BIT);
|
||||
do_test_and_reset(VMA_READ_BIT, VMA_EXEC_BIT);
|
||||
do_test_and_reset(VMA_READ_BIT, 64);
|
||||
do_test_and_reset(VMA_READ_BIT, 65);
|
||||
do_test_and_reset(VMA_WRITE_BIT, VMA_READ_BIT);
|
||||
do_test_and_reset(VMA_WRITE_BIT, VMA_EXEC_BIT);
|
||||
do_test_and_reset(VMA_WRITE_BIT, 64);
|
||||
do_test_and_reset(VMA_WRITE_BIT, 65);
|
||||
do_test_and_reset(VMA_EXEC_BIT, VMA_READ_BIT);
|
||||
do_test_and_reset(VMA_EXEC_BIT, VMA_WRITE_BIT);
|
||||
do_test_and_reset(VMA_EXEC_BIT, 64);
|
||||
do_test_and_reset(VMA_EXEC_BIT, 65);
|
||||
do_test_and_reset(64, VMA_READ_BIT);
|
||||
do_test_and_reset(64, VMA_WRITE_BIT);
|
||||
do_test_and_reset(64, VMA_EXEC_BIT);
|
||||
do_test_and_reset(64, 65);
|
||||
do_test_and_reset(65, VMA_READ_BIT);
|
||||
do_test_and_reset(65, VMA_WRITE_BIT);
|
||||
do_test_and_reset(65, VMA_EXEC_BIT);
|
||||
do_test_and_reset(65, 64);
|
||||
|
||||
/* Three flags. */
|
||||
|
||||
#undef do_test_some_missing
|
||||
#undef do_test_and_reset
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void run_vma_tests(int *num_tests, int *num_fail)
|
||||
{
|
||||
TEST(copy_vma);
|
||||
TEST(vma_flags_unchanged);
|
||||
TEST(vma_flags_cleared);
|
||||
TEST(vma_flags_word);
|
||||
TEST(vma_flags_test);
|
||||
TEST(vma_flags_clear);
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue