mm: update hugetlbfs to use VMA flags on mmap_prepare

In order to update all mmap_prepare users to utilising the new VMA flags
type vma_flags_t and associated helper functions, we start by updating
hugetlbfs which has a lot of additional logic that requires updating to
make this change.

This is laying the groundwork for eliminating the vm_flags_t from struct
vm_area_desc and using vma_flags_t only, which further lays the ground for
removing the deprecated vm_flags_t type altogether.

No functional changes intended.

Link: https://lkml.kernel.org/r/9226bec80c9aa3447cc2b83354f733841dba8a50.1769097829.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Yury Norov <ynorov@nvidia.com>
Cc: Chris Mason <clm@fb.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Lorenzo Stoakes 2026-01-22 16:06:15 +00:00 committed by Andrew Morton
parent bae0ba7c7c
commit 097e8db5e2
7 changed files with 41 additions and 29 deletions

View file

@ -109,7 +109,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
vm_flags_t vm_flags;
vma_flags_t vma_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@ -119,7 +119,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
desc->vm_ops = &hugetlb_vm_ops;
/*
@ -148,23 +148,23 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
ret = -ENOMEM;
vm_flags = desc->vm_flags;
vma_flags = desc->vma_flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
* flag S_PRIVATE is set.
*/
if (inode->i_flags & S_PRIVATE)
vm_flags |= VM_NORESERVE;
vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);
if (hugetlb_reserve_pages(inode,
desc->pgoff >> huge_page_order(h),
len >> huge_page_shift(h), desc,
vm_flags) < 0)
vma_flags) < 0)
goto out;
ret = 0;
if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
@ -1527,7 +1527,7 @@ static int get_hstate_idx(int page_size_log)
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
*/
struct file *hugetlb_file_setup(const char *name, size_t size,
vm_flags_t acctflag, int creat_flags,
vma_flags_t acctflag, int creat_flags,
int page_size_log)
{
struct inode *inode;

View file

@ -150,7 +150,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_desc *desc, vm_flags_t vm_flags);
struct vm_area_desc *desc, vma_flags_t vma_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
@ -529,7 +529,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
}
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct,
int creat_flags, int page_size_log);
static inline bool is_file_hugepages(const struct file *file)
@ -545,7 +545,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
#define is_file_hugepages(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag,
int creat_flags, int page_size_log)
{
return ERR_PTR(-ENOSYS);

View file

@ -11,6 +11,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
return !!(vm_flags & VM_HUGETLB);
}
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
return vma_flags_test(flags, VMA_HUGETLB_BIT);
}
#else
static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
@ -18,6 +23,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
return false;
}
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
return false;
}
#endif
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)

View file

@ -707,9 +707,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
int error;
struct shmid_kernel *shp;
size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
const bool has_no_reserve = shmflg & SHM_NORESERVE;
struct file *file;
char name[13];
vm_flags_t acctflag = 0;
if (size < SHMMIN || size > ns->shm_ctlmax)
return -EINVAL;
@ -738,6 +738,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
sprintf(name, "SYSV%08x", key);
if (shmflg & SHM_HUGETLB) {
vma_flags_t acctflag = EMPTY_VMA_FLAGS;
struct hstate *hs;
size_t hugesize;
@ -749,17 +750,18 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
hugesize = ALIGN(size, huge_page_size(hs));
/* hugetlb_file_setup applies strict accounting */
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
if (has_no_reserve)
vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
file = hugetlb_file_setup(name, hugesize, acctflag,
HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
} else {
vm_flags_t acctflag = 0;
/*
* Do not allow no accounting for OVERCOMMIT_NEVER, even
* if it's asked for.
*/
if ((shmflg & SHM_NORESERVE) &&
sysctl_overcommit_memory != OVERCOMMIT_NEVER)
if (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
acctflag = VM_NORESERVE;
file = shmem_kernel_file_setup(name, size, acctflag);
}

View file

@ -1193,16 +1193,16 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
desc->private_data = map;
}
static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));
desc->private_data = (void *)((unsigned long)desc->private_data | flags);
}
@ -1216,7 +1216,7 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
{
VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
return ((unsigned long)desc->private_data) & flag;
}
@ -6571,7 +6571,7 @@ next:
long hugetlb_reserve_pages(struct inode *inode,
long from, long to,
struct vm_area_desc *desc,
vm_flags_t vm_flags)
vma_flags_t vma_flags)
{
long chg = -1, add = -1, spool_resv, gbl_resv;
struct hstate *h = hstate_inode(inode);
@ -6592,7 +6592,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* attempt will be made for VM_NORESERVE to allocate a page
* without using reserves
*/
if (vm_flags & VM_NORESERVE)
if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
return 0;
/*
@ -6601,7 +6601,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* to reserve the full area even if read-only as mprotect() may be
* called to make the mapping read-write. Assume !desc is a shm mapping
*/
if (!desc || desc->vm_flags & VM_MAYSHARE) {
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
/*
* resv_map can not be NULL as hugetlb_reserve_pages is only
* called for inodes for which resv_maps were created (see
@ -6635,7 +6635,7 @@ long hugetlb_reserve_pages(struct inode *inode,
if (err < 0)
goto out_err;
if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) {
/* For private mappings, the hugetlb_cgroup uncharge info hangs
* of the resv_map.
*/
@ -6672,7 +6672,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* consumed reservations are stored in the map. Hence, nothing
* else has to be done for private mappings here
*/
if (!desc || desc->vm_flags & VM_MAYSHARE) {
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
add = region_add(resv_map, from, to, regions_needed, h, h_cg);
if (unlikely(add < 0)) {
@ -6727,7 +6727,7 @@ out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
out_err:
if (!desc || desc->vm_flags & VM_MAYSHARE)
if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT))
/* Only call region_abort if the region_chg succeeded but the
* region_add failed or didn't run.
*/

View file

@ -86,7 +86,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
idx >>= huge_page_order(h);
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, 0);
nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
if (nr_resv < 0)
return ERR_PTR(nr_resv);
@ -463,7 +463,7 @@ struct file *memfd_alloc_file(const char *name, unsigned int flags)
int err = 0;
if (flags & MFD_HUGETLB) {
file = hugetlb_file_setup(name, 0, VM_NORESERVE,
file = hugetlb_file_setup(name, 0, mk_vma_flags(VMA_NORESERVE_BIT),
HUGETLB_ANONHUGE_INODE,
(flags >> MFD_HUGE_SHIFT) &
MFD_HUGE_MASK);

View file

@ -594,7 +594,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
* taken when vm_ops->mmap() is called
*/
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
VM_NORESERVE,
mk_vma_flags(VMA_NORESERVE_BIT),
HUGETLB_ANONHUGE_INODE,
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
if (IS_ERR(file))