diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ec89e0607424..58bac83e7fa3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1309,6 +1309,7 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm, { pmd_t entry; entry = folio_mk_pmd(zero_folio, vma->vm_page_prot); + entry = pmd_mkspecial(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); mm_inc_nr_ptes(mm); @@ -1418,7 +1419,9 @@ static vm_fault_t insert_pmd(struct vm_area_struct *vma, unsigned long addr, if (fop.is_folio) { entry = folio_mk_pmd(fop.folio, vma->vm_page_prot); - if (!is_huge_zero_folio(fop.folio)) { + if (is_huge_zero_folio(fop.folio)) { + entry = pmd_mkspecial(entry); + } else { folio_get(fop.folio); folio_add_file_rmap_pmd(fop.folio, &fop.folio->page, vma); add_mm_counter(mm, mm_counter_file(fop.folio), HPAGE_PMD_NR); @@ -1643,7 +1646,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, int ret = -ENOMEM; pmd = pmdp_get_lockless(src_pmd); - if (unlikely(pmd_present(pmd) && pmd_special(pmd))) { + if (unlikely(pmd_present(pmd) && pmd_special(pmd) && + !is_huge_zero_pmd(pmd))) { dst_ptl = pmd_lock(dst_mm, dst_pmd); src_ptl = pmd_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); diff --git a/mm/memory.c b/mm/memory.c index 0ba4f6b71847..626caedce35e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -555,7 +555,14 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, * * "Special" mappings do not wish to be associated with a "struct page" (either * it doesn't exist, or it exists but they don't want to touch it). In this - * case, NULL is returned here. "Normal" mappings do have a struct page. + * case, NULL is returned here. "Normal" mappings do have a struct page and + * are ordinarily refcounted. + * + * Page mappings of the shared zero folios are always considered "special", as + * they are not ordinarily refcounted: neither the refcount nor the mapcount + * of these folios is adjusted when mapping them into user page tables. + * Selected page table walkers (such as GUP) can still identify mappings of the + * shared zero folios and work with the underlying "struct page". * * There are 2 broad cases. Firstly, an architecture may define a pte_special() * pte bit, in which case this function is trivial. Secondly, an architecture @@ -585,9 +592,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, * * VM_MIXEDMAP mappings can likewise contain memory with or without "struct * page" backing, however the difference is that _all_ pages with a struct - * page (that is, those where pfn_valid is true) are refcounted and considered - * normal pages by the VM. The only exception are zeropages, which are - * *never* refcounted. + * page (that is, those where pfn_valid is true, except the shared zero + * folios) are refcounted and considered normal pages by the VM. * * The disadvantage is that pages are refcounted (which can be slower and * simply not an option for some PFNMAP users). The advantage is that we @@ -667,7 +673,6 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, { unsigned long pfn = pmd_pfn(pmd); - /* Currently it's only used for huge pfnmaps */ if (unlikely(pmd_special(pmd))) return NULL;