mm/gup: remove pXX_devmap usage from get_user_pages()

GUP uses pXX_devmap() calls to see if it needs to get a reference on the
associated pgmap data structure to ensure the pages won't go away.
However, it is the driver's responsibility to ensure that pages which are
mapped (i.e. discoverable by GUP) are not offlined or removed from the
memmap, so there is no need to hold a reference on the pgmap data
structure for that.

Furthermore, mappings with PFN_DEV are no longer created, so this is
effectively dead code anyway and can be removed.
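As a rough illustration of what this means for the GUP-fast PTE path, the
sketch below is a condensed, compilable model of the mm/gup.c change in
the diff that follows; it is not kernel code, and fake_pte together with
the *_gup_fast_ok() helpers are made-up names used only for the
comparison.  Previously a pte_devmap() PTE refused FOLL_LONGTERM and
required a successful get_dev_pagemap() lookup before the fast walk could
continue; now device pages take the same path as any other page and only
pte_special() stops the fast walk.

/*
 * Standalone model of the decision change only -- not kernel code.
 * The pte and the flags are stubbed so the before/after control flow
 * can be compiled and compared in isolation.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_pte {
        bool devmap;    /* stands in for pte_devmap() */
        bool special;   /* stands in for pte_special() */
};

/* Before: devmap PTEs bailed out for FOLL_LONGTERM and needed a pgmap. */
static bool old_gup_fast_ok(struct fake_pte pte, bool foll_longterm,
                            bool pgmap_found)
{
        if (pte.devmap) {
                if (foll_longterm)
                        return false;   /* leave the fast path */
                return pgmap_found;     /* get_dev_pagemap() result */
        }
        return !pte.special;
}

/* After: no devmap special-casing; only pte_special() stops the walk. */
static bool new_gup_fast_ok(struct fake_pte pte)
{
        return !pte.special;
}

int main(void)
{
        struct fake_pte devmap_pte = { .devmap = true, .special = false };

        printf("devmap PTE under FOLL_LONGTERM: old=%d new=%d\n",
               old_gup_fast_ok(devmap_pte, true, true),
               new_gup_fast_ok(devmap_pte));
        return 0;
}

The old=0/new=1 output mirrors the removal of the FOLL_LONGTERM bail-out
and the get_dev_pagemap() lookup from gup_fast_pte_range() below.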

Link: https://lkml.kernel.org/r/708b2be76876659ec5261fe5d059b07268b98b36.1750323463.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Björn Töpel <bjorn@kernel.org>
Cc: Björn Töpel <bjorn@rivosinc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Inki Dae <m.szyprowski@samsung.com>
Cc: John Groves <john@groves.net>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

include/linux/huge_mm.h

@@ -473,9 +473,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
return folio_order(folio) >= HPAGE_PMD_ORDER;
}
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
extern struct folio *huge_zero_folio;

mm/gup.c

@@ -679,31 +679,9 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
return NULL;
pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
pud_devmap(pud)) {
/*
* device mapped pages can only be returned if the caller
* will manage the page reference count.
*
* At least one of FOLL_GET | FOLL_PIN must be set, so
* assert that here:
*/
if (!(flags & (FOLL_GET | FOLL_PIN)))
return ERR_PTR(-EEXIST);
if (flags & FOLL_TOUCH)
touch_pud(vma, addr, pudp, flags & FOLL_WRITE);
ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
if (!ctx->pgmap)
return ERR_PTR(-EFAULT);
}
page = pfn_to_page(pfn);
if (!pud_devmap(pud) && !pud_write(pud) &&
gup_must_unshare(vma, flags, page))
if (!pud_write(pud) && gup_must_unshare(vma, flags, page))
return ERR_PTR(-EMLINK);
ret = try_grab_folio(page_folio(page), 1, flags);
@@ -857,8 +835,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
page = vm_normal_page(vma, address, pte);
/*
* We only care about anon pages in can_follow_write_pte() and don't
* have to worry about pte_devmap() because they are never anon.
* We only care about anon pages in can_follow_write_pte().
*/
if ((flags & FOLL_WRITE) &&
!can_follow_write_pte(pte, page, vma, flags)) {
@@ -866,18 +843,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
goto out;
}
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
* case since they are only valid while holding the pgmap
* reference.
*/
*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
if (*pgmap)
page = pte_page(pte);
else
goto no_page;
} else if (unlikely(!page)) {
if (unlikely(!page)) {
if (flags & FOLL_DUMP) {
/* Avoid special (like zero) pages in core dumps */
page = ERR_PTR(-EFAULT);
@@ -959,14 +925,6 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
return no_page_table(vma, flags, address);
if (!pmd_present(pmdval))
return no_page_table(vma, flags, address);
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
spin_unlock(ptl);
if (page)
return page;
return no_page_table(vma, flags, address);
}
if (likely(!pmd_leaf(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
@@ -2896,7 +2854,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
int *nr)
{
struct dev_pagemap *pgmap = NULL;
int nr_start = *nr, ret = 0;
int ret = 0;
pte_t *ptep, *ptem;
ptem = ptep = pte_offset_map(&pmd, addr);
@@ -2920,16 +2878,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
goto pte_unmap;
if (pte_devmap(pte)) {
if (unlikely(flags & FOLL_LONGTERM))
goto pte_unmap;
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
goto pte_unmap;
}
} else if (pte_special(pte))
if (pte_special(pte))
goto pte_unmap;
/* If it's not marked as special it must have a valid memmap. */
@@ -3001,91 +2950,6 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
}
#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages, int *nr)
{
int nr_start = *nr;
struct dev_pagemap *pgmap = NULL;
do {
struct folio *folio;
struct page *page = pfn_to_page(pfn);
pgmap = get_dev_pagemap(pfn, pgmap);
if (unlikely(!pgmap)) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
folio_set_referenced(folio);
pages[*nr] = page;
(*nr)++;
pfn++;
} while (addr += PAGE_SIZE, addr != end);
put_dev_pagemap(pgmap);
return addr == end;
}
static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
{
unsigned long fault_pfn;
int nr_start = *nr;
fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
return 0;
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
return 0;
}
return 1;
}
static int gup_fast_devmap_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
{
unsigned long fault_pfn;
int nr_start = *nr;
fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
return 0;
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
return 0;
}
return 1;
}
#else
static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
{
BUILD_BUG();
return 0;
}
static int gup_fast_devmap_pud_leaf(pud_t pud, pud_t *pudp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
{
BUILD_BUG();
return 0;
}
#endif
static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
@@ -3100,13 +2964,6 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
if (pmd_special(orig))
return 0;
if (pmd_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
return gup_fast_devmap_pmd_leaf(orig, pmdp, addr, end, flags,
pages, nr);
}
page = pmd_page(orig);
refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
@@ -3147,13 +3004,6 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
if (pud_special(orig))
return 0;
if (pud_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
return gup_fast_devmap_pud_leaf(orig, pudp, addr, end, flags,
pages, nr);
}
page = pud_page(orig);
refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);

mm/huge_memory.c

@@ -1672,46 +1672,6 @@ void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pmd(vma, addr, pmd);
}
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
unsigned long pfn = pmd_pfn(*pmd);
struct mm_struct *mm = vma->vm_mm;
struct page *page;
int ret;
assert_spin_locked(pmd_lockptr(mm, pmd));
if (flags & FOLL_WRITE && !pmd_write(*pmd))
return NULL;
if (pmd_present(*pmd) && pmd_devmap(*pmd))
/* pass */;
else
return NULL;
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd, flags & FOLL_WRITE);
/*
* device mapped pages can only be returned if the
* caller will manage the page reference count.
*/
if (!(flags & (FOLL_GET | FOLL_PIN)))
return ERR_PTR(-EEXIST);
pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
*pgmap = get_dev_pagemap(pfn, *pgmap);
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
return page;
}
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)