drm/amdgpu: use hmm_pfns instead of array of pages

There is no need to allocate a local array of pages to hold the pages
returned by HMM. Instead, the hmm_range structure itself can be used
to reach the hmm_pfns and derive the required pages directly via
hmm_pfn_to_page().

This avoids a large number of allocation and free calls.
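
The pattern is the same at every call site: translate the range's
hmm_pfns into pages on demand instead of keeping a second,
kvcalloc()ed copy of the page pointers. A minimal sketch of the idea,
assuming only the <linux/hmm.h> API (hmm_pfn_to_page() and struct
hmm_range are real; the helper name below is illustrative and not
part of this patch):

#include <linux/hmm.h>

/*
 * Illustrative only: fill a page array straight from the hmm_range
 * that hmm_range_fault() populated, the same way the reworked
 * amdgpu_ttm_tt_set_user_pages() below fills ttm->pages.
 */
static void example_set_pages(struct page **pages, unsigned long npages,
			      struct hmm_range *range)
{
	unsigned long i;

	for (i = 0; i < npages; ++i)
		pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
}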

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit c5b3cc417b (parent ae4d627e43)
Author:    Sunil Khatri <sunil.khatri@amd.com>
Date:      2025-09-17 20:12:43 +05:30
Committer: Alex Deucher

9 changed files with 25 additions and 49 deletions

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

@@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 		return 0;
 	}
 
-	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+	ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
 	if (ret) {
 		if (ret == -EAGAIN)
 			pr_debug("Failed to get user pages, try again\n");
@@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 		pr_err("%s: Failed to reserve BO\n", __func__);
 		goto release_out;
 	}
+
+	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
 	amdgpu_bo_placement_from_domain(bo, mem->domain);
 	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 	if (ret)
@@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 	}
 
 	/* Get updated user pages */
-	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
-					   &mem->range);
+	ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
 	if (ret) {
 		pr_debug("Failed %d to get user pages\n", ret);
@@ -2595,6 +2597,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 		ret = 0;
 	}
 
+	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
 	mutex_lock(&process_info->notifier_lock);
 
 	/* Mark the BO as valid unless it was invalidated

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

@@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo		*bo;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
-	struct page			**user_pages;
 	struct hmm_range		*range;
 	bool				user_invalidated;
 };

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -29,6 +29,7 @@
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
 #include <linux/dma-buf.h>
+#include <linux/hmm.h>
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
@@ -885,24 +886,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo *bo = e->bo;
 		int i;
 
-		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
-					 sizeof(struct page *),
-					 GFP_KERNEL);
-		if (!e->user_pages) {
-			drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n");
-			r = -ENOMEM;
+		r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+		if (r)
 			goto out_free_user_pages;
-		}
-
-		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
-		if (r) {
-			kvfree(e->user_pages);
-			e->user_pages = NULL;
-			goto out_free_user_pages;
-		}
 
 		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
-			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+			if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
 				userpage_invalidated = true;
 				break;
 			}
@@ -946,7 +935,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 
 		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
-		    e->user_invalidated && e->user_pages) {
+		    e->user_invalidated) {
 			amdgpu_bo_placement_from_domain(e->bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
@@ -955,11 +944,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				goto out_free_user_pages;
 
 			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
-						     e->user_pages);
+						     e->range);
 		}
-
-		kvfree(e->user_pages);
-		e->user_pages = NULL;
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -1001,11 +987,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = e->bo;
 
-		if (!e->user_pages)
-			continue;
-
 		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
-		kvfree(e->user_pages);
-		e->user_pages = NULL;
 		e->range = NULL;
 	}
 
 	mutex_unlock(&p->bo_list->bo_list_mutex);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -572,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		goto release_object;
 
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
-		r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
-						 &range);
+		r = amdgpu_ttm_tt_get_user_pages(bo, &range);
 		if (r)
 			goto release_object;
@@ -581,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		if (r)
 			goto user_pages_done;
 
+		amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c

@@ -167,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
 int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 			       uint64_t start, uint64_t npages, bool readonly,
-			       void *owner, struct page **pages,
+			       void *owner,
 			       struct hmm_range **phmm_range)
 {
 	struct hmm_range *hmm_range;
 	unsigned long end;
 	unsigned long timeout;
-	unsigned long i;
 	unsigned long *pfns;
 	int r = 0;
@@ -222,14 +221,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 	hmm_range->start = start;
 	hmm_range->hmm_pfns = pfns;
 
-	/*
-	 * Due to default_flags, all pages are HMM_PFN_VALID or
-	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
-	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
-	 */
-	for (i = 0; pages && i < npages; i++)
-		pages[i] = hmm_pfn_to_page(pfns[i]);
-
 	*phmm_range = hmm_range;
 
 	return 0;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h

@@ -33,7 +33,7 @@
 int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 			       uint64_t start, uint64_t npages, bool readonly,
-			       void *owner, struct page **pages,
+			       void *owner,
 			       struct hmm_range **phmm_range);
 bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -708,7 +708,7 @@ struct amdgpu_ttm_tt {
  * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
  * once afterwards to stop HMM tracking
  */
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
 				 struct hmm_range **range)
 {
 	struct ttm_tt *ttm = bo->tbo.ttm;
@@ -745,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
 	readonly = amdgpu_ttm_tt_is_readonly(ttm);
 	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
-				       readonly, NULL, pages, range);
+				       readonly, NULL, range);
 out_unlock:
 	mmap_read_unlock(mm);
 	if (r)
@@ -797,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
  * that backs user memory and will ultimately be mapped into the device
  * address space.
  */
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
 {
 	unsigned long i;
 
 	for (i = 0; i < ttm->num_pages; ++i)
-		ttm->pages[i] = pages ? pages[i] : NULL;
+		ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
 }
 
 /*

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

@@ -191,7 +191,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
 
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
 				 struct hmm_range **range);
 void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
 				      struct hmm_range *range);
@@ -199,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
 				       struct hmm_range *range);
 #else
 static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
-					       struct page **pages,
 					       struct hmm_range **range)
 {
 	return -EPERM;
@@ -215,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
 }
 #endif
 
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
 int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
 			      uint64_t *user_addr);
 int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -1738,7 +1738,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	WRITE_ONCE(p->svms.faulting_task, current);
 	r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-				       readonly, owner, NULL,
+				       readonly, owner,
 				       &hmm_range);
 	WRITE_ONCE(p->svms.faulting_task, NULL);
 	if (r)