drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
xe_gt_tlb_invalidation_range accepts a start and end address rather than a VMA. This will enable multiple VMAs to be invalidated in a single invalidation. Update the PT layer to use this new function.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com
commit c4f1870362
parent 5aa5eea09a
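As a minimal illustration of the change described in the commit message (a sketch, not code from this commit): with the range-based interface, a caller holding two adjacent VMAs in the same VM could cover both with a single invalidation instead of issuing one per VMA. The names vma_a, vma_b, gt and fence are assumed to be in scope.

	/* Sketch only: one range invalidation spanning two adjacent VMAs. */
	int seqno = xe_gt_tlb_invalidation_range(gt, fence,
						 xe_vma_start(vma_a),
						 xe_vma_end(vma_b),
						 xe_vma_vm(vma_a)->usm.asid);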
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -262,6 +262,96 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 	return 0;
 }
 
+/**
+ * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
+ * address range
+ *
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion, can be NULL
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u64 start, u64 end, u32 asid)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	/* Execlists not supported */
+	if (gt_to_xe(gt)->info.force_execlist) {
+		if (fence)
+			__invalidation_fence_signal(fence);
+
+		return 0;
+	}
+
+	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 orig_start = start;
+		u64 length = end - start;
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range.
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(start, align);
+		end = ALIGN(end, align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(orig_start, length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(orig_start, length);
+		}
+
+		xe_gt_assert(gt, length >= SZ_4K);
+		xe_gt_assert(gt, is_power_of_2(length));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+						    ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+}
+
 /**
  * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
  * @gt: graphics tile
@@ -281,72 +371,11 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
 			       struct xe_vma *vma)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-#define MAX_TLB_INVALIDATION_LEN	7
-	u32 action[MAX_TLB_INVALIDATION_LEN];
-	int len = 0;
-
 	xe_gt_assert(gt, vma);
 
-	/* Execlists not supported */
-	if (gt_to_xe(gt)->info.force_execlist) {
-		if (fence)
-			__invalidation_fence_signal(fence);
-
-		return 0;
-	}
-
-	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
-	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
-	if (!xe->info.has_range_tlb_invalidation) {
-		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
-	} else {
-		u64 start = xe_vma_start(vma);
-		u64 length = xe_vma_size(vma);
-		u64 align, end;
-
-		if (length < SZ_4K)
-			length = SZ_4K;
-
-		/*
-		 * We need to invalidate a higher granularity if start address
-		 * is not aligned to length. When start is not aligned with
-		 * length we need to find the length large enough to create an
-		 * address mask covering the required range.
-		 */
-		align = roundup_pow_of_two(length);
-		start = ALIGN_DOWN(xe_vma_start(vma), align);
-		end = ALIGN(xe_vma_end(vma), align);
-		length = align;
-		while (start + length < end) {
-			length <<= 1;
-			start = ALIGN_DOWN(xe_vma_start(vma), length);
-		}
-
-		/*
-		 * Minimum invalidation size for a 2MB page that the hardware
-		 * expects is 16MB
-		 */
-		if (length >= SZ_2M) {
-			length = max_t(u64, SZ_16M, length);
-			start = ALIGN_DOWN(xe_vma_start(vma), length);
-		}
-
-		xe_gt_assert(gt, length >= SZ_4K);
-		xe_gt_assert(gt, is_power_of_2(length));
-		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
-		xe_gt_assert(gt, IS_ALIGNED(start, length));
-
-		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
-		action[len++] = xe_vma_vm(vma)->usm.asid;
-		action[len++] = lower_32_bits(start);
-		action[len++] = upper_32_bits(start);
-		action[len++] = ilog2(length) - ilog2(SZ_4K);
-	}
-
-	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
-
-	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
+	return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
+					    xe_vma_end(vma),
+					    xe_vma_vm(vma)->usm.asid);
 }
 
 /**
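The core of xe_gt_tlb_invalidation_range() is the arithmetic that widens an arbitrary [start, end) range into a naturally aligned, power-of-two block that the selective invalidation command can encode. The standalone userspace sketch below reproduces that arithmetic with local stand-ins for the kernel helpers (SZ_*, ALIGN, ALIGN_DOWN, roundup_pow_of_two) and walks one example; it is illustrative only, not driver code.

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the kernel constants and helpers used above. */
#define SZ_4K   0x1000ULL
#define SZ_2M   0x200000ULL
#define SZ_16M  0x1000000ULL
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	/* Example: a 12 KiB range that straddles a 16 KiB boundary. */
	uint64_t orig_start = 0x103000, end = 0x106000;
	uint64_t start = orig_start;
	uint64_t length = end - start;
	uint64_t align;

	if (length < SZ_4K)
		length = SZ_4K;

	/* Grow the block until one aligned power of two covers the range. */
	align = roundup_pow_of_two(length);
	start = ALIGN_DOWN(start, align);
	end = ALIGN(end, align);
	length = align;
	while (start + length < end) {
		length <<= 1;
		start = ALIGN_DOWN(orig_start, length);
	}

	/* A range touching 2 MiB pages must invalidate at least 16 MiB. */
	if (length >= SZ_2M) {
		length = length > SZ_16M ? length : SZ_16M;
		start = ALIGN_DOWN(orig_start, length);
	}

	/* Prints "start=0x100000 length=0x8000": the 12 KiB request is
	 * widened to an aligned 32 KiB block. */
	printf("start=%#llx length=%#llx\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}

The resulting block is what the GuC action encodes as ilog2(length) - ilog2(SZ_4K), which is why length must remain a power of two and start must be aligned to it.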
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
 			       struct xe_vma *vma);
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u64 start, u64 end, u32 asid);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
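The kernel-doc above notes that completion is asynchronous: a caller can pass an invalidation fence, or pass NULL and wait on the returned seqno. A minimal sketch of the seqno path, using only the declarations in this header; gt, start, end and asid are assumed to be in scope and the error handling is illustrative.

	/* Sketch only: fire a range invalidation without a fence and block
	 * until the GuC acknowledges it. */
	int seqno = xe_gt_tlb_invalidation_range(gt, NULL, start, end, asid);

	if (seqno < 0)
		return seqno;

	return xe_gt_tlb_invalidation_wait(gt, seqno);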
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
 struct invalidation_fence {
 	struct xe_gt_tlb_invalidation_fence base;
 	struct xe_gt *gt;
-	struct xe_vma *vma;
 	struct dma_fence *fence;
 	struct dma_fence_cb cb;
 	struct work_struct work;
+	u64 start;
+	u64 end;
+	u32 asid;
 };
 
 static const char *
@@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
 		container_of(w, struct invalidation_fence, work);
 
 	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+	xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
+				     ifence->end, ifence->asid);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
 				   struct invalidation_fence *ifence,
 				   struct dma_fence *fence,
-				   struct xe_vma *vma)
+				   u64 start, u64 end, u32 asid)
 {
 	int ret;
 
@@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	dma_fence_get(&ifence->base.base);	/* Ref for caller */
 	ifence->fence = fence;
 	ifence->gt = gt;
-	ifence->vma = vma;
+	ifence->start = start;
+	ifence->end = end;
+	ifence->asid = asid;
 
 	INIT_WORK(&ifence->work, invalidation_fence_work_func);
 	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 
 		/* TLB invalidation must be done before signaling rebind */
 		if (ifence) {
-			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
-							  vma);
+			int err = invalidation_fence_init(tile->primary_gt,
+							  ifence, fence,
+							  xe_vma_start(vma),
+							  xe_vma_end(vma),
+							  xe_vma_vm(vma)->usm.asid);
 			if (err) {
 				dma_fence_put(fence);
 				kfree(ifence);
@@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 			dma_fence_wait(fence, false);
 
 		/* TLB invalidation must be done before signaling unbind */
-		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+					      xe_vma_start(vma),
+					      xe_vma_end(vma),
+					      xe_vma_vm(vma)->usm.asid);
 		if (err) {
 			dma_fence_put(fence);
 			kfree(ifence);
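With invalidation_fence_init() now taking raw addresses instead of a VMA, a future caller could fold several VMAs that share a VM into one fence and one GuC invalidation, which is the follow-up the commit message points at. A hypothetical sketch only; vmas, num_vmas, vm, ifence and fence are assumed names, and no such caller exists in this commit.

	/* Hypothetical: merge N VMAs from the same VM into one range. */
	u64 start = ~0ull, end = 0;
	int i;

	for (i = 0; i < num_vmas; i++) {
		start = min(start, xe_vma_start(vmas[i]));
		end = max(end, xe_vma_end(vmas[i]));
	}

	err = invalidation_fence_init(tile->primary_gt, ifence, fence,
				      start, end, vm->usm.asid);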