From 266c85885263022954928b125d46ab7a78c77a69 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:31 +0530 Subject: [PATCH] drm/xe/xe2: Handle flat ccs move for igfx. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clear flat ccs during user bo creation. - Copy ccs metadata between flat ccs and bo during eviction and restore. - Add a bool field ccs_cleared in bo; true means ccs region of bo is already cleared. v2: - Rebase. v3: - Maintain order of xe_bo_move_notify for ttm_bo_type_sg. v4: - xe_migrate_copy can be used to copy src to dst bo on igfx too. Add a bool which handles only ccs metadata copy. v5: - On dgfx, ccs should be cleared even if the bo is not compression enabled. Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 4 +- drivers/gpu/drm/xe/xe_bo.c | 33 +++++++++----- drivers/gpu/drm/xe/xe_bo_types.h | 4 ++ drivers/gpu/drm/xe/xe_migrate.c | 64 +++++++++++++++------------ drivers/gpu/drm/xe/xe_migrate.h | 3 +- 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index d6c23441632a4..7a32faa2f6888 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -152,7 +152,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, - bo->ttm.resource); + bo->ttm.resource, false); if (!sanity_fence_failed(xe, fence, big ? 
"Copying big bo remote -> vram" : "Copying small bo remote -> vram", test)) { retval = xe_map_rd(xe, &bo->vmap, 0, u64); @@ -169,7 +169,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, - remote->ttm.resource); + remote->ttm.resource, false); if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> remote" : "Copying small bo vram -> remote", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c10aa5a63a701..7c0037aecff3a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -627,10 +627,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool move_lacks_source; bool tt_has_data; bool needs_clear; + bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && + ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; - - /* Bo creation path, moving to system or TT. No clearing required. */ - if (!old_mem && ttm) { + /* Bo creation path, moving to system or TT. */ + if ((!old_mem && ttm) && !handle_system_ccs) { ttm_bo_move_null(ttm_bo, new_mem); return 0; } @@ -645,14 +646,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data; + move_lacks_source = handle_system_ccs ? 
(!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); - if ((move_lacks_source && !needs_clear) || - (old_mem_type == XE_PL_SYSTEM && - new_mem->mem_type == XE_PL_TT)) { + if ((move_lacks_source && !needs_clear)) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) { ttm_bo_move_null(ttm_bo, new_mem); goto out; } @@ -683,8 +688,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, ret = timeout; goto out; } - ttm_bo_move_null(ttm_bo, new_mem); - goto out; + + if (!handle_system_ccs) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } } if (!move_lacks_source && @@ -705,6 +713,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = mem_type_to_migrate(xe, new_mem->mem_type); else if (mem_type_is_vram(old_mem_type)) migrate = mem_type_to_migrate(xe, old_mem_type); + else + migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); @@ -747,8 +757,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) fence = xe_migrate_clear(migrate, bo, new_mem); else - fence = xe_migrate_copy(migrate, - bo, bo, old_mem, new_mem); + fence = xe_migrate_copy(migrate, bo, bo, old_mem, + new_mem, handle_system_ccs); if (IS_ERR(fence)) { ret = PTR_ERR(fence); xe_device_mem_access_put(xe); @@ -1234,6 +1244,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return bo; } + bo->ccs_cleared = false; bo->tile = tile; bo->size = size; bo->flags = flags; diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index f71dbc5189586..64c2249a4e407 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -79,6 +79,10 @@ struct xe_bo { struct llist_node freed; /** @created: Whether the bo has passed initial 
creation */ bool created; + + /** @ccs_cleared: true means that CCS region of BO is already cleared */ + bool ccs_cleared; + /** * @cpu_caching: CPU caching mode. Currently only used for userspace * objects. diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 48ada083d0b3f..adf1dab5eba25 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -577,14 +577,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) static u32 xe_migrate_ccs_copy(struct xe_migrate *m, struct xe_bb *bb, - u64 src_ofs, bool src_is_vram, - u64 dst_ofs, bool dst_is_vram, u32 dst_size, + u64 src_ofs, bool src_is_indirect, + u64 dst_ofs, bool dst_is_indirect, u32 dst_size, u64 ccs_ofs, bool copy_ccs) { struct xe_gt *gt = m->tile->primary_gt; u32 flush_flags = 0; - if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) { /* * If the src is already in vram, then it should already * have been cleared by us, or has been populated by the @@ -593,28 +593,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * Otherwise if the bo doesn't have any CCS metadata attached, * we still need to clear it for security reasons. */ - u64 ccs_src_ofs = src_is_vram ? src_ofs : m->cleared_mem_ofs; + u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs; emit_copy_ccs(gt, bb, dst_ofs, true, - ccs_src_ofs, src_is_vram, dst_size); + ccs_src_ofs, src_is_indirect, dst_size); flush_flags = MI_FLUSH_DW_CCS; } else if (copy_ccs) { - if (!src_is_vram) + if (!src_is_indirect) src_ofs = ccs_ofs; - else if (!dst_is_vram) + else if (!dst_is_indirect) dst_ofs = ccs_ofs; - /* - * At the moment, we don't support copying CCS metadata from - * system to system. 
- */ - xe_gt_assert(gt, src_is_vram || dst_is_vram); + xe_gt_assert(gt, src_is_indirect || dst_is_indirect); - emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, - src_is_vram, dst_size); - if (dst_is_vram) + emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs, + src_is_indirect, dst_size); + if (dst_is_indirect) flush_flags = MI_FLUSH_DW_CCS; } @@ -630,6 +626,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * the buffer object @dst is currently bound to. * @src: The source TTM resource. * @dst: The dst TTM resource. + * @copy_only_ccs: If true copy only CCS metadata * * Copies the contents of @src to @dst: On flat CCS devices, * the CCS metadata is copied as well if needed, or if not present, @@ -643,7 +640,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, struct xe_bo *dst_bo, struct ttm_resource *src, - struct ttm_resource *dst) + struct ttm_resource *dst, + bool copy_only_ccs) { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); @@ -655,6 +653,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, u64 src_L0, dst_L0; int pass = 0; int err; + bool src_is_pltt = src->mem_type == XE_PL_TT; + bool dst_is_pltt = dst->mem_type == XE_PL_TT; bool src_is_vram = mem_type_is_vram(src->mem_type); bool dst_is_vram = mem_type_is_vram(dst->mem_type); bool copy_ccs = xe_device_has_flat_ccs(xe) && @@ -719,8 +719,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, } /* Add copy commands size here */ - batch_size += EMIT_COPY_DW + - (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0); + batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + ((xe_device_has_flat_ccs(xe) ? 
EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -746,10 +746,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, - XE_PAGE_SIZE); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, - dst_L0_ofs, dst_is_vram, + if (!copy_only_ccs) + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); + + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, + IS_DGFX(xe) ? src_is_vram : src_is_pltt, + dst_L0_ofs, + IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, src_L0, ccs_ofs, copy_ccs); mutex_lock(&m->job_mutex); @@ -922,6 +925,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bool clear_vram = mem_type_is_vram(dst->mem_type); struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; @@ -954,9 +958,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - emit_clear_cmd_len(gt), 0, + clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, avail_pts); - if (xe_device_has_flat_ccs(xe) && clear_vram) + + if (xe_device_has_flat_ccs(xe)) batch_size += EMIT_COPY_CCS_DW; /* Clear commands */ @@ -971,7 +976,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, } size -= clear_L0; - /* Preemption is enabled again by the ring ops. 
*/ if (!clear_vram) { emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, @@ -982,9 +986,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, - clear_vram); - if (xe_device_has_flat_ccs(xe) && clear_vram) { + if (!clear_system_ccs) + emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); + + if (xe_device_has_flat_ccs(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); flush_flags = MI_FLUSH_DW_CCS; @@ -1041,6 +1046,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, return ERR_PTR(err); } + if (clear_system_ccs) + bo->ccs_cleared = true; + return fence; } diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index c729241776adf..951f19318ea48 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -85,7 +85,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, struct xe_bo *dst_bo, struct ttm_resource *src, - struct ttm_resource *dst); + struct ttm_resource *dst, + bool copy_only_ccs); struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo,