drm/xe: Split xe_device_td_flush()
xe_device_td_flush() has 2 possible implementations: an entire L2 flush or a transient flush, depending on WA 16023588340. Make this clear by splitting the function so it calls each of them.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-3-b888388477f2@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
This commit is contained in:
@@ -986,38 +986,15 @@ void xe_device_wmb(struct xe_device *xe)
|
||||
xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_device_td_flush() - Flush transient L3 cache entries
|
||||
* @xe: The device
|
||||
*
|
||||
* Display engine has direct access to memory and is never coherent with L3/L4
|
||||
* caches (or CPU caches), however KMD is responsible for specifically flushing
|
||||
* transient L3 GPU cache entries prior to the flip sequence to ensure scanout
|
||||
* can happen from such a surface without seeing corruption.
|
||||
*
|
||||
* Display surfaces can be tagged as transient by mapping it using one of the
|
||||
* various L3:XD PAT index modes on Xe2.
|
||||
*
|
||||
* Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
|
||||
* at the end of each submission via PIPE_CONTROL for compute/render, since SA
|
||||
* Media is not coherent with L3 and we want to support render-vs-media
|
||||
* usescases. For other engines like copy/blt the HW internally forces uncached
|
||||
* behaviour, hence why we can skip the TDF on such platforms.
|
||||
/*
|
||||
* Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
|
||||
*/
|
||||
void xe_device_td_flush(struct xe_device *xe)
|
||||
static void tdf_request_sync(struct xe_device *xe)
|
||||
{
|
||||
struct xe_gt *gt;
|
||||
unsigned int fw_ref;
|
||||
struct xe_gt *gt;
|
||||
u8 id;
|
||||
|
||||
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
|
||||
return;
|
||||
|
||||
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
|
||||
xe_device_l2_flush(xe);
|
||||
return;
|
||||
}
|
||||
|
||||
for_each_gt(gt, xe, id) {
|
||||
if (xe_gt_is_media_type(gt))
|
||||
continue;
|
||||
@@ -1027,6 +1004,7 @@ void xe_device_td_flush(struct xe_device *xe)
|
||||
return;
|
||||
|
||||
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
|
||||
|
||||
/*
|
||||
* FIXME: We can likely do better here with our choice of
|
||||
* timeout. Currently we just assume the worst case, i.e. 150us,
|
||||
@@ -1057,15 +1035,49 @@ void xe_device_l2_flush(struct xe_device *xe)
|
||||
return;
|
||||
|
||||
spin_lock(&gt->global_invl_lock);
|
||||
xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
|
||||
|
||||
xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
|
||||
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
|
||||
xe_gt_err_once(gt, "Global invalidation timeout\n");
|
||||
|
||||
spin_unlock(>->global_invl_lock);
|
||||
|
||||
xe_force_wake_put(gt_to_fw(gt), fw_ref);
|
||||
}
|
||||
|
||||
/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * Display engine has direct access to memory and is never coherent with L3/L4
 * caches (or CPU caches); KMD is responsible for explicitly flushing transient
 * L3 GPU cache entries prior to the flip sequence so that scanout from such a
 * surface does not see corruption.
 *
 * Display surfaces can be tagged as transient by mapping them using one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 * Media is not coherent with L3 and we want to support render-vs-media use
 * cases. For other engines like copy/blt the HW internally forces uncached
 * behaviour, hence why we can skip the TDF on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
	/* Transient flushing is only needed on discrete Xe2+ platforms */
	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
		return;

	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
		/* A transient flush is not sufficient: flush the L2 */
		xe_device_l2_flush(xe);
	} else {
		tdf_request_sync(xe);
	}
}
|
||||
|
||||
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
|
||||
{
|
||||
return xe_device_has_flat_ccs(xe) ?
|
||||
|
||||
Reference in New Issue
Block a user