From 95af8f4fdce8349a5fe75264007f1af2aa1082ea Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Mon, 3 Nov 2025 18:01:47 +0530 Subject: [PATCH 1/4] drm/xe/guc: Synchronize Dead CT worker with unbind Cancel and wait for any Dead CT worker to complete before continuing with device unbinding. Else the worker will end up using resources freed by the undind operation. Cc: Zhanjun Dong Fixes: d2c5a5a926f4 ("drm/xe/guc: Dead CT helper") Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20251103123144.3231829-6-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 492671339114e376aaa38626d637a2751cdef263) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 18f6327bf552..283d846c3512 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -200,6 +200,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); From 9cd27eec872f0b95dcdd811edc39d2d32e4158c8 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 7 Oct 2025 15:32:08 +0530 Subject: [PATCH 2/4] drm/xe: Move declarations under conditional branch The xe_device_shutdown() function was needing a few declarations that were only required under a specific condition. This change moves those declarations to be within that conditional branch to avoid unnecessary declarations. Reviewed-by: Nitin Gote Link: https://patchwork.freedesktop.org/patch/msgid/20251007100208.1407021-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 15b3036045188f4da4ca62b2ed01b0f160252e9b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 34d33965eac2..132530e5a3db 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -983,12 +983,12 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { - struct xe_gt *gt; - u8 id; - drm_dbg(&xe->drm, "Shutting down device\n"); if (xe_driver_flr_disabled(xe)) { + struct xe_gt *gt; + u8 id; + xe_display_pm_shutdown(xe); xe_irq_suspend(xe); From b11a020d914c3b7628f56a9ea476a5b03679489b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 31 Oct 2025 14:23:11 +0200 Subject: [PATCH 3/4] drm/xe: Do clean shutdown also when using flr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently Xe driver is triggering flr without any clean-up on shutdown. This is causing random warnings from pending related works as the underlying hardware is reset in the middle of their execution. Fix this by performing clean shutdown also when using flr. Fixes: 501d799a47e2 ("drm/xe: Wire up device shutdown handler") Cc: Maarten Lankhorst Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20251031122312.1836534-1-jouni.hogander@intel.com Signed-off-by: Maarten Lankhorst (cherry picked from commit a4ff26b7c8ef38e4dd34f77cbcd73576fdde6dd4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 132530e5a3db..456899238377 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -983,21 +983,21 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - struct xe_gt *gt; - u8 id; + xe_display_pm_shutdown(xe); - xe_display_pm_shutdown(xe); + xe_irq_suspend(xe); - xe_irq_suspend(xe); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + xe_display_pm_shutdown_late(xe); - xe_display_pm_shutdown_late(xe); - } else { + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } From 0995c2fc39b0f998d40f5d276f67ae22fc1c37c3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 31 Oct 2025 16:40:45 -0700 Subject: [PATCH 4/4] drm/xe: Enforce correct user fence signaling order using MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent application hangs caused by out-of-order fence signaling when user fences are attached. Use drm_syncobj (via dma-fence-chain) to guarantee that each user fence signals in order, regardless of the signaling order of the attached fences. Ensure user fence writebacks to user space occur in the correct sequence. v7: - Skip drm_syncbj create of error (CI) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20251031234050.3043507-2-matthew.brost@intel.com (cherry picked from commit adda4e855ab6409a3edaa585293f1f2069ab7299) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec.c | 3 +- drivers/gpu/drm/xe/xe_exec_queue.c | 14 ++++++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 ++++ drivers/gpu/drm/xe/xe_oa.c | 45 ++++++++++++++++-------- drivers/gpu/drm/xe/xe_oa_types.h | 8 +++++ drivers/gpu/drm/xe/xe_sync.c | 17 +++++++-- drivers/gpu/drm/xe/xe_sync.h | 3 ++ drivers/gpu/drm/xe/xe_sync_types.h | 3 ++ drivers/gpu/drm/xe/xe_vm.c | 4 +++ 9 files changed, 86 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7715e74bb945..a8ab363a8046 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -165,7 +165,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 37b2b93b73d6..cb5f204c08ed 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "xe_dep_scheduler.h" @@ -324,6 +325,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -333,6 +344,9 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 27b76cf9da89..df1c69dc81f1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -155,6 +156,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a4894eb0d7f3..125698a9ecf1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -1389,7 +1390,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1409,7 +1412,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], - ¶m->syncs_user[num_syncs], 0); + ¶m->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1539,7 +1544,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, ¶m); + err = xe_oa_parse_syncs(stream->oa, stream, ¶m); if (err) goto err_config_put; @@ -1635,6 +1640,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1826,6 +1832,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; @@ -1836,17 +1843,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1870,6 +1891,8 @@ err_destroy: xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -2083,22 +2106,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, ¶m); - if (ret) - goto err_exec_q; - mutex_lock(¶m.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); mutex_unlock(¶m.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 2628f78c4e8d..daf701b5d48b 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -248,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 82872a51f098..d48ab7b32ca5 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 63c65e3d207b..ccb09ef4ec9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3606,8 +3606,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ?