drm/amdkfd: Remove kfd_process_hw_exception worker

With GPU reset-domain worker implemented, KFD hw_exception worker is not
needed any more, just call amdgpu_amdkfd_gpu_reset directly from
kfd_hws_hang.

Suggested-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Philip Yang
2025-02-25 10:04:06 -05:00
committed by Alex Deucher
parent f9234217d0
commit ee3ed10066
2 changed files with 1 additions and 11 deletions
@@ -66,7 +66,6 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -170,7 +169,7 @@ static void kfd_hws_hang(struct device_queue_manager *dqm)
/*
* Issue a GPU reset if HWS is unresponsive
*/
schedule_work(&dqm->hw_exception_work);
amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
static int convert_to_mes_queue_type(int queue_type)
@@ -1740,7 +1739,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;
init_sdma_bitmaps(dqm);
@@ -3080,13 +3078,6 @@ int kfd_evict_process_device(struct kfd_process_device *pdd)
return ret;
}
static void kfd_process_hw_exception(struct work_struct *work)
{
struct device_queue_manager *dqm = container_of(work,
struct device_queue_manager, hw_exception_work);
amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -269,7 +269,6 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
bool sched_halt;