From a70cb2176f7ef6fec68c999d1f58635744125a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 1 Sep 2023 14:32:36 +0200 Subject: [PATCH] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the SDMA workaround necessary for Navi 1x into a higher layer. v2: use dev_err Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 48 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 5 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 3 + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 190 +++++++---------------- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- 5 files changed, 114 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index f357d1f7353a..6530f5d2b35b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -582,6 +582,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + struct dma_fence *fence; + struct amdgpu_job *job; + int r; + + if (!hub->sdma_invalidation_workaround || vmid || + !adev->mman.buffer_funcs_enabled || + !adev->ib_pool_ready || amdgpu_in_reset(adev) || + !ring->sched.ready) { + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, + flush_type); + return; + } + + /* The SDMA on Navi 1x has a bug which can theoretically result in memory + * corruption if an invalidation happens at the same time as an VA + * translation. Avoid this by doing the invalidation from the SDMA + * itself at least for GART. + */ + mutex_lock(&adev->mman.gtt_window_lock); + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, + AMDGPU_FENCE_OWNER_UNDEFINED, + 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + &job); + if (r) + goto error_alloc; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); + job->vm_needs_flush = true; + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + fence = amdgpu_job_submit(job); + mutex_unlock(&adev->mman.gtt_window_lock); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + + return; + +error_alloc: + mutex_unlock(&adev->mman.gtt_window_lock); + dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r); +} + /** * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index fdc25cd559b6..9e7df2f69123 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -117,6 +117,8 @@ struct amdgpu_vmhub { uint32_t vm_contexts_disable; + bool sdma_invalidation_workaround; + const struct amdgpu_vmhub_funcs *vmhub_funcs; }; @@ -335,7 +337,6 @@ struct amdgpu_gmc { u64 noretry_flags; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ ((adev), (pasid), (type), (allhub), (inst))) @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index a041c6c970e1..8521c45e8f38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev) GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + /* TODO: This is only needed on some Navi 1x revisions */ + hub->sdma_invalidation_workaround = true; + hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index e582073b57c8..70c953919089 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -231,87 +231,6 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( * by the amdgpu vm/hsa code. */ -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, - unsigned int vmhub, uint32_t flush_type) -{ - bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 tmp; - /* Use register 17 for GART */ - const unsigned int eng = 17; - unsigned int i; - unsigned char hub_ip = 0; - - hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? - GC_HWIP : MMHUB_HWIP; - - spin_lock(&adev->gmc.invalidate_lock); - /* - * It may lose gpuvm invalidate acknowldege state across power-gating - * off cycle, add semaphore acquire before invalidation and semaphore - * release after invalidation to avoid entering power gated state - * to WA the Issue - */ - - /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (use_semaphore) { - for (i = 0; i < adev->usec_timeout; i++) { - /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, hub_ip); - - if (tmp & 0x1) - break; - udelay(1); - } - - if (i >= adev->usec_timeout) - DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); - } - - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, - inv_req, hub_ip); - - /* - * Issue a dummy read to wait for the ACK register to be cleared - * to avoid a false ACK due to the new fast GRBM interface. - */ - if ((vmhub == AMDGPU_GFXHUB(0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0))) - RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, hub_ip); - - /* Wait for ACK with a delay.*/ - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng, hub_ip); - - tmp &= 1 << vmid; - if (tmp) - break; - - udelay(1); - } - - /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ - if (use_semaphore) - /* - * add semaphore release after invalidation, - * write with 0 means semaphore release - */ - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0, hub_ip); - - spin_unlock(&adev->gmc.invalidate_lock); - - if (i < adev->usec_timeout) - return; - - DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); -} - /** * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback * @@ -325,11 +244,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct dma_fence *fence; - struct amdgpu_job *job; + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); + /* Use register 17 for GART */ + const unsigned int eng = 17; + unsigned char hub_ip = 0; + u32 sem, req, ack; + unsigned int i; + u32 tmp; - int r; + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); @@ -340,66 +267,65 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && down_read_trylock(&adev->reset_domain->sem)) { - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - const unsigned int eng = 17; - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; - u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); - up_read(&adev->reset_domain->sem); return; } - mutex_lock(&adev->mman.gtt_window_lock); + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; - if (vmhub == AMDGPU_MMHUB0(0)) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - BUG_ON(vmhub != AMDGPU_GFXHUB(0)); - - if (!adev->mman.buffer_funcs_enabled || - !adev->ib_pool_ready || - amdgpu_in_reset(adev) || - ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - /* The SDMA on Navi has a bug which can theoretically result in memory - * corruption if an invalidation happens at the same time as an VA - * translation. Avoid this by doing the invalidation from the SDMA - * itself. + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue */ - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, - AMDGPU_FENCE_OWNER_UNDEFINED, - 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, - &job); - if (r) - goto error_alloc; - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); - job->vm_needs_flush = true; - job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - fence = amdgpu_job_submit(job); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_RLC_NO_KIQ(sem, hub_ip); + if (tmp & 0x1) + break; + udelay(1); + } - mutex_unlock(&adev->mman.gtt_window_lock); + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } - dma_fence_wait(fence, false); - dma_fence_put(fence); + WREG32_RLC_NO_KIQ(req, inv_req, hub_ip); - return; + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if ((vmhub == AMDGPU_GFXHUB(0)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0))) + RREG32_RLC_NO_KIQ(req, hub_ip); -error_alloc: - mutex_unlock(&adev->mman.gtt_window_lock); - DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); + /* Wait for ACK with a delay.*/ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_RLC_NO_KIQ(ack, hub_ip); + tmp &= 1 << vmid; + if (tmp) + break; + + udelay(1); + } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (use_semaphore) + WREG32_RLC_NO_KIQ(sem, 0, hub_ip); + + spin_unlock(&adev->gmc.invalidate_lock); + + if (i >= adev->usec_timeout) + dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n", + vmhub); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 69f65e9c4f93..f9c1ff2d940b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -271,7 +271,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (i < adev->usec_timeout) return; - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n"); } /**