drm/amdkfd: Add device repartition support
GFX9.4.3 will support dynamic repartitioning of the GPU through sysfs.
Add device repartitioning support in KFD to repartition the GPU from one
mode to another.
v2: squash in fix ("drm/amdkfd: Fix warning kgd2kfd_unlock_kfd defined but not used")
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
fe1f05df59
commit
0c7315e7d5
@@ -773,3 +773,13 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * amdgpu-side entry point for the KFD check-and-lock operation.
 *
 * @adev is accepted for interface symmetry but is not used here; the call
 * is forwarded to kgd2kfd_check_and_lock_kfd() and its result is returned
 * unchanged.
 */
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
	int ret = kgd2kfd_check_and_lock_kfd();

	return ret;
}
|
||||
|
||||
/*
 * amdgpu-side entry point for releasing the KFD lock.
 *
 * @adev is not used here; the call is forwarded to kgd2kfd_unlock_kfd().
 */
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{
	kgd2kfd_unlock_kfd();
	return;
}
|
||||
|
||||
@@ -151,6 +151,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
|
||||
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
|
||||
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
|
||||
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
|
||||
enum kgd_engine_type engine,
|
||||
uint32_t vmid, uint64_t gpu_addr,
|
||||
@@ -373,6 +375,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
|
||||
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
|
||||
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
|
||||
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
|
||||
int kgd2kfd_check_and_lock_kfd(void);
|
||||
void kgd2kfd_unlock_kfd(void);
|
||||
#else
|
||||
static inline int kgd2kfd_init(void)
|
||||
{
|
||||
@@ -438,5 +442,14 @@ static inline
|
||||
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kgd2kfd_check_and_lock_kfd(): performs no check and
 * always reports success.
 * NOTE(review): presumably compiled when KFD support is configured out;
 * the controlling preprocessor condition is not visible here - confirm.
 */
static inline int kgd2kfd_check_and_lock_kfd(void)
{
	/* Nothing to lock in this configuration. */
	return 0;
}
|
||||
|
||||
/*
 * Stub variant of kgd2kfd_unlock_kfd(): intentionally does nothing.
 * NOTE(review): presumably compiled when KFD support is configured out;
 * the controlling preprocessor condition is not visible here - confirm.
 */
static inline void kgd2kfd_unlock_kfd(void)
{
	/* No lock state to release. */
}
|
||||
#endif
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
||||
@@ -1233,10 +1233,30 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!adev->kfd.init_complete)
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&adev->gfx.partition_mutex);
|
||||
|
||||
ret = adev->gfx.funcs->switch_partition_mode(adev, mode);
|
||||
if (mode == adev->gfx.funcs->query_partition_mode(adev))
|
||||
goto out;
|
||||
|
||||
ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
amdgpu_amdkfd_device_fini_sw(adev);
|
||||
|
||||
adev->gfx.funcs->switch_partition_mode(adev, mode);
|
||||
|
||||
amdgpu_amdkfd_device_probe(adev);
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
/* If KFD init failed, return failure */
|
||||
if (!adev->kfd.init_complete)
|
||||
ret = -EIO;
|
||||
|
||||
amdgpu_amdkfd_unlock_kfd(adev);
|
||||
out:
|
||||
mutex_unlock(&adev->gfx.partition_mutex);
|
||||
|
||||
if (ret)
|
||||
|
||||
@@ -675,7 +675,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
|
||||
static enum amdgpu_gfx_partition
|
||||
gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev)
|
||||
{
|
||||
enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
|
||||
enum amdgpu_gfx_partition mode = adev->gfx.partition_mode;
|
||||
|
||||
if (adev->nbio.funcs->get_compute_partition_mode)
|
||||
mode = adev->nbio.funcs->get_compute_partition_mode(adev);
|
||||
@@ -689,9 +689,6 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
|
||||
u32 tmp = 0;
|
||||
int num_xcc_per_partition, i, num_xcc;
|
||||
|
||||
if (mode == adev->gfx.partition_mode)
|
||||
return mode;
|
||||
|
||||
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
switch (mode) {
|
||||
case AMDGPU_SPX_PARTITION_MODE:
|
||||
|
||||
@@ -1356,6 +1356,27 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
|
||||
kfd_get_num_sdma_engines(node);
|
||||
}
|
||||
|
||||
int kgd2kfd_check_and_lock_kfd(void)
|
||||
{
|
||||
mutex_lock(&kfd_processes_mutex);
|
||||
if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
|
||||
mutex_unlock(&kfd_processes_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
++kfd_locked;
|
||||
mutex_unlock(&kfd_processes_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kgd2kfd_unlock_kfd(void)
|
||||
{
|
||||
mutex_lock(&kfd_processes_mutex);
|
||||
--kfd_locked;
|
||||
mutex_unlock(&kfd_processes_mutex);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/* This function will send a package to HIQ to hang the HWS
|
||||
|
||||
Reference in New Issue
Block a user