| From 3102e81009934e70182f1883174dfff534c7cad2 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Sun, 14 Nov 2021 12:38:18 -0500 |
| Subject: drm/amd/amdkfd: Fix kernel panic when reset failed and been triggered |
| again |
| |
| From: shaoyunl <shaoyun.liu@amd.com> |
| |
| [ Upstream commit 2cf49e00d40d5132e3d067b5aa6d84791929ab15 ] |
| |
| In SRIOV configuration, the reset may failed to bring asic back to normal but stop cpsch |
| already been called, the start_cpsch will not be called since there is no resume in this |
| case. When reset been triggered again, driver should avoid to do uninitialization again. |
| |
| Signed-off-by: shaoyunl <shaoyun.liu@amd.com> |
| Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> |
| Signed-off-by: Alex Deucher <alexander.deucher@amd.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 5 +++++ |
| 1 file changed, 5 insertions(+) |
| |
| diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |
| index 352a32dc609b2..2645ebc63a14d 100644 |
| --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |
| +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |
| @@ -1207,6 +1207,11 @@ static int stop_cpsch(struct device_queue_manager *dqm) |
| bool hanging; |
| |
| dqm_lock(dqm); |
| + if (!dqm->sched_running) { |
| + dqm_unlock(dqm); |
| + return 0; |
| + } |
| + |
| if (!dqm->is_hws_hang) |
| unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); |
| hanging = dqm->is_hws_hang || dqm->is_resetting; |
| -- |
| 2.33.0 |
| |