mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 04:04:43 +01:00
accel/amdxdna: Stop job scheduling across aie2_release_resource()
Running jobs on a hardware context while it is in the process of
releasing resources can lead to use-after-free and crashes.
Fix this by stopping job scheduling before calling
aie2_release_resource() and restarting it after the release completes.
Additionally, aie2_sched_job_run() now checks whether the hardware
context is still active.
Fixes: 4fd6ca90fc ("accel/amdxdna: Refactor hardware context destroy routine")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260130003255.2083255-1-lizhi.hou@amd.com
This commit is contained in:
parent
a9162439ad
commit
f1370241fe
1 changed file with 6 additions and 0 deletions
|
|
@@ -315,6 +315,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
|
|||
struct dma_fence *fence;
|
||||
int ret;
|
||||
|
||||
if (hwctx->status != HWCTX_STAT_READY)
|
||||
return NULL;
|
||||
|
||||
if (!mmget_not_zero(job->mm))
|
||||
return ERR_PTR(-ESRCH);
|
||||
|
||||
|
|
@@ -705,7 +708,10 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
|
|||
aie2_hwctx_wait_for_idle(hwctx);
|
||||
|
||||
/* Request fw to destroy hwctx and cancel the remaining pending requests */
|
||||
drm_sched_stop(&hwctx->priv->sched, NULL);
|
||||
aie2_release_resource(hwctx);
|
||||
hwctx->status = HWCTX_STAT_STOP;
|
||||
drm_sched_start(&hwctx->priv->sched, 0);
|
||||
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
drm_sched_entity_destroy(&hwctx->priv->entity);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue