Skip to content

Commit 12578c0

Browse files
committed
drm/msm/gpu: Skip retired submits in recover worker
If we somehow raced with the submit retiring, either while waiting for the worker to get a chance to run or while acquiring the gpu lock, then the recover worker should just bail.

Signed-off-by: Rob Clark <[email protected]>
Patchwork: https://patchwork.freedesktop.org/patch/568034/
1 parent 4bea53b commit 12578c0

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

drivers/gpu/drm/msm/msm_gpu.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -365,29 +365,31 @@ static void recover_worker(struct kthread_work *work)
365365
DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
366366

367367
submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
368-
if (submit) {
369-
/* Increment the fault counts */
370-
submit->queue->faults++;
371-
if (submit->aspace)
372-
submit->aspace->faults++;
373368

374-
get_comm_cmdline(submit, &comm, &cmd);
369+
/*
370+
* If the submit retired while we were waiting for the worker to run,
371+
* or waiting to acquire the gpu lock, then nothing more to do.
372+
*/
373+
if (!submit)
374+
goto out_unlock;
375375

376-
if (comm && cmd) {
377-
DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
378-
gpu->name, comm, cmd);
376+
/* Increment the fault counts */
377+
submit->queue->faults++;
378+
if (submit->aspace)
379+
submit->aspace->faults++;
379380

380-
msm_rd_dump_submit(priv->hangrd, submit,
381-
"offending task: %s (%s)", comm, cmd);
382-
} else {
383-
msm_rd_dump_submit(priv->hangrd, submit, NULL);
384-
}
381+
get_comm_cmdline(submit, &comm, &cmd);
382+
383+
if (comm && cmd) {
384+
DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
385+
gpu->name, comm, cmd);
386+
387+
msm_rd_dump_submit(priv->hangrd, submit,
388+
"offending task: %s (%s)", comm, cmd);
385389
} else {
386-
/*
387-
* We couldn't attribute this fault to any particular context,
388-
* so increment the global fault count instead.
389-
*/
390-
gpu->global_faults++;
390+
DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);
391+
392+
msm_rd_dump_submit(priv->hangrd, submit, NULL);
391393
}
392394

393395
/* Record the crash state */
@@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work)
440442

441443
pm_runtime_put(&gpu->pdev->dev);
442444

445+
out_unlock:
443446
mutex_unlock(&gpu->lock);
444447

445448
msm_gpu_retire(gpu);

0 commit comments

Comments
 (0)