mirror of
https://github.com/torvalds/linux.git
synced 2026-03-07 23:04:33 +01:00
If the job submit fails before adding the job to the scheduler queue
such as when the GEM buffer bounds checks fail, then doing a
ethosu_job_put() results in a pm_runtime_put_autosuspend() without the
corresponding pm_runtime_resume_and_get(). The dma_fence_put()'s are
also unnecessary, but seem to be harmless.
Split the ethosu_job_cleanup() function into 2 parts for the before
and after the job is queued.
Fixes: 5a5e9c0228 ("accel: Add Arm Ethos-U NPU driver")
Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-1-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
507 lines
12 KiB
C
507 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0-only OR MIT
|
|
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
|
|
/* Copyright 2025 Arm, Ltd. */
|
|
|
|
#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_print.h>
#include <drm/ethosu_accel.h>

#include "ethosu_device.h"
#include "ethosu_drv.h"
#include "ethosu_gem.h"
#include "ethosu_job.h"
|
|
|
#define JOB_TIMEOUT_MS 500
|
|
|
|
/* Convert an embedded drm_sched_job back to its containing ethosu_job. */
static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}
|
|
|
|
/* dma_fence_ops: driver name reported for this driver's fences. */
static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}
|
|
|
|
/* dma_fence_ops: timeline name; a single timeline covers the whole NPU. */
static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}
|
|
|
|
/* Fence ops for job-done fences initialized in ethosu_job_run(). */
static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};
|
|
|
|
/*
 * Program the NPU registers for a validated job and start execution.
 *
 * Called from ethosu_job_run() with dev->job_lock held.  Region base and
 * queue registers use relaxed MMIO writes; the final CMD write uses
 * writel() so all preceding setup writes are ordered before the NPU is
 * told to run.
 */
static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
{
	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;

	/* Point each used region's base-address register pair at its BO. */
	for (int i = 0; i < job->region_cnt; i++) {
		struct drm_gem_dma_object *bo;
		int region = job->region_bo_num[i];

		bo = to_drm_gem_dma_obj(job->region_bo[i]);
		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
	}

	/* The reserved SRAM region is mapped only when the job uses SRAM. */
	if (job->sram_size) {
		writel_relaxed(lower_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
		writel_relaxed(upper_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
			ETHOSU_SRAM_REGION, &dev->sramphys);
	}

	/* Command stream queue: base address and size in bytes. */
	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);

	/* Non-relaxed write orders all the setup above before starting. */
	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);

	dev_dbg(dev->base.dev,
		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
}
|
|
|
|
static int ethosu_acquire_object_fences(struct ethosu_job *job)
|
|
{
|
|
int i, ret;
|
|
struct drm_gem_object **bos = job->region_bo;
|
|
struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
|
|
|
|
for (i = 0; i < job->region_cnt; i++) {
|
|
bool is_write;
|
|
|
|
if (!bos[i])
|
|
break;
|
|
|
|
ret = dma_resv_reserve_fences(bos[i]->resv, 1);
|
|
if (ret)
|
|
return ret;
|
|
|
|
is_write = info->output_region[job->region_bo_num[i]];
|
|
ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
|
|
is_write);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void ethosu_attach_object_fences(struct ethosu_job *job)
|
|
{
|
|
int i;
|
|
struct dma_fence *fence = job->inference_done_fence;
|
|
struct drm_gem_object **bos = job->region_bo;
|
|
struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;
|
|
|
|
for (i = 0; i < job->region_cnt; i++)
|
|
if (info->output_region[job->region_bo_num[i]])
|
|
dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
|
|
}
|
|
|
|
/*
 * Queue a job to the scheduler.
 *
 * Locks all region BO reservations, records implicit dependencies, takes
 * a runtime PM reference, then arms and pushes the scheduler job.  The
 * extra kref taken here is dropped by ethosu_job_free() when the
 * scheduler is done with the job; the PM reference is dropped by
 * ethosu_job_cleanup() when the last job reference goes away.
 *
 * Returns 0 on success; on failure nothing was queued and no PM/fence
 * references are left held (so the caller's error cleanup is safe).
 */
static int ethosu_job_push(struct ethosu_job *job)
{
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	if (ret)
		return ret;

	ret = ethosu_acquire_object_fences(job);
	if (ret)
		goto out;

	/* On PM failure we fall through to unlock and return the error. */
	ret = pm_runtime_resume_and_get(job->dev->base.dev);
	if (!ret) {
		/* sched_lock serializes arm+push against other submitters. */
		guard(mutex)(&job->dev->sched_lock);

		drm_sched_job_arm(&job->base);
		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
		kref_get(&job->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&job->base);
		/* Must happen while the reservations are still locked. */
		ethosu_attach_object_fences(job);
	}

out:
	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	return ret;
}
|
|
|
|
static void ethosu_job_err_cleanup(struct ethosu_job *job)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < job->region_cnt; i++)
|
|
drm_gem_object_put(job->region_bo[i]);
|
|
|
|
drm_gem_object_put(job->cmd_bo);
|
|
|
|
kfree(job);
|
|
}
|
|
|
|
/*
 * kref release callback for jobs that were queued to the scheduler.
 *
 * Drops the runtime PM reference taken by pm_runtime_resume_and_get() in
 * ethosu_job_push() and puts both fences, then hands off to
 * ethosu_job_err_cleanup() for the common BO/job freeing.  Jobs that
 * failed before being queued must use ethosu_job_err_cleanup() directly,
 * since they hold neither the PM reference nor fence references.
 */
static void ethosu_job_cleanup(struct kref *ref)
{
	struct ethosu_job *job = container_of(ref, struct ethosu_job,
					      refcount);

	pm_runtime_put_autosuspend(job->dev->base.dev);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	ethosu_job_err_cleanup(job);
}
|
|
|
|
/* Drop a reference on a queued job; frees it via ethosu_job_cleanup(). */
static void ethosu_job_put(struct ethosu_job *job)
{
	kref_put(&job->refcount, ethosu_job_cleanup);
}
|
|
|
|
/*
 * drm_sched free_job callback: releases the scheduler's job state and
 * drops the reference taken in ethosu_job_push().
 */
static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}
|
|
|
|
/*
 * drm_sched run_job callback: initialize the done fence, mark the job
 * in-flight and program the hardware.  Returns the fence the scheduler
 * waits on, or NULL if the job was already cancelled (dependency error).
 */
static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);
	struct ethosu_device *dev = job->dev;
	struct dma_fence *fence = job->done_fence;

	/* Dependency failed; don't touch the hardware. */
	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
		       dev->fence_context, ++dev->emit_seqno);
	/* Extra ref for the scheduler; the job's own ref is put at cleanup. */
	dma_fence_get(fence);

	/* job_lock guards in_flight_job against the IRQ/timeout paths. */
	scoped_guard(mutex, &dev->job_lock) {
		dev->in_flight_job = job;
		ethosu_job_hw_submit(dev, job);
	}

	return fence;
}
|
|
|
|
/*
 * Threaded IRQ work: on hardware error hand recovery to the scheduler,
 * otherwise signal the in-flight job's completion fence.
 */
static void ethosu_job_handle_irq(struct ethosu_device *dev)
{
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	/* Bus fault or command-stream parse error: trigger timeout/reset path. */
	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
		drm_sched_fault(&dev->sched);
		return;
	}

	scoped_guard(mutex, &dev->job_lock) {
		/* May already be NULL if the timeout handler got here first. */
		if (dev->in_flight_job) {
			dma_fence_signal(dev->in_flight_job->done_fence);
			dev->in_flight_job = NULL;
		}
	}
}
|
|
|
|
/* Threaded half of the job IRQ; does the mutex-taking completion work. */
static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
{
	struct ethosu_device *dev = data;

	ethosu_job_handle_irq(dev);

	return IRQ_HANDLED;
}
|
|
|
|
/*
 * Hard IRQ half: check the NPU actually raised the interrupt (the line is
 * shared), ack it, and defer the rest to the threaded handler.
 */
static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
{
	struct ethosu_device *dev = data;
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (!(status & STATUS_IRQ_RAISED))
		return IRQ_NONE;

	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
	return IRQ_WAKE_THREAD;
}
|
|
|
|
/*
 * drm_sched timedout_job callback.
 *
 * If the NPU is still making progress (the queue read pointer advances
 * within 100ms) the timeout is ignored.  Otherwise the scheduler is
 * stopped, the in-flight job forgotten, and the NPU reset via a runtime
 * PM suspend/resume cycle before restarting the scheduler.
 */
static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
{
	struct ethosu_job *job = to_ethosu_job(bad);
	struct ethosu_device *dev = job->dev;
	bool running;
	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
	u32 cmdaddr;

	/* Snapshot the command-queue read pointer and run state. */
	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));

	if (running) {
		int ret;
		u32 reg;

		/* Poll for QREAD to move past the snapshotted position. */
		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
						 reg,
						 reg != cmdaddr,
						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);

		/* If still running and progress is being made, just return */
		if (!ret)
			return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	/* Dump the stuck command word for diagnosis (QREAD is a byte offset). */
	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
		running ? "running" : "stopped",
		cmdaddr, bocmds[cmdaddr / 4]);

	drm_sched_stop(&dev->sched, bad);

	/* Prevent the IRQ path from signalling the dead job's fence. */
	scoped_guard(mutex, &dev->job_lock)
		dev->in_flight_job = NULL;

	/* Proceed with reset now. */
	pm_runtime_force_suspend(dev->base.dev);
	pm_runtime_force_resume(dev->base.dev);

	/* Restart the scheduler */
	drm_sched_start(&dev->sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}
|
|
|
|
static const struct drm_sched_backend_ops ethosu_sched_ops = {
|
|
.run_job = ethosu_job_run,
|
|
.timedout_job = ethosu_job_timedout,
|
|
.free_job = ethosu_job_free
|
|
};
|
|
|
|
/*
 * One-time job-subsystem setup: locks, the job IRQ (shared, threaded),
 * the fence context and the DRM GPU scheduler (single credit, so one job
 * runs on the NPU at a time).
 *
 * Returns 0 on success or a negative errno; devm-managed resources are
 * released automatically on failure.
 */
int ethosu_job_init(struct ethosu_device *edev)
{
	struct device *dev = edev->base.dev;
	struct drm_sched_init_args args = {
		.ops = &ethosu_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(dev),
		.dev = dev,
	};
	int ret;

	spin_lock_init(&edev->fence_lock);
	ret = devm_mutex_init(dev, &edev->job_lock);
	if (ret)
		return ret;
	ret = devm_mutex_init(dev, &edev->sched_lock);
	if (ret)
		return ret;

	edev->irq = platform_get_irq(to_platform_device(dev), 0);
	if (edev->irq < 0)
		return edev->irq;

	/* IRQF_SHARED: hard handler checks STATUS_IRQ_RAISED before claiming. */
	ret = devm_request_threaded_irq(dev, edev->irq,
					ethosu_job_irq_handler,
					ethosu_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME,
					edev);
	if (ret) {
		dev_err(dev, "failed to request irq\n");
		return ret;
	}

	edev->fence_context = dma_fence_context_alloc(1);

	ret = drm_sched_init(&edev->sched, &args);
	if (ret) {
		dev_err(dev, "Failed to create scheduler: %d\n", ret);
		goto err_sched;
	}

	return 0;

err_sched:
	/*
	 * NOTE(review): drm_sched_fini() on a scheduler whose init failed
	 * mirrors other accel drivers, but confirm drm_sched_fini() is safe
	 * after a failed drm_sched_init() on this kernel version.
	 */
	drm_sched_fini(&edev->sched);
	return ret;
}
|
|
|
|
/* Tear down the scheduler; devm handles the IRQ and mutexes. */
void ethosu_job_fini(struct ethosu_device *dev)
{
	drm_sched_fini(&dev->sched);
}
|
|
|
|
int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
|
|
{
|
|
struct ethosu_device *dev = ethosu_priv->edev;
|
|
struct drm_gpu_scheduler *sched = &dev->sched;
|
|
int ret;
|
|
|
|
ret = drm_sched_entity_init(ðosu_priv->sched_entity,
|
|
DRM_SCHED_PRIORITY_NORMAL,
|
|
&sched, 1, NULL);
|
|
return WARN_ON(ret);
|
|
}
|
|
|
|
/* Per-file close: destroy the client's scheduler entity (flushes jobs). */
void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
{
	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;

	drm_sched_entity_destroy(entity);
}
|
|
|
|
static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
|
|
struct drm_ethosu_job *job)
|
|
{
|
|
struct ethosu_device *edev = to_ethosu_device(dev);
|
|
struct ethosu_file_priv *file_priv = file->driver_priv;
|
|
struct ethosu_job *ejob = NULL;
|
|
struct ethosu_validated_cmdstream_info *cmd_info;
|
|
int ret = 0;
|
|
|
|
/* BO region 2 is reserved if SRAM is used */
|
|
if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
|
|
return -EINVAL;
|
|
|
|
if (edev->npu_info.sram_size < job->sram_size)
|
|
return -EINVAL;
|
|
|
|
ejob = kzalloc_obj(*ejob);
|
|
if (!ejob)
|
|
return -ENOMEM;
|
|
|
|
kref_init(&ejob->refcount);
|
|
|
|
ejob->dev = edev;
|
|
ejob->sram_size = job->sram_size;
|
|
|
|
ejob->done_fence = kzalloc_obj(*ejob->done_fence);
|
|
if (!ejob->done_fence) {
|
|
ret = -ENOMEM;
|
|
goto out_cleanup_job;
|
|
}
|
|
|
|
ret = drm_sched_job_init(&ejob->base,
|
|
&file_priv->sched_entity,
|
|
1, NULL, file->client_id);
|
|
if (ret)
|
|
goto out_put_job;
|
|
|
|
ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
|
|
if (!ejob->cmd_bo) {
|
|
ret = -ENOENT;
|
|
goto out_cleanup_job;
|
|
}
|
|
cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
|
|
if (!cmd_info) {
|
|
ret = -EINVAL;
|
|
goto out_cleanup_job;
|
|
}
|
|
|
|
for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
|
|
struct drm_gem_object *gem;
|
|
|
|
/* Can only omit a BO handle if the region is not used or used for SRAM */
|
|
if (!job->region_bo_handles[i] &&
|
|
(!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
|
|
continue;
|
|
|
|
if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
|
|
dev_err(dev->dev,
|
|
"Cmdstream BO handle %d set for unused region %d\n",
|
|
job->region_bo_handles[i], i);
|
|
ret = -EINVAL;
|
|
goto out_cleanup_job;
|
|
}
|
|
|
|
gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
|
|
if (!gem) {
|
|
dev_err(dev->dev,
|
|
"Invalid BO handle %d for region %d\n",
|
|
job->region_bo_handles[i], i);
|
|
ret = -ENOENT;
|
|
goto out_cleanup_job;
|
|
}
|
|
|
|
ejob->region_bo[ejob->region_cnt] = gem;
|
|
ejob->region_bo_num[ejob->region_cnt] = i;
|
|
ejob->region_cnt++;
|
|
|
|
if (to_ethosu_bo(gem)->info) {
|
|
dev_err(dev->dev,
|
|
"Cmdstream BO handle %d used for region %d\n",
|
|
job->region_bo_handles[i], i);
|
|
ret = -EINVAL;
|
|
goto out_cleanup_job;
|
|
}
|
|
|
|
/* Verify the command stream doesn't have accesses outside the BO */
|
|
if (cmd_info->region_size[i] > gem->size) {
|
|
dev_err(dev->dev,
|
|
"cmd stream region %d size greater than BO size (%llu > %zu)\n",
|
|
i, cmd_info->region_size[i], gem->size);
|
|
ret = -EOVERFLOW;
|
|
goto out_cleanup_job;
|
|
}
|
|
}
|
|
ret = ethosu_job_push(ejob);
|
|
if (!ret) {
|
|
ethosu_job_put(ejob);
|
|
return 0;
|
|
}
|
|
|
|
out_cleanup_job:
|
|
if (ret)
|
|
drm_sched_job_cleanup(&ejob->base);
|
|
out_put_job:
|
|
ethosu_job_err_cleanup(ejob);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
|
|
{
|
|
struct drm_ethosu_submit *args = data;
|
|
int ret = 0;
|
|
unsigned int i = 0;
|
|
|
|
if (args->pad) {
|
|
drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
struct drm_ethosu_job __free(kvfree) *jobs =
|
|
kvmalloc_objs(*jobs, args->job_count);
|
|
if (!jobs)
|
|
return -ENOMEM;
|
|
|
|
if (copy_from_user(jobs,
|
|
(void __user *)(uintptr_t)args->jobs,
|
|
args->job_count * sizeof(*jobs))) {
|
|
drm_dbg(dev, "Failed to copy incoming job array\n");
|
|
return -EFAULT;
|
|
}
|
|
|
|
for (i = 0; i < args->job_count; i++) {
|
|
ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|