mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 04:04:43 +01:00
Starting from NPU6, the driver can pass boot parameters address through
the AON retention register and toggle between cold/warm boot types using
the boot_type parameter, while setting the cold boot entry point in both
cases.
Refactor the existing cold/warm boot handling to be consistent with the
new NPU6 boot flow requirements and still maintain compatibility with
older boot flows.
This will allow firmware to remove support for legacy warm boot starting
from NPU6.
Fixes: 550f4dd2ce ("accel/ivpu: Add support for Nova Lake's NPU")
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
Reviewed-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Link: https://patch.msgid.link/20251230142116.540026-1-maciej.falkowski@linux.intel.com
514 lines
12 KiB
C
514 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2020-2024 Intel Corporation
|
|
*/
|
|
|
|
#include <linux/highmem.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/pm_runtime.h>
|
|
#include <linux/reboot.h>
|
|
|
|
#include "ivpu_coredump.h"
|
|
#include "ivpu_drv.h"
|
|
#include "ivpu_fw.h"
|
|
#include "ivpu_fw_log.h"
|
|
#include "ivpu_hw.h"
|
|
#include "ivpu_ipc.h"
|
|
#include "ivpu_job.h"
|
|
#include "ivpu_jsm_msg.h"
|
|
#include "ivpu_mmu.h"
|
|
#include "ivpu_ms.h"
|
|
#include "ivpu_pm.h"
|
|
#include "ivpu_trace.h"
|
|
#include "vpu_boot_api.h"
|
|
|
|
static bool ivpu_disable_recovery;
|
|
#if IS_ENABLED(CONFIG_DRM_ACCEL_IVPU_DEBUG)
|
|
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
|
|
MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
|
|
#endif
|
|
|
|
static unsigned long ivpu_tdr_timeout_ms;
|
|
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
|
|
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
|
|
|
|
static unsigned long ivpu_inference_timeout_ms;
|
|
module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644);
|
|
MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default");
|
|
|
|
/* NOTE(review): not referenced anywhere in the visible part of this file - confirm it is still needed. */
#define PM_RESCHEDULE_LIMIT 5
|
|
|
|
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
|
|
{
|
|
struct ivpu_fw_info *fw = vdev->fw;
|
|
|
|
ivpu_cmdq_reset_all_contexts(vdev);
|
|
ivpu_ipc_reset(vdev);
|
|
ivpu_fw_log_reset(vdev);
|
|
ivpu_fw_load(vdev);
|
|
fw->last_heartbeat = 0;
|
|
|
|
ivpu_dbg(vdev, FW_BOOT, "Cold boot entry point 0x%llx", vdev->fw->cold_boot_entry_point);
|
|
fw->next_boot_mode = VPU_BOOT_TYPE_COLDBOOT;
|
|
}
|
|
|
|
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
|
|
{
|
|
struct ivpu_fw_info *fw = vdev->fw;
|
|
struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem_bp);
|
|
|
|
fw->warm_boot_entry_point = bp->save_restore_ret_address;
|
|
if (!fw->warm_boot_entry_point) {
|
|
ivpu_pm_prepare_cold_boot(vdev);
|
|
return;
|
|
}
|
|
|
|
ivpu_dbg(vdev, FW_BOOT, "Warm boot entry point 0x%llx", fw->warm_boot_entry_point);
|
|
fw->next_boot_mode = VPU_BOOT_TYPE_WARMBOOT;
|
|
}
|
|
|
|
/*
 * Common suspend path: prepare the device for reset, then shut it down.
 *
 * Returns 0 on success or the error code reported by ivpu_shutdown().
 */
static int ivpu_suspend(struct ivpu_device *vdev)
{
	int err;

	ivpu_prepare_for_reset(vdev);

	err = ivpu_shutdown(vdev);
	if (!err)
		return 0;

	ivpu_err(vdev, "Failed to shutdown NPU: %d\n", err);
	return err;
}
|
|
|
|
static int ivpu_resume(struct ivpu_device *vdev)
|
|
{
|
|
int ret;
|
|
|
|
retry:
|
|
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
|
|
pci_restore_state(to_pci_dev(vdev->drm.dev));
|
|
|
|
ret = ivpu_hw_power_up(vdev);
|
|
if (ret) {
|
|
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
|
|
goto err_power_down;
|
|
}
|
|
|
|
ret = ivpu_mmu_enable(vdev);
|
|
if (ret) {
|
|
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
|
|
goto err_power_down;
|
|
}
|
|
|
|
ret = ivpu_boot(vdev);
|
|
if (ret)
|
|
goto err_mmu_disable;
|
|
|
|
return 0;
|
|
|
|
err_mmu_disable:
|
|
ivpu_mmu_disable(vdev);
|
|
err_power_down:
|
|
ivpu_hw_power_down(vdev);
|
|
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
|
|
|
|
if (ivpu_fw_is_warm_boot(vdev)) {
|
|
ivpu_pm_prepare_cold_boot(vdev);
|
|
goto retry;
|
|
} else {
|
|
ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void ivpu_pm_reset_begin(struct ivpu_device *vdev)
|
|
{
|
|
pm_runtime_disable(vdev->drm.dev);
|
|
|
|
atomic_inc(&vdev->pm->reset_counter);
|
|
atomic_set(&vdev->pm->reset_pending, 1);
|
|
down_write(&vdev->pm->reset_lock);
|
|
}
|
|
|
|
static void ivpu_pm_reset_complete(struct ivpu_device *vdev)
|
|
{
|
|
int ret;
|
|
|
|
ivpu_pm_prepare_cold_boot(vdev);
|
|
ivpu_jobs_abort_all(vdev);
|
|
ivpu_ms_cleanup_all(vdev);
|
|
|
|
ret = ivpu_resume(vdev);
|
|
if (ret) {
|
|
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
|
|
pm_runtime_set_suspended(vdev->drm.dev);
|
|
} else {
|
|
pm_runtime_set_active(vdev->drm.dev);
|
|
}
|
|
|
|
up_write(&vdev->pm->reset_lock);
|
|
atomic_set(&vdev->pm->reset_pending, 0);
|
|
|
|
pm_runtime_mark_last_busy(vdev->drm.dev);
|
|
pm_runtime_enable(vdev->drm.dev);
|
|
}
|
|
|
|
static void ivpu_pm_recovery_work(struct work_struct *work)
|
|
{
|
|
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
|
|
struct ivpu_device *vdev = pm->vdev;
|
|
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
|
|
|
|
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
|
|
|
|
ivpu_pm_reset_begin(vdev);
|
|
|
|
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
|
|
ivpu_jsm_state_dump(vdev);
|
|
ivpu_dev_coredump(vdev);
|
|
ivpu_suspend(vdev);
|
|
}
|
|
|
|
ivpu_pm_reset_complete(vdev);
|
|
|
|
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
|
|
}
|
|
|
|
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
|
|
{
|
|
ivpu_err(vdev, "Recovery triggered by %s\n", reason);
|
|
|
|
if (ivpu_disable_recovery) {
|
|
ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
|
|
return;
|
|
}
|
|
|
|
/* Trigger recovery if it's not in progress */
|
|
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
|
|
ivpu_hw_diagnose_failure(vdev);
|
|
ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
|
|
queue_work(system_dfl_wq, &vdev->pm->recovery_work);
|
|
}
|
|
}
|
|
|
|
static void ivpu_job_timeout_work(struct work_struct *work)
|
|
{
|
|
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
|
|
struct ivpu_device *vdev = pm->vdev;
|
|
unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
|
|
unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms :
|
|
vdev->timeout.inference;
|
|
u64 inference_max_retries;
|
|
u64 heartbeat;
|
|
|
|
if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
|
|
ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
|
|
goto recovery;
|
|
}
|
|
|
|
inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms);
|
|
if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) {
|
|
ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n",
|
|
inference_max_retries);
|
|
goto recovery;
|
|
}
|
|
|
|
vdev->fw->last_heartbeat = heartbeat;
|
|
ivpu_start_job_timeout_detection(vdev);
|
|
return;
|
|
|
|
recovery:
|
|
atomic_set(&vdev->job_timeout_counter, 0);
|
|
ivpu_pm_trigger_recovery(vdev, "TDR");
|
|
}
|
|
|
|
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
|
|
{
|
|
unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
|
|
|
|
/* No-op if already queued */
|
|
queue_delayed_work(system_percpu_wq, &vdev->pm->job_timeout_work,
|
|
msecs_to_jiffies(timeout_ms));
|
|
}
|
|
|
|
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
|
|
{
|
|
cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
|
|
atomic_set(&vdev->job_timeout_counter, 0);
|
|
}
|
|
|
|
int ivpu_pm_suspend_cb(struct device *dev)
|
|
{
|
|
struct drm_device *drm = dev_get_drvdata(dev);
|
|
struct ivpu_device *vdev = to_ivpu_device(drm);
|
|
unsigned long timeout;
|
|
|
|
trace_pm("suspend");
|
|
ivpu_dbg(vdev, PM, "Suspend..\n");
|
|
|
|
timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
|
|
while (!ivpu_hw_is_idle(vdev)) {
|
|
cond_resched();
|
|
if (time_after_eq(jiffies, timeout)) {
|
|
ivpu_err(vdev, "Failed to enter idle on system suspend\n");
|
|
return -EBUSY;
|
|
}
|
|
}
|
|
|
|
ivpu_jsm_pwr_d0i3_enter(vdev);
|
|
|
|
ivpu_suspend(vdev);
|
|
ivpu_pm_prepare_warm_boot(vdev);
|
|
|
|
ivpu_dbg(vdev, PM, "Suspend done.\n");
|
|
trace_pm("suspend done");
|
|
|
|
return 0;
|
|
}
|
|
|
|
int ivpu_pm_resume_cb(struct device *dev)
|
|
{
|
|
struct drm_device *drm = dev_get_drvdata(dev);
|
|
struct ivpu_device *vdev = to_ivpu_device(drm);
|
|
int ret;
|
|
|
|
trace_pm("resume");
|
|
ivpu_dbg(vdev, PM, "Resume..\n");
|
|
|
|
ret = ivpu_resume(vdev);
|
|
if (ret)
|
|
ivpu_err(vdev, "Failed to resume: %d\n", ret);
|
|
|
|
ivpu_dbg(vdev, PM, "Resume done.\n");
|
|
trace_pm("resume done");
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ivpu_pm_runtime_suspend_cb(struct device *dev)
|
|
{
|
|
struct drm_device *drm = dev_get_drvdata(dev);
|
|
struct ivpu_device *vdev = to_ivpu_device(drm);
|
|
int ret, ret_d0i3;
|
|
bool is_idle;
|
|
|
|
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
|
|
drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
|
|
|
|
trace_pm("runtime suspend");
|
|
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
|
|
|
|
ivpu_mmu_disable(vdev);
|
|
|
|
is_idle = ivpu_hw_is_idle(vdev) || vdev->pm->dct_active_percent;
|
|
if (!is_idle)
|
|
ivpu_err(vdev, "NPU is not idle before autosuspend\n");
|
|
|
|
ret_d0i3 = ivpu_jsm_pwr_d0i3_enter(vdev);
|
|
if (ret_d0i3)
|
|
ivpu_err(vdev, "Failed to prepare for d0i3: %d\n", ret_d0i3);
|
|
|
|
ret = ivpu_suspend(vdev);
|
|
if (ret)
|
|
ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);
|
|
|
|
if (!is_idle || ret_d0i3) {
|
|
ivpu_err(vdev, "Forcing cold boot due to previous errors\n");
|
|
atomic_inc(&vdev->pm->reset_counter);
|
|
ivpu_dev_coredump(vdev);
|
|
ivpu_pm_prepare_cold_boot(vdev);
|
|
} else {
|
|
ivpu_pm_prepare_warm_boot(vdev);
|
|
}
|
|
|
|
ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
|
|
trace_pm("runtime suspend done");
|
|
|
|
return 0;
|
|
}
|
|
|
|
int ivpu_pm_runtime_resume_cb(struct device *dev)
|
|
{
|
|
struct drm_device *drm = dev_get_drvdata(dev);
|
|
struct ivpu_device *vdev = to_ivpu_device(drm);
|
|
int ret;
|
|
|
|
trace_pm("runtime resume");
|
|
ivpu_dbg(vdev, PM, "Runtime resume..\n");
|
|
|
|
ret = ivpu_resume(vdev);
|
|
if (ret)
|
|
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
|
|
|
|
ivpu_dbg(vdev, PM, "Runtime resume done.\n");
|
|
trace_pm("runtime resume done");
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ivpu_rpm_get(struct ivpu_device *vdev)
|
|
{
|
|
int ret;
|
|
|
|
ret = pm_runtime_resume_and_get(vdev->drm.dev);
|
|
if (ret < 0) {
|
|
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
|
|
pm_runtime_set_suspended(vdev->drm.dev);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void ivpu_rpm_put(struct ivpu_device *vdev)
|
|
{
|
|
pm_runtime_put_autosuspend(vdev->drm.dev);
|
|
}
|
|
|
|
void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
|
|
{
|
|
struct ivpu_device *vdev = pci_get_drvdata(pdev);
|
|
|
|
ivpu_dbg(vdev, PM, "Pre-reset..\n");
|
|
|
|
ivpu_pm_reset_begin(vdev);
|
|
|
|
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
|
|
ivpu_prepare_for_reset(vdev);
|
|
ivpu_hw_reset(vdev);
|
|
}
|
|
|
|
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
|
|
}
|
|
|
|
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
|
|
{
|
|
struct ivpu_device *vdev = pci_get_drvdata(pdev);
|
|
|
|
ivpu_dbg(vdev, PM, "Post-reset..\n");
|
|
|
|
ivpu_pm_reset_complete(vdev);
|
|
|
|
ivpu_dbg(vdev, PM, "Post-reset done.\n");
|
|
}
|
|
|
|
void ivpu_pm_init(struct ivpu_device *vdev)
|
|
{
|
|
struct device *dev = vdev->drm.dev;
|
|
struct ivpu_pm_info *pm = vdev->pm;
|
|
int delay;
|
|
|
|
pm->vdev = vdev;
|
|
|
|
init_rwsem(&pm->reset_lock);
|
|
atomic_set(&pm->reset_pending, 0);
|
|
atomic_set(&pm->reset_counter, 0);
|
|
|
|
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
|
|
INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
|
|
|
|
if (ivpu_disable_recovery)
|
|
delay = -1;
|
|
else
|
|
delay = vdev->timeout.autosuspend;
|
|
|
|
pm_runtime_use_autosuspend(dev);
|
|
pm_runtime_set_autosuspend_delay(dev, delay);
|
|
pm_runtime_set_active(dev);
|
|
|
|
ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
|
|
}
|
|
|
|
void ivpu_pm_disable_recovery(struct ivpu_device *vdev)
|
|
{
|
|
drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
|
|
disable_work_sync(&vdev->pm->recovery_work);
|
|
}
|
|
|
|
void ivpu_pm_enable(struct ivpu_device *vdev)
|
|
{
|
|
struct device *dev = vdev->drm.dev;
|
|
|
|
pm_runtime_allow(dev);
|
|
pm_runtime_put_autosuspend(dev);
|
|
}
|
|
|
|
void ivpu_pm_disable(struct ivpu_device *vdev)
|
|
{
|
|
pm_runtime_get_noresume(vdev->drm.dev);
|
|
pm_runtime_forbid(vdev->drm.dev);
|
|
}
|
|
|
|
int ivpu_pm_dct_init(struct ivpu_device *vdev)
|
|
{
|
|
if (vdev->pm->dct_active_percent)
|
|
return ivpu_pm_dct_enable(vdev, vdev->pm->dct_active_percent);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Enable DCT with the given active percent.
 *
 * @vdev:           device to configure
 * @active_percent: share of DCT_PERIOD_US spent active, valid range 1-100
 *
 * Returns -EINVAL for an out-of-range percent, otherwise the result of
 * ivpu_jsm_dct_enable(). The percent is stored in vdev->pm before the
 * request is sent and is left stored if the request fails.
 */
int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent)
{
	u32 active_us, inactive_us;
	int err;

	if (!active_percent || active_percent > 100)
		return -EINVAL;

	/* Split the DCT period into its active and inactive parts */
	active_us = (DCT_PERIOD_US * active_percent) / 100;
	inactive_us = DCT_PERIOD_US - active_us;

	vdev->pm->dct_active_percent = active_percent;

	ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n",
		 active_percent, active_us, inactive_us);

	err = ivpu_jsm_dct_enable(vdev, active_us, inactive_us);
	if (err)
		ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", err);

	return err;
}
|
|
|
|
int ivpu_pm_dct_disable(struct ivpu_device *vdev)
|
|
{
|
|
int ret;
|
|
|
|
vdev->pm->dct_active_percent = 0;
|
|
|
|
ivpu_dbg(vdev, PM, "DCT requested to be disabled\n");
|
|
|
|
ret = ivpu_jsm_dct_disable(vdev);
|
|
if (ret) {
|
|
ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void ivpu_pm_irq_dct_work_fn(struct work_struct *work)
|
|
{
|
|
struct ivpu_device *vdev = container_of(work, struct ivpu_device, irq_dct_work);
|
|
bool enable;
|
|
int ret;
|
|
|
|
if (ivpu_hw_btrs_dct_get_request(vdev, &enable))
|
|
return;
|
|
|
|
if (enable)
|
|
ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT);
|
|
else
|
|
ret = ivpu_pm_dct_disable(vdev);
|
|
|
|
if (!ret) {
|
|
/* Convert percent to U1.7 format */
|
|
u8 val = DIV_ROUND_CLOSEST(vdev->pm->dct_active_percent * 128, 100);
|
|
|
|
ivpu_hw_btrs_dct_set_status(vdev, enable, val);
|
|
}
|
|
|
|
}
|