From 36d9579fed6c9429aa172f77bd28c58696ce8e2b Mon Sep 17 00:00:00 2001
From: Francesco Lavra <flavra@baylibre.com>
Date: Tue, 10 Feb 2026 19:09:32 +0100
Subject: [PATCH 01/38] drm/solomon: Fix page start when updating rectangle in
 page addressing mode

In page addressing mode, the pixel values of a dirty rectangle must be sent
to the display controller one page at a time. The range of pages
corresponding to a given rectangle is being incorrectly calculated as if
the Y value of the top left coordinate of the rectangle was 0. This can
result in rectangle updates being displayed on wrong parts of the screen.

Fix the above issue by consolidating the start page calculation in a single
place at the beginning of the update_rect function, and using the
calculated value for all addressing modes.

Fixes: b0daaa5cfaa5 ("drm/ssd130x: Support page addressing mode")
Signed-off-by: Francesco Lavra <flavra@baylibre.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patch.msgid.link/20260210180932.736502-1-flavra@baylibre.com
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
---
 drivers/gpu/drm/solomon/ssd130x.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 6ecf9e2ff61b..c77455b1834d 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -737,6 +737,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
 	unsigned int height = drm_rect_height(rect);
 	unsigned int line_length = DIV_ROUND_UP(width, 8);
 	unsigned int page_height = SSD130X_PAGE_HEIGHT;
+	u8 page_start = ssd130x->page_offset + y / page_height;
 	unsigned int pages = DIV_ROUND_UP(height, page_height);
 	struct drm_device *drm = &ssd130x->drm;
 	u32 array_idx = 0;
@@ -774,14 +775,11 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
 	 */
 
 	if (!ssd130x->page_address_mode) {
-		u8 page_start;
-
 		/* Set address range for horizontal addressing mode */
 		ret = ssd130x_set_col_range(ssd130x, ssd130x->col_offset + x, width);
 		if (ret < 0)
 			return ret;
 
-		page_start = ssd130x->page_offset + y / page_height;
 		ret = ssd130x_set_page_range(ssd130x, page_start, pages);
 		if (ret < 0)
 			return ret;
@@ -813,7 +811,7 @@ static int ssd130x_update_rect(struct ssd130x_device *ssd130x,
 		 */
 		if (ssd130x->page_address_mode) {
 			ret = ssd130x_set_page_pos(ssd130x,
-						   ssd130x->page_offset + i,
+						   page_start + i,
 						   ssd130x->col_offset + x);
 			if (ret < 0)
 				return ret;

From 6e3f4514e3b432871ac81717d24f56b441857f77 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Fri, 27 Feb 2026 12:49:01 +0000
Subject: [PATCH 02/38] drm/ttm: Fix ttm_pool_beneficial_order() return type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a nasty copy and paste bug, where the incorrect boolean return type of
the ttm_pool_beneficial_order() helper had a consequence of avoiding
direct reclaim too eagerly for drivers which use this feature (currently
amdgpu).

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: 7e9c548d3709 ("drm/ttm: Allow drivers to specify maximum beneficial TTM pool size")
Cc: Christian König <christian.koenig@amd.com>
Cc: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v6.19+
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://lore.kernel.org/r/20260227124901.3177-1-tvrtko.ursulin@igalia.com
---
 drivers/gpu/drm/ttm/ttm_pool_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool_internal.h b/drivers/gpu/drm/ttm/ttm_pool_internal.h
index 82c4b7e56a99..24c179fd69d1 100644
--- a/drivers/gpu/drm/ttm/ttm_pool_internal.h
+++ b/drivers/gpu/drm/ttm/ttm_pool_internal.h
@@ -17,7 +17,7 @@ static inline bool ttm_pool_uses_dma32(struct ttm_pool *pool)
 	return pool->alloc_flags & TTM_ALLOCATION_POOL_USE_DMA32;
 }
 
-static inline bool ttm_pool_beneficial_order(struct ttm_pool *pool)
+static inline unsigned int ttm_pool_beneficial_order(struct ttm_pool *pool)
 {
 	return pool->alloc_flags & 0xff;
 }

From 89ff45359abbf9d8d3c4aa3f5a57ed0be82b5a12 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Thu, 26 Feb 2026 16:48:41 -0800
Subject: [PATCH 03/38] accel/amdxdna: Fill invalid payload for failed command

Newer userspace applications may read the payload of a failed command
to obtain detailed error information. However, the driver and old firmware
versions may not support returning advanced error information.
In this case, initialize the command payload with an invalid value so
userspace can detect that no detailed error information is available.

Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260227004841.3080241-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_ctx.c    | 23 ++++++++---------------
 drivers/accel/amdxdna/amdxdna_ctx.c | 27 +++++++++++++++++++++++++++
 drivers/accel/amdxdna/amdxdna_ctx.h |  3 +++
 3 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 25845bd5e507..afee5e667f77 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -186,13 +186,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
 	cmd_abo = job->cmd_bo;
 
 	if (unlikely(job->job_timeout)) {
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -202,7 +202,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
 	if (status == AIE2_STATUS_SUCCESS)
 		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
 	else
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);
 
 out:
 	aie2_sched_notify(job);
@@ -244,13 +244,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
 	cmd_abo = job->cmd_bo;
 
 	if (unlikely(job->job_timeout)) {
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
+		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -270,19 +270,12 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
 		 fail_cmd_idx, fail_cmd_status);
 
 	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
-		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
 		ret = -EINVAL;
-		goto out;
+	} else {
+		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
 	}
-	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
 
-	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
-		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
-
-		cc->error_index = fail_cmd_idx;
-		if (cc->error_index >= cc->command_count)
-			cc->error_index = 0;
-	}
 out:
 	aie2_sched_notify(job);
 	return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 263d36072540..666dfd7b2a80 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -135,6 +135,33 @@ u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
 	return INVALID_CU_IDX;
 }
 
+int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo,
+			  struct amdxdna_sched_job *job, u32 cmd_idx,
+			  enum ert_cmd_state error_state)
+{
+	struct amdxdna_client *client = job->hwctx->client;
+	struct amdxdna_cmd *cmd = abo->mem.kva;
+	struct amdxdna_cmd_chain *cc = NULL;
+
+	cmd->header &= ~AMDXDNA_CMD_STATE;
+	cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, error_state);
+
+	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) {
+		cc = amdxdna_cmd_get_payload(abo, NULL);
+		cc->error_index = (cmd_idx < cc->command_count) ? cmd_idx : 0;
+		abo = amdxdna_gem_get_obj(client, cc->data[0], AMDXDNA_BO_CMD);
+		if (!abo)
+			return -EINVAL;
+		cmd = abo->mem.kva;
+	}
+
+	memset(cmd->data, 0xff, abo->mem.size - sizeof(*cmd));
+	if (cc)
+		amdxdna_gem_put_obj(abo);
+
+	return 0;
+}
+
 /*
  * This should be called in close() and remove(). DO NOT call in other syscalls.
  * This guarantee that when hwctx and resources will be released, if user
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 16c85f08f03c..fbdf9d000871 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -167,6 +167,9 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
 
 void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
 u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo,
+			  struct amdxdna_sched_job *job, u32 cmd_idx,
+			  enum ert_cmd_state error_state);
 
 void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
 void amdxdna_hwctx_remove_all(struct amdxdna_client *client);

From fb797a70108f3fda83fde6dea30bee4be7d5df8b Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 26 Feb 2026 20:52:16 -0500
Subject: [PATCH 04/38] drm: renesas: rz-du: mipi_dsi: Set DSI divider

Before the MIPI DSI clock source can be configured, the target divide
ratio needs to be set.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Tested-by: Biju Das <biju.das.jz@bp.renesas.com>
Fixes: 5a4326f2e3b1 ("clk: renesas: rzg2l: Remove DSI clock rate restrictions")
Link: https://patch.msgid.link/20260227015216.2721504-1-chris.brandt@renesas.com
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
---
 drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c
index f74a0aa85ba8..29f2b7d24fe5 100644
--- a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c
@@ -1122,6 +1122,7 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host,
 				      struct mipi_dsi_device *device)
 {
 	struct rzg2l_mipi_dsi *dsi = host_to_rzg2l_mipi_dsi(host);
+	int bpp;
 	int ret;
 
 	if (device->lanes > dsi->num_data_lanes) {
@@ -1131,7 +1132,8 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host,
 		return -EINVAL;
 	}
 
-	switch (mipi_dsi_pixel_format_to_bpp(device->format)) {
+	bpp = mipi_dsi_pixel_format_to_bpp(device->format);
+	switch (bpp) {
 	case 24:
 		break;
 	case 18:
@@ -1162,6 +1164,18 @@ static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host,
 
 	drm_bridge_add(&dsi->bridge);
 
+	/*
+	 * Report the required division ratio setting for the MIPI clock dividers.
+	 *
+	 * vclk * bpp = hsclk * 8 * num_lanes
+	 *
+	 * vclk * DSI_AB_divider = hsclk * 16
+	 *
+	 *   which simplifies to...
+	 * DSI_AB_divider = bpp * 2 / num_lanes
+	 */
+	rzg2l_cpg_dsi_div_set_divider(bpp * 2 / dsi->lanes, PLL5_TARGET_DSI);
+
 	return 0;
 }
 

From 2e3649e237237258a08d75afef96648dd2b379f7 Mon Sep 17 00:00:00 2001
From: Julian Orth <ju.orth@gmail.com>
Date: Sun, 1 Mar 2026 13:34:42 +0100
Subject: [PATCH 05/38] drm/syncobj: Fix handle <-> fd ioctls with dirty stack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Consider the following application:

    #include <fcntl.h>
    #include <string.h>
    #include <drm/drm.h>
    #include <sys/ioctl.h>

    int main(void) {
        int fd = open("/dev/dri/renderD128", O_RDWR);
        struct drm_syncobj_create arg1;
        ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &arg1);
        struct drm_syncobj_handle arg2;
        memset(&arg2, 1, sizeof(arg2)); // simulate dirty stack
        arg2.handle = arg1.handle;
        arg2.flags = 0;
        arg2.fd = 0;
        arg2.pad = 0;
        // arg2.point = 0; // userspace is required to set point to 0
        ioctl(fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &arg2);
    }

The last ioctl returns EINVAL because args->point is not 0. However,
userspace developed against older kernel versions is not aware of the
new point field and might therefore not initialize it.

The correct check would be

    if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE)
        return -EINVAL;

However, there might already be userspace that relies on this not
returning an error as long as point == 0. Therefore use the more lenient
check.

Fixes: c2d3a7300695 ("drm/syncobj: Extend EXPORT_SYNC_FILE for timeline syncobjs")
Signed-off-by: Julian Orth <ju.orth@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260301-point-v1-1-21fc5fd98614@gmail.com
---
 drivers/gpu/drm/drm_syncobj.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 250734dee928..49eccb43ce63 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -875,7 +875,7 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 		return drm_syncobj_export_sync_file(file_private, args->handle,
 						    point, &args->fd);
 
-	if (args->point)
+	if (point)
 		return -EINVAL;
 
 	return drm_syncobj_handle_to_fd(file_private, args->handle,
@@ -909,7 +909,7 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 							  args->handle,
 							  point);
 
-	if (args->point)
+	if (point)
 		return -EINVAL;
 
 	return drm_syncobj_fd_to_handle(file_private, args->fd,

From cdc8a1e11f4d5b480ec750e28010c357185b95a6 Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Wed, 14 Jan 2026 16:45:46 -0800
Subject: [PATCH 06/38] drm/xe: Do not preempt fence signaling CS instructions

If a batch buffer is complete, it makes little sense to preempt the
fence signaling instructions in the ring, as the largest portion of the
work (the batch buffer) is already done and fence signaling consists of
only a few instructions. If these instructions are preempted, the GuC
would need to perform a context switch just to signal the fence, which
is costly and delays fence signaling. Avoid this scenario by disabling
preemption immediately after the BB start instruction and re-enabling it
after executing the fence signaling instructions.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patch.msgid.link/20260115004546.58060-1-matthew.brost@intel.com
(cherry picked from commit 2bcbf2dcde0c839a73af664a3c77d4e77d58a3eb)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 248620b0901d..53d420d72164 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -280,6 +280,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
+	/* Don't preempt fence signaling */
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	if (job->user_fence.used) {
 		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
@@ -345,6 +348,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
+	/* Don't preempt fence signaling */
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	if (job->user_fence.used) {
 		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
@@ -397,6 +403,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
+	/* Don't preempt fence signaling */
+	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
 	i = emit_render_cache_flush(job, dw, i);
 
 	if (job->user_fence.used)

From e377182f0266f46f02d01838e6bde67b9dac0d66 Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Wed, 25 Feb 2026 01:34:49 +0000
Subject: [PATCH 07/38] drm/xe/configfs: Free ctx_restore_mid_bb in release

ctx_restore_mid_bb memory is allocated in wa_bb_store(), but
xe_config_device_release() only frees ctx_restore_post_bb.

Free ctx_restore_mid_bb[0].cs as well to avoid leaking the allocation
when the configfs device is removed.

Fixes: b30d5de3d40c ("drm/xe/configfs: Add mid context restore bb")
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Nitin Gote <nitin.r.gote@intel.com>
Link: https://patch.msgid.link/20260225013448.3547687-2-shuicheng.lin@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit a235e7d0098337c3f2d1e8f3610c719a589e115f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_configfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index c59b1414df22..7fd07d1280bb 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -830,6 +830,7 @@ static void xe_config_device_release(struct config_item *item)
 
 	mutex_destroy(&dev->lock);
 
+	kfree(dev->config.ctx_restore_mid_bb[0].cs);
 	kfree(dev->config.ctx_restore_post_bb[0].cs);
 	kfree(dev);
 }

From 99f9b5343cae80eb0dfe050baf6c86d722b3ba2e Mon Sep 17 00:00:00 2001
From: Tomasz Lis <tomasz.lis@intel.com>
Date: Thu, 26 Feb 2026 22:26:58 +0100
Subject: [PATCH 08/38] drm/xe/queue: Call fini on exec queue creation fail

Every call to queue init should have a corresponding fini call.
Skipping this would mean skipping removal of the queue from GuC list
(which is part of guc_id allocation). A damaged queue stored in
exec_queue_lookup list would lead to invalid memory reference,
sooner or later.

Call fini to free guc_id. This must be done before any internal
LRCs are freed.

Since the finalization with this extra call became very similar to
__xe_exec_queue_fini(), reuse that. To make this reuse possible,
alter xe_lrc_put() so it can survive NULL parameters, like other
similar functions.

v2: Reuse _xe_exec_queue_fini(). Make xe_lrc_put() aware of NULLs.

Fixes: 3c1fa4aa60b1 ("drm/xe: Move queue init before LRC creation")
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> (v1)
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20260226212701.2937065-2-tomasz.lis@intel.com
(cherry picked from commit 393e5fea6f7d7054abc2c3d97a4cfe8306cd6079)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++------------
 drivers/gpu/drm/xe/xe_lrc.h        |  3 ++-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 0ddae7fcfc97..8ecdf949f9e4 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -266,6 +266,16 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 	return q;
 }
 
+static void __xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+	int i;
+
+	q->ops->fini(q);
+
+	for (i = 0; i < q->width; ++i)
+		xe_lrc_put(q->lrc[i]);
+}
+
 static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
 {
 	int i, err;
@@ -320,21 +330,10 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
 	return 0;
 
 err_lrc:
-	for (i = i - 1; i >= 0; --i)
-		xe_lrc_put(q->lrc[i]);
+	__xe_exec_queue_fini(q);
 	return err;
 }
 
-static void __xe_exec_queue_fini(struct xe_exec_queue *q)
-{
-	int i;
-
-	q->ops->fini(q);
-
-	for (i = 0; i < q->width; ++i)
-		xe_lrc_put(q->lrc[i]);
-}
-
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
 					   u32 logical_mask, u16 width,
 					   struct xe_hw_engine *hwe, u32 flags,
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index c307a3fd9ea2..c1c615447c85 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -75,7 +75,8 @@ static inline struct xe_lrc *xe_lrc_get(struct xe_lrc *lrc)
  */
 static inline void xe_lrc_put(struct xe_lrc *lrc)
 {
-	kref_put(&lrc->refcount, xe_lrc_destroy);
+	if (lrc)
+		kref_put(&lrc->refcount, xe_lrc_destroy);
 }
 
 /**

From b570f37a2ce480be26c665345c5514686a8a0274 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 10 Feb 2026 12:56:53 +0100
Subject: [PATCH 09/38] mm: Fix a hmm_range_fault() livelock / starvation
 problem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If hmm_range_fault() fails a folio_trylock() in do_swap_page,
trying to acquire the lock of a device-private folio for migration,
to ram, the function will spin until it succeeds grabbing the lock.

However, if the process holding the lock is depending on a work
item to be completed, which is scheduled on the same CPU as the
spinning hmm_range_fault(), that work item might be starved and
we end up in a livelock / starvation situation which is never
resolved.

This can happen, for example if the process holding the
device-private folio lock is stuck in
   migrate_device_unmap()->lru_add_drain_all()
sinc lru_add_drain_all() requires a short work-item
to be run on all online cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
   migrate_device_unmap(), so that there is a reason to call
   lru_add_drain_all() for a system memory folio while a
   folio lock is held on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes
   at least one migration PTE entry insertion to be deferred to
   try_to_migrate(), which can happen after the call to
   lru_add_drain_all().
c) No or voluntary only preemption.

This all seems pretty unlikely to happen, but indeed is hit by
the "xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_unlock() and update its documentation to
indicate the new use-case.

Future code improvements might consider moving
the lru_add_drain_all() call in migrate_device_unmap() to be
called *after* all pages have migration entries inserted.
That would eliminate also b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(),
  eliminate the problem by waiting for the folio to be unlocked
  in do_swap_page() (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the
  !CONFIG_MIGRATION case. (Kernel Test Robot)
v4:
- Rename migrate_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION
  version of softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message
  (Andrew Morton)

Suggested-by: Alistair Popple <apopple@nvidia.com>
Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page")
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: linux-mm@kvack.org
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: <stable@vger.kernel.org> # v6.15+
Reviewed-by: John Hubbard <jhubbard@nvidia.com> #v3
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit a69d1ab971a624c6f112cea61536569d579c3215)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/linux/migrate.h | 10 +++++++++-
 mm/filemap.c            | 15 ++++++++++-----
 mm/memory.c             |  3 ++-
 mm/migrate.c            |  8 ++++----
 mm/migrate_device.c     |  2 +-
 5 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 26ca00c325d9..d5af2b7f577b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list);
 
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 		struct folio *dst, struct folio *src);
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 		__releases(ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
@@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag
 	return -ENOSYS;
 }
 
+static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+	__releases(ptl)
+{
+	WARN_ON_ONCE(1);
+
+	spin_unlock(ptl);
+}
+
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/filemap.c b/mm/filemap.c
index 6cd7974d4ada..406cef06b684 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1379,14 +1379,16 @@ repeat:
 
 #ifdef CONFIG_MIGRATION
 /**
- * migration_entry_wait_on_locked - Wait for a migration entry to be removed
- * @entry: migration swap entry.
+ * softleaf_entry_wait_on_locked - Wait for a migration entry or
+ * device_private entry to be removed.
+ * @entry: migration or device_private swap entry.
  * @ptl: already locked ptl. This function will drop the lock.
  *
- * Wait for a migration entry referencing the given page to be removed. This is
+ * Wait for a migration entry referencing the given page, or device_private
+ * entry referencing a dvice_private page to be unlocked. This is
  * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except
  * this can be called without taking a reference on the page. Instead this
- * should be called while holding the ptl for the migration entry referencing
+ * should be called while holding the ptl for @entry referencing
  * the page.
  *
  * Returns after unlocking the ptl.
@@ -1394,7 +1396,7 @@ repeat:
  * This follows the same logic as folio_wait_bit_common() so see the comments
  * there.
  */
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	__releases(ptl)
 {
 	struct wait_page_queue wait_page;
@@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	 * If a migration entry exists for the page the migration path must hold
 	 * a valid reference to the page, and it must take the ptl to remove the
 	 * migration entry. So the page is valid until the ptl is dropped.
+	 * Similarly any path attempting to drop the last reference to a
+	 * device-private page needs to grab the ptl to remove the device-private
+	 * entry.
 	 */
 	spin_unlock(ptl);
 
diff --git a/mm/memory.c b/mm/memory.c
index 07778814b4a8..2f815a34d924 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4763,7 +4763,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 				unlock_page(vmf->page);
 				put_page(vmf->page);
 			} else {
-				pte_unmap_unlock(vmf->pte, vmf->ptl);
+				pte_unmap(vmf->pte);
+				softleaf_entry_wait_on_locked(entry, vmf->ptl);
 			}
 		} else if (softleaf_is_hwpoison(entry)) {
 			ret = VM_FAULT_HWPOISON;
diff --git a/mm/migrate.c b/mm/migrate.c
index 1bf2cf8c44dd..2c3d489ecf51 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -500,7 +500,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	if (!softleaf_is_migration(entry))
 		goto out;
 
-	migration_entry_wait_on_locked(entry, ptl);
+	softleaf_entry_wait_on_locked(entry, ptl);
 	return;
 out:
 	spin_unlock(ptl);
@@ -532,10 +532,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p
 		 * If migration entry existed, safe to release vma lock
 		 * here because the pgtable page won't be freed without the
 		 * pgtable lock released.  See comment right above pgtable
-		 * lock release in migration_entry_wait_on_locked().
+		 * lock release in softleaf_entry_wait_on_locked().
 		 */
 		hugetlb_vma_unlock_read(vma);
-		migration_entry_wait_on_locked(entry, ptl);
+		softleaf_entry_wait_on_locked(entry, ptl);
 		return;
 	}
 
@@ -553,7 +553,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 	ptl = pmd_lock(mm, pmd);
 	if (!pmd_is_migration_entry(*pmd))
 		goto unlock;
-	migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
+	softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl);
 	return;
 unlock:
 	spin_unlock(ptl);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 0a8b31939640..8079676c8f1f 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start,
 		}
 
 		if (softleaf_is_migration(entry)) {
-			migration_entry_wait_on_locked(entry, ptl);
+			softleaf_entry_wait_on_locked(entry, ptl);
 			spin_unlock(ptl);
 			return -EAGAIN;
 		}

From 6270ee26e1edd862ea17e3eba148ca8fb2c99dc9 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Thu, 26 Feb 2026 13:38:57 -0800
Subject: [PATCH 10/38] accel/amdxdna: Fix NULL pointer dereference of
 mgmt_chann

mgmt_chann may be set to NULL if the firmware returns an unexpected
error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
pointer dereference in aie2_hw_stop().

Fix this by introducing a dedicated helper to destroy mgmt_chann
and by adding proper NULL checks before accessing it.

Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260226213857.3068474-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
 drivers/accel/amdxdna/aie2_pci.c     |  7 ++-----
 drivers/accel/amdxdna/aie2_pci.h     |  1 +
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 277a27bce850..22e1a85a7ae0 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
 		return -ENODEV;
 
 	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
-	if (ret == -ETIME) {
-		xdna_mailbox_stop_channel(ndev->mgmt_chann);
-		xdna_mailbox_destroy_channel(ndev->mgmt_chann);
-		ndev->mgmt_chann = NULL;
-	}
+	if (ret == -ETIME)
+		aie2_destroy_mgmt_chann(ndev);
 
 	if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
 		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
@@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
 		ndev->exec_msg_ops = &legacy_exec_message_ops;
 }
 
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
+{
+	struct amdxdna_dev *xdna = ndev->xdna;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (!ndev->mgmt_chann)
+		return;
+
+	xdna_mailbox_stop_channel(ndev->mgmt_chann);
+	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+	ndev->mgmt_chann = NULL;
+}
+
 static inline struct amdxdna_gem_obj *
 aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
 {
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 85079b6fc5d9..977ce21eaf9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
 
 	aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
 	aie2_mgmt_fw_fini(ndev);
-	xdna_mailbox_stop_channel(ndev->mgmt_chann);
-	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
-	ndev->mgmt_chann = NULL;
+	aie2_destroy_mgmt_chann(ndev);
 	drmm_kfree(&xdna->ddev, ndev->mbox);
 	ndev->mbox = NULL;
 	aie2_psp_stop(ndev->psp_hdl);
@@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 	return 0;
 
 destroy_mgmt_chann:
-	xdna_mailbox_stop_channel(ndev->mgmt_chann);
-	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+	aie2_destroy_mgmt_chann(ndev);
 stop_psp:
 	aie2_psp_stop(ndev->psp_hdl);
 fini_smu:
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index b20a3661078c..e72311c77996 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
 
 /* aie2_message.c */
 void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);

From c28b3ec3ca034fd1abc832fef46ce36eb13f8fad Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 27 Feb 2026 12:26:04 -0700
Subject: [PATCH 11/38] drm/amd/display: Use mpc.preblend flag to indicate 3D
 LUT

[WHAT]
New ASIC's 3D LUT is indicated by mpc.preblend.

Fixes: 0de2b1afea8d ("drm/amd/display: add 3D LUT colorop")
Reviewed-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 43175f6164d32cb96362d16e357689f74298145c)
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c   | 6 ++++--
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 2ba98f384685..cd1e58b8defc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1706,6 +1706,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state,
 	struct dc_transfer_func *tf = &dc_plane_state->in_shaper_func;
 	struct drm_atomic_state *state = plane_state->state;
 	const struct amdgpu_device *adev = drm_to_adev(colorop->dev);
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
 	const struct drm_device *dev = colorop->dev;
 	const struct drm_color_lut32 *lut3d;
 	uint32_t lut3d_size;
@@ -1722,7 +1723,7 @@ __set_dm_plane_colorop_3dlut(struct drm_plane_state *plane_state,
 	}
 
 	if (colorop_state && !colorop_state->bypass && colorop->type == DRM_COLOROP_3D_LUT) {
-		if (!adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+		if (!has_3dlut) {
 			drm_dbg(dev, "3D LUT is not supported by hardware\n");
 			return -EINVAL;
 		}
@@ -1875,6 +1876,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state,
 	struct drm_colorop *colorop = plane_state->color_pipeline;
 	struct drm_device *dev = plane_state->plane->dev;
 	struct amdgpu_device *adev = drm_to_adev(dev);
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
 	int ret;
 
 	/* 1D Curve - DEGAM TF */
@@ -1907,7 +1909,7 @@ amdgpu_dm_plane_set_colorop_properties(struct drm_plane_state *plane_state,
 	if (ret)
 		return ret;
 
-	if (adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+	if (has_3dlut) {
 		/* 1D Curve & LUT - SHAPER TF & LUT */
 		colorop = colorop->next;
 		if (!colorop) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
index f25c0ede7199..d59ba82d3d7c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c
@@ -60,6 +60,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr
 	struct drm_colorop *ops[MAX_COLOR_PIPELINE_OPS];
 	struct drm_device *dev = plane->dev;
 	struct amdgpu_device *adev = drm_to_adev(dev);
+	bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
 	int ret;
 	int i = 0;
 
@@ -112,7 +113,7 @@ int amdgpu_dm_initialize_default_pipeline(struct drm_plane *plane, struct drm_pr
 
 	i++;
 
-	if (adev->dm.dc->caps.color.dpp.hw_3d_lut) {
+	if (has_3dlut) {
 		/* 1D curve - SHAPER TF */
 		ops[i] = kzalloc_obj(*ops[0]);
 		if (!ops[i]) {

From a4fa2355e0add57253468ef13bd08f11285f3b6e Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@amd.com>
Date: Fri, 27 Feb 2026 12:30:38 -0700
Subject: [PATCH 12/38] drm/amd/display: Enable DEGAMMA and reject
 COLOR_PIPELINE+DEGAMMA_LUT

[WHAT]
Create DEGAMMA properties even if color pipeline is enabled, and enforce
the mutual exclusion in atomic check by rejecting any commit that
attempts to enable both COLOR_PIPELINE on the plane and DEGAMMA_LUT on
the CRTC simultaneously.

Fixes: 18a4127e9315 ("drm/amd/display: Disable CRTC degamma when color pipeline is enabled")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4963
Reviewed-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 196a6aa727f1f15eb54dda5e60a41543ea9397ee)
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c   | 16 ++++++++--------
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c  |  8 ++++++++
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 130190e8a1b2..304437c2284d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -765,15 +765,15 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 	dm->adev->mode_info.crtcs[crtc_index] = acrtc;
 
 	/* Don't enable DRM CRTC degamma property for
-	 * 1. Degamma is replaced by color pipeline.
-	 * 2. DCE since it doesn't support programmable degamma anywhere.
-	 * 3. DCN401 since pre-blending degamma LUT doesn't apply to cursor.
+	 * 1. DCE since it doesn't support programmable degamma anywhere.
+	 * 2. DCN401 since pre-blending degamma LUT doesn't apply to cursor.
+	 * Note: DEGAMMA properties are created even if the primary plane has the
+	 * COLOR_PIPELINE property. User space can use either the DEGAMMA properties
+	 * or the COLOR_PIPELINE property. An atomic commit which attempts to enable
+	 * both is rejected.
 	 */
-	if (plane->color_pipeline_property)
-		has_degamma = false;
-	else
-		has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch &&
-			      dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01;
+	has_degamma = dm->adev->dm.dc->caps.color.dpp.dcn_arch &&
+		      dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01;
 
 	drm_crtc_enable_color_mgmt(&acrtc->base, has_degamma ? MAX_COLOR_LUT_ENTRIES : 0,
 				   true, MAX_COLOR_LUT_ENTRIES);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 70587e5a8d46..127207e18dcb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1256,6 +1256,14 @@ static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
+	/* Reject commits that attempt to use both COLOR_PIPELINE and CRTC DEGAMMA_LUT */
+	if (new_plane_state->color_pipeline && new_crtc_state->degamma_lut) {
+		drm_dbg_atomic(plane->dev,
+			       "[PLANE:%d:%s] COLOR_PIPELINE and CRTC DEGAMMA_LUT cannot be enabled simultaneously\n",
+			       plane->base.id, plane->name);
+		return -EINVAL;
+	}
+
 	ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, new_plane_state, &scaling_info);
 	if (ret)
 		return ret;

From 389c2024cab817366e6b8345f679f41064fa94d6 Mon Sep 17 00:00:00 2001
From: sguttula <suresh.guttula@amd.com>
Date: Sat, 21 Feb 2026 10:47:59 +0530
Subject: [PATCH 13/38] drm/amdgpu: Enable DPG support for VCN5

This will set DPG flags for enabling power gating on GFX11_5_4

Signed-off-by: sguttula <suresh.guttula@amd.com>
Reviewed-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit a503c266d70d3363ba6bffb883cd6ecdb092670c)
---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 8122a5cacf07..a0ad1f8a76f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -858,7 +858,9 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
 			AMD_CG_SUPPORT_IH_CG |
 			AMD_CG_SUPPORT_BIF_MGCG |
 			AMD_CG_SUPPORT_BIF_LS;
-		adev->pg_flags = AMD_PG_SUPPORT_VCN |
+		adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_JPEG_DPG |
 			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_GFX_PG;
 		adev->external_rev_id = adev->rev_id + 0x1;

From 30d937f63bd19bbcaafa4b892eb251f8bbbf04ef Mon Sep 17 00:00:00 2001
From: Dillon Varone <Dillon.Varone@amd.com>
Date: Wed, 18 Feb 2026 14:34:28 -0500
Subject: [PATCH 14/38] drm/amd/display: Fallback to boot snapshot for dispclk

[WHY & HOW]
If the dentist is unavailable, fallback to reading CLKIP via the boot
snapshot to get the current dispclk.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Dillon Varone <Dillon.Varone@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Cc: Mario Limonciello <mario.limonciello@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 2ab77600d1e55a042c02437326d3c7563e853c6c)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index b91517b9fedc..eb198d52a115 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -72,7 +72,11 @@ void dcn401_initialize_min_clocks(struct dc *dc)
 		 * audio corruption. Read current DISPCLK from DENTIST and request the same
 		 * freq to ensure that the timing is valid and unchanged.
 		 */
-		clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr);
+		if (dc->clk_mgr->funcs->get_dispclk_from_dentist) {
+			clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr);
+		} else {
+			clocks->dispclk_khz = dc->clk_mgr->boot_snapshot.dispclk * 1000;
+		}
 	}
 	clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
 	clocks->fclk_p_state_change_support = true;

From a116bac87118903925108e57781bbfc7a7eea27b Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
Date: Mon, 2 Mar 2026 16:23:09 -0800
Subject: [PATCH 15/38] dma-buf: Include ioctl.h in UAPI header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

include/uapi/linux/dma-buf.h uses several macros from ioctl.h to define
its ioctl commands. However, it does not include ioctl.h itself. So,
if userspace source code tries to include the dma-buf.h file without
including ioctl.h, it can result in build failures.

Therefore, include ioctl.h in the dma-buf UAPI header.

Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260303002309.1401849-1-isaacmanjarres@google.com
---
 include/uapi/linux/dma-buf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index 5a6fda66d9ad..e827c9d20c5d 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -20,6 +20,7 @@
 #ifndef _DMA_BUF_UAPI_H_
 #define _DMA_BUF_UAPI_H_
 
+#include <linux/ioctl.h>
 #include <linux/types.h>
 
 /**

From 61ded1083b264ff67ca8c2de822c66b6febaf9a8 Mon Sep 17 00:00:00 2001
From: Yujie Liu <yujie.liu@intel.com>
Date: Fri, 27 Feb 2026 16:24:52 +0800
Subject: [PATCH 16/38] drm/sched: Fix kernel-doc warning for
 drm_sched_job_done()

There is a kernel-doc warning for the scheduler:

Warning: drivers/gpu/drm/scheduler/sched_main.c:367 function parameter 'result' not described in 'drm_sched_job_done'

Fix the warning by describing the undocumented error code.

Fixes: 539f9ee4b52a ("drm/scheduler: properly forward fence errors")
Signed-off-by: Yujie Liu <yujie.liu@intel.com>
[phasta: Flesh out commit message]
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://patch.msgid.link/20260227082452.1802922-1-yujie.liu@intel.com
---
 drivers/gpu/drm/scheduler/sched_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index e6ee35406165..2d5cb21a05b6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -361,6 +361,7 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
+ * @result: 0 on success, -ERRNO on error
  *
  * Finish the job's fence and resubmit the work items.
  */

From a58d487fb1a52579d3c37544ea371da78ed70c45 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <dev@lankhorst.se>
Date: Wed, 4 Mar 2026 09:56:16 +0100
Subject: [PATCH 17/38] drm/ttm/tests: Fix build failure on PREEMPT_RT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a compile error in the kunit tests when CONFIG_PREEMPT_RT is
enabled, and the normal mutex is converted into a rtmutex.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602261547.3bM6yVAS-lkp@intel.com/
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patch.msgid.link/20260304085616.1216961-1-dev@lankhorst.se
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
---
 drivers/gpu/drm/ttm/tests/ttm_bo_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
index d468f8322072..f3103307b5df 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c
@@ -222,13 +222,13 @@ static void ttm_bo_reserve_interrupted(struct kunit *test)
 		KUNIT_FAIL(test, "Couldn't create ttm bo reserve task\n");
 
 	/* Take a lock so the threaded reserve has to wait */
-	mutex_lock(&bo->base.resv->lock.base);
+	dma_resv_lock(bo->base.resv, NULL);
 
 	wake_up_process(task);
 	msleep(20);
 	err = kthread_stop(task);
 
-	mutex_unlock(&bo->base.resv->lock.base);
+	dma_resv_unlock(bo->base.resv);
 
 	KUNIT_ASSERT_EQ(test, err, -ERESTARTSYS);
 }

From ace7dcc8181373a0338efa1686c5e36eb121dff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 25 Feb 2026 09:42:20 +0200
Subject: [PATCH 18/38] drm/dp: Add definition for Panel Replay full-line
 granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE
X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line
granularity. Add definition for this.

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-1-jouni.hogander@intel.com
(cherry picked from commit b93311673263bb98a200ab1cb6304f969bdada5c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 include/drm/display/drm_dp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index e4eebabab975..8b15d3eeb716 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -571,6 +571,8 @@
 # define DP_PANEL_REPLAY_LINK_OFF_SUPPORTED_IN_PR_AFTER_ADAPTIVE_SYNC_SDP	(1 << 7)
 
 #define DP_PANEL_REPLAY_CAP_X_GRANULARITY		0xb2
+# define DP_PANEL_REPLAY_FULL_LINE_GRANULARITY		0xffff
+
 #define DP_PANEL_REPLAY_CAP_Y_GRANULARITY		0xb4
 
 /* Link Configuration */

From a99cac460ddeb3705cb54a8421339f351586b25d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Wed, 25 Feb 2026 09:42:21 +0200
Subject: [PATCH 19/38] drm/i915/psr: Fix for Panel Replay X granularity DPCD
 register handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DP specification is saying value 0xff 0xff in PANEL REPLAY SELECTIVE UPDATE
X GRANULARITY CAPABILITY registers (0xb2 and 0xb3) means full-line
granularity. Take this into account when handling Panel Replay X
granularity informed by the panel.

Fixes: 1cc854647450 ("drm/i915/psr: Use SU granularity information available in intel_connector")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7284
Tested-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Link: https://patch.msgid.link/20260225074221.1744330-2-jouni.hogander@intel.com
(cherry picked from commit f5c8f824a495e849492f09a43bd965a8f4d86cb2)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_psr.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 62208ffc5101..4ce1173a2e91 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1307,9 +1307,14 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state,
 	u16 sink_y_granularity = crtc_state->has_panel_replay ?
 		connector->dp.panel_replay_caps.su_y_granularity :
 		connector->dp.psr_caps.su_y_granularity;
-	u16 sink_w_granularity =  crtc_state->has_panel_replay ?
-		connector->dp.panel_replay_caps.su_w_granularity :
-		connector->dp.psr_caps.su_w_granularity;
+	u16 sink_w_granularity;
+
+	if (crtc_state->has_panel_replay)
+		sink_w_granularity = connector->dp.panel_replay_caps.su_w_granularity ==
+			DP_PANEL_REPLAY_FULL_LINE_GRANULARITY ?
+			crtc_hdisplay : connector->dp.panel_replay_caps.su_w_granularity;
+	else
+		sink_w_granularity = connector->dp.psr_caps.su_w_granularity;
 
 	/* PSR2 HW only send full lines so we only need to validate the width */
 	if (crtc_hdisplay % sink_w_granularity)

From a99d34e5ecb9a8f2212ee5a01140313bb115f9be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Wed, 11 Feb 2026 11:41:59 +0100
Subject: [PATCH 20/38] Revert "drm/pagemap: Disable device-to-device
 migration"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With commit
a69d1ab971a6 ("mm: Fix a hmm_range_fault() livelock / starvation problem")
device-to-device migration is not functional again and the
disabling can be reverted.

Add the above commit as a Fixes: tag in order for the revert to not
take place unless that commit is present.

This reverts commit 10dd1eaa80a56d3cf6d7c36b5269c8fed617f001.

Cc: Matthew Brost <matthew.brost@intel.com>
Fixes: b570f37a2ce4 ("mm: Fix a hmm_range_fault() livelock / starvation problem")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260211104159.114947-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit 1a3c0049b3f56278c9caf2784c53f6ab435fd12c)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo updated Fixes tag]
---
 drivers/gpu/drm/drm_pagemap.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index bdc79140875c..862675ac5bb2 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -480,18 +480,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
 		.start		= start,
 		.end		= end,
 		.pgmap_owner	= pagemap->owner,
-		/*
-		 * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently
-		 * causes 'xe_exec_system_allocator --r *race*no*' to trigger aa
-		 * engine reset and a hard hang due to getting stuck on a folio
-		 * lock. This should work and needs to be root-caused. The only
-		 * downside of not selecting MIGRATE_VMA_SELECT_DEVICE_PRIVATE
-		 * is that device-to-device migrations won’t work; instead,
-		 * memory will bounce through system memory. This path should be
-		 * rare and only occur when the madvise attributes of memory are
-		 * changed or atomics are being used.
-		 */
-		.flags		= MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT,
+		.flags		= MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
+		MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
 	};
 	unsigned long i, npages = npages_in_range(start, end);
 	unsigned long own_pages = 0, migrated_pages = 0;

From b3368ecca9538b88ddf982ea99064860fd5add97 Mon Sep 17 00:00:00 2001
From: Zhanjun Dong <zhanjun.dong@intel.com>
Date: Fri, 20 Feb 2026 17:53:08 -0500
Subject: [PATCH 21/38] drm/xe/gsc: Fix GSC proxy cleanup on early
 initialization failure

xe_gsc_proxy_remove undoes what is done in both xe_gsc_proxy_init and
xe_gsc_proxy_start; however, if we fail between those 2 calls, it is
possible that the HW forcewake access hasn't been initialized yet and so
we hit errors when the cleanup code tries to write GSC register. To
avoid that, split the cleanup in 2 functions so that the HW cleanup is
only called if the HW setup was completed successfully.

Since the HW cleanup (interrupt disabling) is now removed from
xe_gsc_proxy_remove, the cleanup on error paths in xe_gsc_proxy_start
must be updated to disable interrupts before returning.

Fixes: ff6cd29b690b ("drm/xe: Cleanup unwind of gt initialization")
Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patch.msgid.link/20260220225308.101469-1-zhanjun.dong@intel.com
(cherry picked from commit 2b37c401b265c07b46408b5cb36a4b757c9b5060)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_gsc_proxy.c | 43 +++++++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_gsc_types.h |  2 ++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 42438b21f235..707db650a2ae 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -435,15 +435,11 @@ static int proxy_channel_alloc(struct xe_gsc *gsc)
 	return 0;
 }
 
-static void xe_gsc_proxy_remove(void *arg)
+static void xe_gsc_proxy_stop(struct xe_gsc *gsc)
 {
-	struct xe_gsc *gsc = arg;
 	struct xe_gt *gt = gsc_to_gt(gsc);
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (!gsc->proxy.component_added)
-		return;
-
 	/* disable HECI2 IRQs */
 	scoped_guard(xe_pm_runtime, xe) {
 		CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC);
@@ -455,6 +451,30 @@ static void xe_gsc_proxy_remove(void *arg)
 	}
 
 	xe_gsc_wait_for_worker_completion(gsc);
+	gsc->proxy.started = false;
+}
+
+static void xe_gsc_proxy_remove(void *arg)
+{
+	struct xe_gsc *gsc = arg;
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!gsc->proxy.component_added)
+		return;
+
+	/*
+	 * GSC proxy start is an async process that can be ongoing during
+	 * Xe module load/unload. Using devm managed action to register
+	 * xe_gsc_proxy_stop could cause issues if Xe module unload has
+	 * already started when the action is registered, potentially leading
+	 * to the cleanup being called at the wrong time. Therefore, instead
+	 * of registering a separate devm action to undo what is done in
+	 * proxy start, we call it from here, but only if the start has
+	 * completed successfully (tracked with the 'started' flag).
+	 */
+	if (gsc->proxy.started)
+		xe_gsc_proxy_stop(gsc);
 
 	component_del(xe->drm.dev, &xe_gsc_proxy_component_ops);
 	gsc->proxy.component_added = false;
@@ -510,6 +530,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc)
  */
 int xe_gsc_proxy_start(struct xe_gsc *gsc)
 {
+	struct xe_gt *gt = gsc_to_gt(gsc);
 	int err;
 
 	/* enable the proxy interrupt in the GSC shim layer */
@@ -521,12 +542,18 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc)
 	 */
 	err = xe_gsc_proxy_request_handler(gsc);
 	if (err)
-		return err;
+		goto err_irq_disable;
 
 	if (!xe_gsc_proxy_init_done(gsc)) {
-		xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n");
-		return -EIO;
+		xe_gt_err(gt, "GSC FW reports proxy init not completed\n");
+		err = -EIO;
+		goto err_irq_disable;
 	}
 
+	gsc->proxy.started = true;
 	return 0;
+
+err_irq_disable:
+	gsc_proxy_irq_toggle(gsc, false);
+	return err;
 }
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 97c056656df0..5aaa2a75861f 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -58,6 +58,8 @@ struct xe_gsc {
 		struct mutex mutex;
 		/** @proxy.component_added: whether the component has been added */
 		bool component_added;
+		/** @proxy.started: whether the proxy has been started */
+		bool started;
 		/** @proxy.bo: object to store message to and from the GSC */
 		struct xe_bo *bo;
 		/** @proxy.to_gsc: map of the memory used to send messages to the GSC */

From 89865e6dc8487b627302bdced3f965cd0c406835 Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Fri, 27 Feb 2026 08:43:41 -0800
Subject: [PATCH 22/38] drm/xe/xe2_hpg: Correct implementation of
 Wa_16025250150

Wa_16025250150 asks us to set five register fields of the register to
0x1 each.  However we were just OR'ing this into the existing register
value (which has a default of 0x4 for each nibble-sized field) resulting
in final field values of 0x5 instead of the desired 0x1.  Correct the
RTP programming (use FIELD_SET instead of SET) to ensure each field is
assigned to exactly the value we want.

Cc: Aradhya Bhatia <aradhya.bhatia@intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: stable@vger.kernel.org # v6.16+
Fixes: 7654d51f1fd8 ("drm/xe/xe2hpg: Add Wa_16025250150")
Reviewed-by: Ngai-Mint Kwan <ngai-mint.kwan@linux.intel.com>
Link: https://patch.msgid.link/20260227164341.3600098-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit d139209ef88e48af1f6731cd45440421c757b6b5)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_wa.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index c7b1bd79ab17..462c2fa712e0 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -241,12 +241,13 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 
 	{ XE_RTP_NAME("16025250150"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001)),
-	  XE_RTP_ACTIONS(SET(LSN_VC_REG2,
-			     LSN_LNI_WGT(1) |
-			     LSN_LNE_WGT(1) |
-			     LSN_DIM_X_WGT(1) |
-			     LSN_DIM_Y_WGT(1) |
-			     LSN_DIM_Z_WGT(1)))
+	  XE_RTP_ACTIONS(FIELD_SET(LSN_VC_REG2,
+				   LSN_LNI_WGT_MASK | LSN_LNE_WGT_MASK |
+				   LSN_DIM_X_WGT_MASK | LSN_DIM_Y_WGT_MASK |
+				   LSN_DIM_Z_WGT_MASK,
+				   LSN_LNI_WGT(1) | LSN_LNE_WGT(1) |
+				   LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) |
+				   LSN_DIM_Z_WGT(1)))
 	},
 
 	/* Xe2_HPM */

From 3091723785def05ebfe6a50866f87a044ae314ba Mon Sep 17 00:00:00 2001
From: Shuicheng Lin <shuicheng.lin@intel.com>
Date: Wed, 4 Feb 2026 17:28:11 +0000
Subject: [PATCH 23/38] drm/xe/reg_sr: Fix leak on xa_store failure

Free the newly allocated entry when xa_store() fails to avoid a memory
leak on the error path.

v2: use goto fail_free. (Bala)

Fixes: e5283bd4dfec ("drm/xe/reg_sr: Remove register pool")
Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/20260204172810.1486719-2-shuicheng.lin@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 6bc6fec71ac45f52db609af4e62bdb96b9f5fadb)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_reg_sr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 2e5c78940b41..a07be161cfa2 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -98,10 +98,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
 	*pentry = *e;
 	ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
 	if (ret)
-		goto fail;
+		goto fail_free;
 
 	return 0;
 
+fail_free:
+	kfree(pentry);
 fail:
 	xe_gt_err(gt,
 		  "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",

From 0cfe9c4838f1147713f6b5c02094cd4dc0c598fa Mon Sep 17 00:00:00 2001
From: Varun Gupta <varun.gupta@intel.com>
Date: Mon, 23 Feb 2026 23:21:45 +0530
Subject: [PATCH 24/38] drm/xe: Fix memory leak in xe_vm_madvise_ioctl

When check_bo_args_are_sane() validation fails, jump to the new
free_vmas cleanup label to properly free the allocated resources.
This ensures proper cleanup in this error path.

Fixes: 293032eec4ba ("drm/xe/bo: Update atomic_access attribute on madvise")
Cc: stable@vger.kernel.org # v6.18+
Reviewed-by: Shuicheng Lin <shuicheng.lin@intel.com>
Signed-off-by: Varun Gupta <varun.gupta@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260223175145.1532801-1-varun.gupta@intel.com
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
(cherry picked from commit 29bd06faf727a4b76663e4be0f7d770e2d2a7965)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_vm_madvise.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 95bf53cc29e3..bc39a9a9790c 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -453,7 +453,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 						    madvise_range.num_vmas,
 						    args->atomic.val)) {
 				err = -EINVAL;
-				goto madv_fini;
+				goto free_vmas;
 			}
 		}
 
@@ -490,6 +490,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 err_fini:
 	if (madvise_range.has_bo_vmas)
 		drm_exec_fini(&exec);
+free_vmas:
 	kfree(madvise_range.vmas);
 	madvise_range.vmas = NULL;
 madv_fini:

From 68785c5e79e0fc1eacf63026fbba32be3867f410 Mon Sep 17 00:00:00 2001
From: Yang Wang <kevinyang.wang@amd.com>
Date: Wed, 25 Feb 2026 22:51:06 -0500
Subject: [PATCH 25/38] drm/amd/pm: remove invalid
 gpu_metrics.energy_accumulator on smu v13.0.x

v1:
The metrics->EnergyAccumulator field has been deprecated on newer pmfw.

v2:
add smu 13.0.0/13.0.7/13.0.10 support.

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8de9edb35976fa56565dc8fbb5d1310e8e10187c)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +++++++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index e030f1e186cb..b32c053950c9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2034,6 +2034,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu,
 			smu, SMU_DRIVER_TABLE_GPU_METRICS);
 	SmuMetricsExternal_t metrics_ext;
 	SmuMetrics_t *metrics = &metrics_ext.SmuMetrics;
+	uint32_t mp1_ver = amdgpu_ip_version(smu->adev, MP1_HWIP, 0);
 	int ret = 0;
 
 	ret = smu_cmn_get_metrics_table(smu,
@@ -2058,7 +2059,12 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu,
 					       metrics->Vcn1ActivityPercentage);
 
 	gpu_metrics->average_socket_power = metrics->AverageSocketPower;
-	gpu_metrics->energy_accumulator = metrics->EnergyAccumulator;
+
+	if ((mp1_ver == IP_VERSION(13, 0, 0) && smu->smc_fw_version <= 0x004e1e00) ||
+	    (mp1_ver == IP_VERSION(13, 0, 10) && smu->smc_fw_version <= 0x00500800))
+		gpu_metrics->energy_accumulator = metrics->EnergyAccumulator;
+	else
+		gpu_metrics->energy_accumulator = UINT_MAX;
 
 	if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD)
 		gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index af0482c9caa7..f08cfa510a8a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2065,7 +2065,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu,
 					       metrics->Vcn1ActivityPercentage);
 
 	gpu_metrics->average_socket_power = metrics->AverageSocketPower;
-	gpu_metrics->energy_accumulator = metrics->EnergyAccumulator;
+	gpu_metrics->energy_accumulator = smu->smc_fw_version <= 0x00521400 ?
+		metrics->EnergyAccumulator : UINT_MAX;
 
 	if (metrics->AverageGfxActivity <= SMU_13_0_7_BUSY_THRESHOLD)
 		gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs;

From 2c1030f2e84885cc58bffef6af67d5b9d2e7098f Mon Sep 17 00:00:00 2001
From: Alysa Liu <Alysa.Liu@amd.com>
Date: Thu, 5 Feb 2026 11:21:45 -0500
Subject: [PATCH 26/38] drm/amdgpu: Fix use-after-free race in VM acquire

Replace non-atomic vm->process_info assignment with cmpxchg()
to prevent race when parent/child processes sharing a drm_file
both try to acquire the same VM after fork().

Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alysa Liu <Alysa.Liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c7c573275ec20db05be769288a3e3bb2250ec618)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 06c1913d5a3f..29b400cdd6d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1439,7 +1439,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		*process_info = info;
 	}
 
-	vm->process_info = *process_info;
+	if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) {
+		ret = -EINVAL;
+		goto already_acquired;
+	}
 
 	/* Validate page directory and attach eviction fence */
 	ret = amdgpu_bo_reserve(vm->root.bo, true);
@@ -1479,6 +1482,7 @@ validate_pd_fail:
 	amdgpu_bo_unreserve(vm->root.bo);
 reserve_pd_fail:
 	vm->process_info = NULL;
+already_acquired:
 	if (info) {
 		dma_fence_put(&info->eviction_fence->base);
 		*process_info = NULL;

From a145bbff6f53ab80757a15eba5ad2ba8e3bdc9dc Mon Sep 17 00:00:00 2001
From: sguttula <suresh.guttula@amd.com>
Date: Wed, 25 Feb 2026 13:57:01 +0530
Subject: [PATCH 27/38] drm/amdgpu/psp: Use Indirect access address for GFX to
 PSP mailbox

The reason the RAP is not granting access to 0x58200 is that
a dedicated RSMU slot would have to be spent for this address range,
and MPASP is close to running out of RSMU slots.

This will help to fix PSP TOC load failure during secureboot.
GFX Driver Need to use indirect access for SMN address regs.

Signed-off-by: sguttula <suresh.guttula@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 9b822e26eea3899003aa8a89d5e2c4408e066e20)
---
 drivers/gpu/drm/amd/amdgpu/psp_v15_0.c        | 20 +++++++++----------
 .../include/asic_reg/mp/mp_15_0_0_offset.h    | 18 +++++++++++++++++
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
index 723ddae17644..73a709773e85 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
@@ -69,12 +69,12 @@ static int psp_v15_0_0_ring_stop(struct psp_context *psp,
 				   0x80000000, 0x80000000, false);
 	} else {
 		/* Write the ring destroy command*/
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64,
 			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
 		/* there might be handshake issue with hardware which needs delay */
 		mdelay(20);
 		/* Wait for response flag (bit 31) */
-		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64),
 				   0x80000000, 0x80000000, false);
 	}
 
@@ -116,7 +116,7 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp,
 
 	} else {
 		/* Wait for sOS ready for ring creation */
-		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64),
 				   0x80000000, 0x80000000, false);
 		if (ret) {
 			DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
@@ -125,23 +125,23 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp,
 
 		/* Write low address of the ring to C2PMSG_69 */
 		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg);
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_69, psp_ring_reg);
 		/* Write high address of the ring to C2PMSG_70 */
 		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg);
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_70, psp_ring_reg);
 		/* Write size of ring to C2PMSG_71 */
 		psp_ring_reg = ring->ring_size;
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg);
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_71, psp_ring_reg);
 		/* Write the ring initialization command to C2PMSG_64 */
 		psp_ring_reg = ring_type;
 		psp_ring_reg = psp_ring_reg << 16;
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg);
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, psp_ring_reg);
 
 		/* there might be handshake issue with hardware which needs delay */
 		mdelay(20);
 
 		/* Wait for response flag (bit 31) in C2PMSG_64 */
-		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64),
 				   0x80000000, 0x8000FFFF, false);
 	}
 
@@ -174,7 +174,7 @@ static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp)
 	if (amdgpu_sriov_vf(adev))
 		data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
 	else
-		data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+		data = RREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67);
 
 	return data;
 }
@@ -188,7 +188,7 @@ static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
 		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
 			     GFX_CTRL_CMD_ID_CONSUME_CMD);
 	} else
-		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+		WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67, value);
 }
 
 static const struct psp_funcs psp_v15_0_0_funcs = {
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h
index 0e4c195297a4..fe97943b9b97 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_15_0_0_offset.h
@@ -82,6 +82,24 @@
 #define regMPASP_SMN_IH_SW_INT_CTRL                                                                     0x0142
 #define regMPASP_SMN_IH_SW_INT_CTRL_BASE_IDX                                                            0
 
+// addressBlock: mp_SmuMpASPPub_PcruDec
+// base address: 0x3800000
+#define regMPASP_PCRU1_MPASP_C2PMSG_64                                                   0x4280
+#define regMPASP_PCRU1_MPASP_C2PMSG_64_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_65                                                   0x4281
+#define regMPASP_PCRU1_MPASP_C2PMSG_65_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_66                                                   0x4282
+#define regMPASP_PCRU1_MPASP_C2PMSG_66_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_67                                                   0x4283
+#define regMPASP_PCRU1_MPASP_C2PMSG_67_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_68                                                   0x4284
+#define regMPASP_PCRU1_MPASP_C2PMSG_68_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_69                                                   0x4285
+#define regMPASP_PCRU1_MPASP_C2PMSG_69_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_70                                                   0x4286
+#define regMPASP_PCRU1_MPASP_C2PMSG_70_BASE_IDX                                          3
+#define regMPASP_PCRU1_MPASP_C2PMSG_71                                                   0x4287
+#define regMPASP_PCRU1_MPASP_C2PMSG_71_BASE_IDX                                          3
 
 // addressBlock: mp_SmuMp1_SmnDec
 // base address: 0x0

From 048c1c4e51715ffddd4189745c07f530f34fbe37 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Date: Mon, 23 Feb 2026 12:41:32 +0000
Subject: [PATCH 28/38] drm/amdgpu/userq: Consolidate wait ioctl exit path

If we gate the fence destruction with a check telling us whether there are
valid pointers in there we can eliminate the need for dual, basically
identical, exit paths.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit bea29bb0dd29012949cd44fdb122465a9fd5cf91)
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 28 ++++---------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 7e9cf1868cc9..77969a6017a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -983,32 +983,14 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 			r = -EFAULT;
 			goto free_fences;
 		}
-
-		kfree(fences);
-		kfree(fence_info);
 	}
 
-	drm_exec_fini(&exec);
-	for (i = 0; i < num_read_bo_handles; i++)
-		drm_gem_object_put(gobj_read[i]);
-	kfree(gobj_read);
-
-	for (i = 0; i < num_write_bo_handles; i++)
-		drm_gem_object_put(gobj_write[i]);
-	kfree(gobj_write);
-
-	kfree(timeline_points);
-	kfree(timeline_handles);
-	kfree(syncobj_handles);
-	kfree(bo_handles_write);
-	kfree(bo_handles_read);
-
-	return 0;
-
 free_fences:
-	while (num_fences-- > 0)
-		dma_fence_put(fences[num_fences]);
-	kfree(fences);
+	if (fences) {
+		while (num_fences-- > 0)
+			dma_fence_put(fences[num_fences]);
+		kfree(fences);
+	}
 free_fence_info:
 	kfree(fence_info);
 exec_fini:

From 65b5c326ce4103620c977b8dcb1699bdac4da143 Mon Sep 17 00:00:00 2001
From: Sunil Khatri <sunil.khatri@amd.com>
Date: Mon, 2 Mar 2026 18:50:46 +0530
Subject: [PATCH 29/38] drm/amdgpu/userq: refcount userqueues to avoid any race
 conditions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid race condition and avoid UAF cases, implement kref
based queues and protect the below operations using xa lock
a. Getting a queue from xarray
b. Increment/Decrement it's refcount

Every time some one want to access a queue, always get via
amdgpu_userq_get to make sure we have locks in place and get
the object if active.

A userqueue is destroyed on the last refcount is dropped which
typically would be via IOCTL or during fini.

v2: Add the missing drop in one the condition in the signal ioclt [Alex]

v3: remove the queue from the xarray first in the free queue ioctl path
    [Christian]

- Pass queue to the amdgpu_userq_put directly.
- make amdgpu_userq_put xa_lock free since we are doing put for each get
  only and final put is done via destroy and we remove the queue from xa
  with lock.
- use userq_put in fini too so cleanup is done fully.

v4: Use xa_erase directly rather than doing load and erase in free
    ioctl. Also remove some of the error logs which could be exploited
    by the user to flood the logs [Christian]

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 4952189b284d4d847f92636bb42dd747747129c0)
Cc: <stable@vger.kernel.org> # 048c1c4e5171: drm/amdgpu/userq: Consolidate wait ioctl exit path
Cc: <stable@vger.kernel.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 116 ++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  14 ++-
 3 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 9d67b770bcc2..7c450350847d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
 	return ret;
 }
 
-static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
-				 int queue_id)
+static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
 {
 	struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
 	struct amdgpu_device *adev = uq_mgr->adev;
@@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
 	uq_funcs->mqd_destroy(queue);
 	amdgpu_userq_fence_driver_free(queue);
 	/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
-	xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id);
 	xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
 	queue->userq_mgr = NULL;
 	list_del(&queue->userq_va_list);
@@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
 	up_read(&adev->reset_domain->sem);
 }
 
-static struct amdgpu_usermode_queue *
-amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
-{
-	return xa_load(&uq_mgr->userq_xa, qid);
-}
-
 void
 amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
 			     struct amdgpu_eviction_fence_mgr *evf_mgr)
@@ -625,22 +617,13 @@ unref_bo:
 }
 
 static int
-amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
 {
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
 	struct amdgpu_device *adev = uq_mgr->adev;
-	struct amdgpu_usermode_queue *queue;
 	int r = 0;
 
 	cancel_delayed_work_sync(&uq_mgr->resume_work);
 	mutex_lock(&uq_mgr->userq_mutex);
-	queue = amdgpu_userq_find(uq_mgr, queue_id);
-	if (!queue) {
-		drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
-		mutex_unlock(&uq_mgr->userq_mutex);
-		return -EINVAL;
-	}
 	amdgpu_userq_wait_for_last_fence(queue);
 	/* Cancel any pending hang detection work and cleanup */
 	if (queue->hang_detect_fence) {
@@ -672,7 +655,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
 		drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
 		queue->state = AMDGPU_USERQ_STATE_HUNG;
 	}
-	amdgpu_userq_cleanup(queue, queue_id);
+	amdgpu_userq_cleanup(queue);
 	mutex_unlock(&uq_mgr->userq_mutex);
 
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -680,6 +663,37 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
 	return r;
 }
 
+static void amdgpu_userq_kref_destroy(struct kref *kref)
+{
+	int r;
+	struct amdgpu_usermode_queue *queue =
+		container_of(kref, struct amdgpu_usermode_queue, refcount);
+	struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+
+	r = amdgpu_userq_destroy(uq_mgr, queue);
+	if (r)
+		drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r);
+}
+
+struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
+{
+	struct amdgpu_usermode_queue *queue;
+
+	xa_lock(&uq_mgr->userq_xa);
+	queue = xa_load(&uq_mgr->userq_xa, qid);
+	if (queue)
+		kref_get(&queue->refcount);
+	xa_unlock(&uq_mgr->userq_xa);
+
+	return queue;
+}
+
+void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
+{
+	if (queue)
+		kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
+}
+
 static int amdgpu_userq_priority_permit(struct drm_file *filp,
 					int priority)
 {
@@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		goto unlock;
 	}
 
+	/* drop this refcount during queue destroy */
+	kref_init(&queue->refcount);
+
 	/* Wait for mode-1 reset to complete */
 	down_read(&adev->reset_domain->sem);
 	r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
@@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
 	union drm_amdgpu_userq *args = data;
-	int r;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_usermode_queue *queue;
+	int r = 0;
 
 	if (!amdgpu_userq_enabled(dev))
 		return -ENOTSUPP;
@@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 			drm_file_err(filp, "Failed to create usermode queue\n");
 		break;
 
-	case AMDGPU_USERQ_OP_FREE:
-		r = amdgpu_userq_destroy(filp, args->in.queue_id);
-		if (r)
-			drm_file_err(filp, "Failed to destroy usermode queue\n");
+	case AMDGPU_USERQ_OP_FREE: {
+		xa_lock(&fpriv->userq_mgr.userq_xa);
+		queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id);
+		xa_unlock(&fpriv->userq_mgr.userq_xa);
+		if (!queue)
+			return -ENOENT;
+
+		amdgpu_userq_put(queue);
 		break;
+	}
 
 	default:
 		drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
@@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
 
 	/* Resume all the queues for this process */
 	xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
+		queue = amdgpu_userq_get(uq_mgr, queue_id);
+		if (!queue)
+			continue;
+
 		if (!amdgpu_userq_buffer_vas_mapped(queue)) {
 			drm_file_err(uq_mgr->file,
 				     "trying restore queue without va mapping\n");
 			queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+			amdgpu_userq_put(queue);
 			continue;
 		}
 
 		r = amdgpu_userq_restore_helper(queue);
 		if (r)
 			ret = r;
+
+		amdgpu_userq_put(queue);
 	}
 
 	if (ret)
@@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
 	amdgpu_userq_detect_and_reset_queues(uq_mgr);
 	/* Try to unmap all the queues in this process ctx */
 	xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
+		queue = amdgpu_userq_get(uq_mgr, queue_id);
+		if (!queue)
+			continue;
 		r = amdgpu_userq_preempt_helper(queue);
 		if (r)
 			ret = r;
+		amdgpu_userq_put(queue);
 	}
 
 	if (ret)
@@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
 	int ret;
 
 	xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
+		queue = amdgpu_userq_get(uq_mgr, queue_id);
+		if (!queue)
+			continue;
+
 		struct dma_fence *f = queue->last_fence;
 
-		if (!f || dma_fence_is_signaled(f))
+		if (!f || dma_fence_is_signaled(f)) {
+			amdgpu_userq_put(queue);
 			continue;
+		}
 		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
 		if (ret <= 0) {
 			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
 				     f->context, f->seqno);
+			amdgpu_userq_put(queue);
 			return -ETIMEDOUT;
 		}
+		amdgpu_userq_put(queue);
 	}
 
 	return 0;
@@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
 void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
 {
 	struct amdgpu_usermode_queue *queue;
-	unsigned long queue_id;
+	unsigned long queue_id = 0;
 
-	cancel_delayed_work_sync(&userq_mgr->resume_work);
+	for (;;) {
+		xa_lock(&userq_mgr->userq_xa);
+		queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX,
+				XA_PRESENT);
+		if (queue)
+			__xa_erase(&userq_mgr->userq_xa, queue_id);
+		xa_unlock(&userq_mgr->userq_xa);
 
-	mutex_lock(&userq_mgr->userq_mutex);
-	amdgpu_userq_detect_and_reset_queues(userq_mgr);
-	xa_for_each(&userq_mgr->userq_xa, queue_id, queue) {
-		amdgpu_userq_wait_for_last_fence(queue);
-		amdgpu_userq_unmap_helper(queue);
-		amdgpu_userq_cleanup(queue, queue_id);
+		if (!queue)
+			break;
+
+		amdgpu_userq_put(queue);
 	}
 
 	xa_destroy(&userq_mgr->userq_xa);
-	mutex_unlock(&userq_mgr->userq_mutex);
 	mutex_destroy(&userq_mgr->userq_mutex);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 5845d8959034..736c1d38297c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -74,6 +74,7 @@ struct amdgpu_usermode_queue {
 	struct dentry		*debugfs_queue;
 	struct delayed_work hang_detect_work;
 	struct dma_fence *hang_detect_fence;
+	struct kref		refcount;
 
 	struct list_head	userq_va_list;
 };
@@ -112,6 +113,9 @@ struct amdgpu_db_info {
 	struct amdgpu_userq_obj	*db_obj;
 };
 
+struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid);
+void amdgpu_userq_put(struct amdgpu_usermode_queue *queue);
+
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 77969a6017a4..5239b06b9ab0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -466,7 +466,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	struct drm_amdgpu_userq_signal *args = data;
 	struct drm_gem_object **gobj_write = NULL;
 	struct drm_gem_object **gobj_read = NULL;
-	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_usermode_queue *queue = NULL;
 	struct amdgpu_userq_fence *userq_fence;
 	struct drm_syncobj **syncobj = NULL;
 	u32 *bo_handles_write, num_write_bo_handles;
@@ -553,7 +553,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* Retrieve the user queue */
-	queue = xa_load(&userq_mgr->userq_xa, args->queue_id);
+	queue = amdgpu_userq_get(userq_mgr, args->queue_id);
 	if (!queue) {
 		r = -ENOENT;
 		goto put_gobj_write;
@@ -648,6 +648,9 @@ free_syncobj:
 free_syncobj_handles:
 	kfree(syncobj_handles);
 
+	if (queue)
+		amdgpu_userq_put(queue);
+
 	return r;
 }
 
@@ -660,7 +663,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 	struct drm_amdgpu_userq_wait *wait_info = data;
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
-	struct amdgpu_usermode_queue *waitq;
+	struct amdgpu_usermode_queue *waitq = NULL;
 	struct drm_gem_object **gobj_write;
 	struct drm_gem_object **gobj_read;
 	struct dma_fence **fences = NULL;
@@ -926,7 +929,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
 		 */
 		num_fences = dma_fence_dedup_array(fences, num_fences);
 
-		waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id);
+		waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id);
 		if (!waitq) {
 			r = -EINVAL;
 			goto free_fences;
@@ -1014,5 +1017,8 @@ free_bo_handles_write:
 free_bo_handles_read:
 	kfree(bo_handles_read);
 
+	if (waitq)
+		amdgpu_userq_put(waitq);
+
 	return r;
 }

From f82859c84a9862e242c904e72fb0625fef9b24e7 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Wed, 4 Mar 2026 11:50:12 -0800
Subject: [PATCH 30/38] accel/amdxdna: Fix major version check on NPU1 platform

Add the missing major number in npu1_fw_feature_table.

Without the major version specified, the firmware feature check fails,
preventing new firmware commands from being enabled on the NPU1
platform.

With the correct major version populated, the driver properly detects
firmware support and enables the new command.

Fixes: f1eac46fe5f7 ("accel/amdxdna: Update firmware version check for latest firmware")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260304195012.3616908-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/npu1_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 6e3d3ca69c04..1320e924e548 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -67,7 +67,7 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
 
 static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
 	{ .major = 5, .min_minor = 7 },
-	{ .features = BIT_U64(AIE2_NPU_COMMAND), .min_minor = 8 },
+	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 5, .min_minor = 8 },
 	{ 0 }
 };
 

From 8f3c6f08ababad2e3bdd239728cf66a9949446b4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 24 Feb 2026 13:17:50 +1000
Subject: [PATCH 31/38] nouveau/dpcd: return EBUSY for aux xfer if the device
 is asleep

If we have runtime suspended, and userspace wants to use /dev/drm_dp_*
then just tell it the device is busy instead of crashing in the GSP
code.

WARNING: CPU: 2 PID: 565741 at drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c:164 r535_gsp_msgq_wait+0x9a/0xb0 [nouveau]
CPU: 2 UID: 0 PID: 565741 Comm: fwupd Not tainted 6.18.10-200.fc43.x86_64 #1 PREEMPT(lazy)
Hardware name: LENOVO 20QTS0PQ00/20QTS0PQ00, BIOS N2OET65W (1.52 ) 08/05/2024
RIP: 0010:r535_gsp_msgq_wait+0x9a/0xb0 [nouveau]

This is a simple fix to get backported. We should probably engineer a
proper power domain solution to wake up devices and keep them awake
while fw updates are happening.

Cc: stable@vger.kernel.org
Fixes: 8894f4919bc4 ("drm/nouveau: register a drm_dp_aux channel for each dp connector")
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patch.msgid.link/20260224031750.791621-1-airlied@gmail.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/gpu/drm/nouveau/nouveau_connector.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 00d4530aea71..cc239492c7f0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1230,6 +1230,9 @@ nouveau_connector_aux_xfer(struct drm_dp_aux *obj, struct drm_dp_aux_msg *msg)
 	u8 size = msg->size;
 	int ret;
 
+	if (pm_runtime_suspended(nv_connector->base.dev->dev))
+		return -EBUSY;
+
 	nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP);
 	if (!nv_encoder)
 		return -ENODEV;

From 91d7e9df42598af28ca440b95b16a4e51a408771 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Date: Wed, 4 Mar 2026 20:27:52 -0300
Subject: [PATCH 32/38] drm/ttm: Fix bo resource use-after-free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When allocating a lot of buffers and putting the TTM under memory pressure,
during swapout, it might crash the system with the stack trace below.

It turns out that ttm_bo_swapout_cb might replace bo->resource when it
moves it to system cached.

When commit c06da4b3573a ("drm/ttm: Tidy usage of local variables a little
bit") used a local variable for bo->resource, it used the freed resource
later in the function, leading to a UAF.

Move back to using bo->resource in all cases in that function instead of a
local variable.

[  604.814275] BUG: kernel NULL pointer dereference, address: 0000000000000000
[  604.814284] #PF: supervisor read access in kernel mode
[  604.814288] #PF: error_code(0x0000) - not-present page
[  604.814291] PGD 0 P4D 0
[  604.814296] Oops: Oops: 0000 [#1] SMP NOPTI
[  604.814303] CPU: 2 UID: 0 PID: 4408 Comm: vulkan Tainted: G        W           7.0.0-rc2-00001-gc50a051e6aca #21 PREEMPT(full)  aef6eb0c02036a7c8a5e62e0c84a30c2be90688d
[  604.814309] Tainted: [W]=WARN
[  604.814311] Hardware name: Valve Jupiter/Jupiter, BIOS F7A0133 08/05/2024
[  604.814314] RIP: 0010:ttm_resource_move_to_lru_tail+0x100/0x160 [ttm]
[  604.814329] Code: 5b 5d e9 83 b4 1b cb 48 63 d2 48 c1 e0 04 48 8b 4e 40 48 8d 7e 40 48 8b ac d3 d8 00 00 00 48 89 c3 48 8d 54 05 68 48 8b 46 48 <48> 3b 38 0f 85 b3 3b 00 00 48 3b 79 08 0f 85 a9 3b 00 00 48 89 41
[  604.814332] RSP: 0018:ffffcfe54e3d7578 EFLAGS: 00010256
[  604.814336] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8cf09eced300
[  604.814339] RDX: 0000000000000068 RSI: ffff8cf1d4c1fc00 RDI: ffff8cf1d4c1fc40
[  604.814341] RBP: 0000000000000000 R08: ffff8cf09eced300 R09: 0000000000000000
[  604.814344] R10: 0000000000000000 R11: 0000000000000016 R12: ffff8cf1d4c1fc00
[  604.814346] R13: 0000000000000400 R14: ffff8cf096289c00 R15: ffff8cf084c8f688
[  604.814349] FS:  00007f00531b7780(0000) GS:ffff8cf4217a0000(0000) knlGS:0000000000000000
[  604.814352] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  604.814355] CR2: 0000000000000000 CR3: 000000018e3df000 CR4: 0000000000350ef0
[  604.814358] Call Trace:
[  604.814362]  <TASK>
[  604.814368]  ttm_bo_swapout_cb+0x24c/0x280 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814380]  ttm_lru_walk_for_evict+0xac/0x1d0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814394]  ttm_bo_swapout+0x5b/0x80 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814405]  ttm_global_swapout+0x63/0x100 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814415]  ttm_tt_populate+0x82/0x130 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814424]  ttm_bo_populate+0x37/0xa0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814433]  ttm_bo_handle_move_mem+0x157/0x170 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814443]  ttm_bo_validate+0xd9/0x180 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814453]  ttm_bo_init_reserved+0xa0/0x1b0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.814461]  ? srso_return_thunk+0x5/0x5f
[  604.814469]  amdgpu_bo_create+0x1f5/0x500 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.814855]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815182]  amdgpu_bo_create_user+0x3d/0x70 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815504]  amdgpu_gem_create_ioctl+0x16c/0x3b0 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.815830]  ? __pfx_amdgpu_bo_user_destroy+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816155]  ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816478]  drm_ioctl_kernel+0xae/0x100
[  604.816486]  drm_ioctl+0x283/0x510
[  604.816491]  ? __pfx_amdgpu_gem_create_ioctl+0x10/0x10 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.816819]  amdgpu_drm_ioctl+0x4a/0x80 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.817135]  __x64_sys_ioctl+0x96/0xe0
[  604.817142]  do_syscall_64+0x11b/0x7e0
[  604.817148]  ? srso_return_thunk+0x5/0x5f
[  604.817152]  ? srso_return_thunk+0x5/0x5f
[  604.817156]  ? walk_system_ram_range+0xb0/0x110
[  604.817161]  ? srso_return_thunk+0x5/0x5f
[  604.817165]  ? __pte_offset_map+0x1b/0xb0
[  604.817170]  ? srso_return_thunk+0x5/0x5f
[  604.817174]  ? pte_offset_map_lock+0x87/0xf0
[  604.817179]  ? srso_return_thunk+0x5/0x5f
[  604.817183]  ? insert_pfn+0x9f/0x1f0
[  604.817188]  ? srso_return_thunk+0x5/0x5f
[  604.817192]  ? vmf_insert_pfn_prot+0x97/0x190
[  604.817197]  ? srso_return_thunk+0x5/0x5f
[  604.817201]  ? ttm_bo_vm_fault_reserved+0x1a6/0x3f0 [ttm a469cf7fcb6737fdcf3fb5cdbcc8b1ca41f3e302]
[  604.817213]  ? srso_return_thunk+0x5/0x5f
[  604.817217]  ? amdgpu_gem_fault+0xe2/0x100 [amdgpu 361516226706227f4403914dbfdd3f90996136ca]
[  604.817542]  ? srso_return_thunk+0x5/0x5f
[  604.817546]  ? __do_fault+0x33/0x180
[  604.817550]  ? srso_return_thunk+0x5/0x5f
[  604.817554]  ? do_fault+0x178/0x610
[  604.817559]  ? srso_return_thunk+0x5/0x5f
[  604.817562]  ? __handle_mm_fault+0x9be/0x1120
[  604.817567]  ? srso_return_thunk+0x5/0x5f
[  604.817574]  ? srso_return_thunk+0x5/0x5f
[  604.817578]  ? count_memcg_events+0xc4/0x160
[  604.817583]  ? srso_return_thunk+0x5/0x5f
[  604.817587]  ? handle_mm_fault+0x1d7/0x2e0
[  604.817593]  ? srso_return_thunk+0x5/0x5f
[  604.817596]  ? do_user_addr_fault+0x173/0x660
[  604.817602]  ? srso_return_thunk+0x5/0x5f
[  604.817607]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  604.817612] RIP: 0033:0x7f00532cef4d
[  604.817617] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00
[  604.817620] RSP: 002b:00007ffd69ab0650 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  604.817624] RAX: ffffffffffffffda RBX: 00007ffd69ab07d0 RCX: 00007f00532cef4d
[  604.817627] RDX: 00007ffd69ab0700 RSI: 00000000c0206440 RDI: 0000000000000005
[  604.817629] RBP: 00007ffd69ab06a0 R08: 00007f00533a0ac0 R09: 0000000000000000
[  604.817632] R10: 00007ffd69ab07c0 R11: 0000000000000246 R12: 00007ffd69ab0700
[  604.817634] R13: 00000000c0206440 R14: 0000000000000005 R15: 0000000000000243
[  604.817642]  </TASK>

Cc: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Fixes: c06da4b3573a ("drm/ttm: Tidy usage of local variables a little bit")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://lore.kernel.org/r/20260304-ttm_bo_res_uaf-v1-1-43f20125b67f@igalia.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index acb9197db879..0765d69423d2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1107,8 +1107,7 @@ struct ttm_bo_swapout_walk {
 static s64
 ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 {
-	struct ttm_resource *res = bo->resource;
-	struct ttm_place place = { .mem_type = res->mem_type };
+	struct ttm_place place = { .mem_type = bo->resource->mem_type };
 	struct ttm_bo_swapout_walk *swapout_walk =
 		container_of(walk, typeof(*swapout_walk), walk);
 	struct ttm_operation_ctx *ctx = walk->arg.ctx;
@@ -1148,7 +1147,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 	/*
 	 * Move to system cached
 	 */
-	if (res->mem_type != TTM_PL_SYSTEM) {
+	if (bo->resource->mem_type != TTM_PL_SYSTEM) {
 		struct ttm_resource *evict_mem;
 		struct ttm_place hop;
 
@@ -1180,15 +1179,15 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
 
 	if (ttm_tt_is_populated(tt)) {
 		spin_lock(&bdev->lru_lock);
-		ttm_resource_del_bulk_move(res, bo);
+		ttm_resource_del_bulk_move(bo->resource, bo);
 		spin_unlock(&bdev->lru_lock);
 
 		ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags);
 
 		spin_lock(&bdev->lru_lock);
 		if (ret)
-			ttm_resource_add_bulk_move(res, bo);
-		ttm_resource_move_to_lru_tail(res);
+			ttm_resource_add_bulk_move(bo->resource, bo);
+		ttm_resource_move_to_lru_tail(bo->resource);
 		spin_unlock(&bdev->lru_lock);
 	}
 

From ee8ade4d9678a456bb5ea675c270738b250eda68 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <dev@lankhorst.se>
Date: Thu, 5 Mar 2026 12:37:34 +0100
Subject: [PATCH 33/38] Revert "drm/syncobj: Fix handle <-> fd ioctls with
 dirty stack"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 2e3649e237237258a08d75afef96648dd2b379f7.

The problem occurs when userspace is compiled against new headers
with new members, but don't correctly initialise those new members.

This is not a kernel problem, and should be fixed in userspace by
correctly zero'ing all members.

Cc: Rob Clark <robdclark@chromium.org>
Cc: Julian Orth <ju.orth@gmail.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Michel Dänzer <michel.daenzer@mailbox.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Julian Orth <ju.orth@gmail.com>
Link: https://patch.msgid.link/20260305113734.1309238-1-dev@lankhorst.se
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
---
 drivers/gpu/drm/drm_syncobj.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 49eccb43ce63..250734dee928 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -875,7 +875,7 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 		return drm_syncobj_export_sync_file(file_private, args->handle,
 						    point, &args->fd);
 
-	if (point)
+	if (args->point)
 		return -EINVAL;
 
 	return drm_syncobj_handle_to_fd(file_private, args->handle,
@@ -909,7 +909,7 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 							  args->handle,
 							  point);
 
-	if (point)
+	if (args->point)
 		return -EINVAL;
 
 	return drm_syncobj_fd_to_handle(file_private, args->fd,

From 76e8173ba92e15eeb0421b7cdbaef20513193b51 Mon Sep 17 00:00:00 2001
From: Akash Goel <akash.goel@arm.com>
Date: Thu, 5 Mar 2026 11:07:23 +0000
Subject: [PATCH 34/38] drm/panthor: Correct the order of arguments passed to
 gem_sync

This commit corrects the order of arguments passed to panthor_gem_sync()
function, called when the SYNC_WAIT condition has to be evaluated for a
blocked GPU queue.

Fixes: cd2c9c3015e6 ("drm/panthor: Add flag to map GEM object Write-Back Cacheable")
Signed-off-by: Akash Goel <akash.goel@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patch.msgid.link/20260305110723.2871733-1-akash.goel@arm.com
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index bd703a2904a1..a70f1db0764e 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -893,14 +893,15 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
 
 out_sync:
 	/* Make sure the CPU caches are invalidated before the seqno is read.
-	 * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check
+	 * panthor_gem_sync() is a NOP if map_wc=true, so no need to check
 	 * it here.
 	 */
-	panthor_gem_sync(&bo->base.base, queue->syncwait.offset,
+	panthor_gem_sync(&bo->base.base,
+			 DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
+			 queue->syncwait.offset,
 			 queue->syncwait.sync64 ?
 			 sizeof(struct panthor_syncobj_64b) :
-			 sizeof(struct panthor_syncobj_32b),
-			 DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE);
+			 sizeof(struct panthor_syncobj_32b));
 
 	return queue->syncwait.kmap + queue->syncwait.offset;
 

From d5b8b0347fa8470b751a506fb801797e271d7548 Mon Sep 17 00:00:00 2001
From: Lizhi Hou <lizhi.hou@amd.com>
Date: Wed, 4 Mar 2026 22:20:41 -0800
Subject: [PATCH 35/38] accel/amdxdna: Split mailbox channel create function

The management channel used for firmware control command submission is
currently created after the firmware is started. If channel creation
fails (for example, due to memory allocation failure or workqueue
creation interruption), the firmware remains in a pending state and is
unable to receive any control commands.

To avoid leaving the firmware in this inconsistent state, split
xdna_mailbox_create_channel() into two separate functions so that
resource allocation can be completed before interacting with the
hardware.
  xdna_mailbox_alloc_channel()
    Allocates memory and initializes the workqueue. This can be called
    earlier, before interacting with the hardware.
  xdna_mailbox_start_channel()
    Performs the hardware interaction required to start the channel.

Rename xdna_mailbox_destroy_channel() to xdna_mailbox_free_channel().
Ensure that xdna_mailbox_stop_channel() and xdna_mailbox_free_channel()
properly unwind the corresponding start and allocation steps, respectively.

Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260305062041.3954024-1-lizhi.hou@amd.com
---
 drivers/accel/amdxdna/aie2_message.c    | 17 ++++-
 drivers/accel/amdxdna/aie2_pci.c        | 63 +++++++++-------
 drivers/accel/amdxdna/amdxdna_mailbox.c | 99 ++++++++++++-------------
 drivers/accel/amdxdna/amdxdna_mailbox.h | 31 ++++----
 4 files changed, 116 insertions(+), 94 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 22e1a85a7ae0..ffcf3be79e23 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -293,13 +293,20 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
 	}
 
 	intr_reg = i2x.mb_head_ptr_reg + 4;
-	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
-							      intr_reg, ret);
+	hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox);
 	if (!hwctx->priv->mbox_chann) {
 		XDNA_ERR(xdna, "Not able to create channel");
 		ret = -EINVAL;
 		goto del_ctx_req;
 	}
+
+	ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x,
+					 intr_reg, ret);
+	if (ret) {
+		XDNA_ERR(xdna, "Not able to create channel");
+		ret = -EINVAL;
+		goto free_channel;
+	}
 	ndev->hwctx_num++;
 
 	XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id);
@@ -307,6 +314,8 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
 
 	return 0;
 
+free_channel:
+	xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
 del_ctx_req:
 	aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
 	return ret;
@@ -322,7 +331,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
 
 	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
 	ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
-	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
+	xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
 	XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
 	hwctx->priv->mbox_chann = NULL;
 	hwctx->fw_ctx_id = -1;
@@ -921,7 +930,7 @@ void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
 		return;
 
 	xdna_mailbox_stop_channel(ndev->mgmt_chann);
-	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+	xdna_mailbox_free_channel(ndev->mgmt_chann);
 	ndev->mgmt_chann = NULL;
 }
 
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 977ce21eaf9f..4924a9da55b6 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -361,10 +361,29 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 	}
 	pci_set_master(pdev);
 
+	mbox_res.ringbuf_base = ndev->sram_base;
+	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
+	mbox_res.mbox_base = ndev->mbox_base;
+	mbox_res.mbox_size = MBOX_SIZE(ndev);
+	mbox_res.name = "xdna_mailbox";
+	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
+	if (!ndev->mbox) {
+		XDNA_ERR(xdna, "failed to create mailbox device");
+		ret = -ENODEV;
+		goto disable_dev;
+	}
+
+	ndev->mgmt_chann = xdna_mailbox_alloc_channel(ndev->mbox);
+	if (!ndev->mgmt_chann) {
+		XDNA_ERR(xdna, "failed to alloc channel");
+		ret = -ENODEV;
+		goto disable_dev;
+	}
+
 	ret = aie2_smu_init(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
-		goto disable_dev;
+		goto free_channel;
 	}
 
 	ret = aie2_psp_start(ndev->psp_hdl);
@@ -379,18 +398,6 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 		goto stop_psp;
 	}
 
-	mbox_res.ringbuf_base = ndev->sram_base;
-	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
-	mbox_res.mbox_base = ndev->mbox_base;
-	mbox_res.mbox_size = MBOX_SIZE(ndev);
-	mbox_res.name = "xdna_mailbox";
-	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
-	if (!ndev->mbox) {
-		XDNA_ERR(xdna, "failed to create mailbox device");
-		ret = -ENODEV;
-		goto stop_psp;
-	}
-
 	mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
 	if (mgmt_mb_irq < 0) {
 		ret = mgmt_mb_irq;
@@ -399,13 +406,13 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 	}
 
 	xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
-	ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
-						       &ndev->mgmt_x2i,
-						       &ndev->mgmt_i2x,
-						       xdna_mailbox_intr_reg,
-						       mgmt_mb_irq);
-	if (!ndev->mgmt_chann) {
-		XDNA_ERR(xdna, "failed to create management mailbox channel");
+	ret = xdna_mailbox_start_channel(ndev->mgmt_chann,
+					 &ndev->mgmt_x2i,
+					 &ndev->mgmt_i2x,
+					 xdna_mailbox_intr_reg,
+					 mgmt_mb_irq);
+	if (ret) {
+		XDNA_ERR(xdna, "failed to start management mailbox channel");
 		ret = -EINVAL;
 		goto stop_psp;
 	}
@@ -413,37 +420,41 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
 	ret = aie2_mgmt_fw_init(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
-		goto destroy_mgmt_chann;
+		goto stop_fw;
 	}
 
 	ret = aie2_pm_init(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
-		goto destroy_mgmt_chann;
+		goto stop_fw;
 	}
 
 	ret = aie2_mgmt_fw_query(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
-		goto destroy_mgmt_chann;
+		goto stop_fw;
 	}
 
 	ret = aie2_error_async_events_alloc(ndev);
 	if (ret) {
 		XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
-		goto destroy_mgmt_chann;
+		goto stop_fw;
 	}
 
 	ndev->dev_status = AIE2_DEV_START;
 
 	return 0;
 
-destroy_mgmt_chann:
-	aie2_destroy_mgmt_chann(ndev);
+stop_fw:
+	aie2_suspend_fw(ndev);
+	xdna_mailbox_stop_channel(ndev->mgmt_chann);
 stop_psp:
 	aie2_psp_stop(ndev->psp_hdl);
 fini_smu:
 	aie2_smu_fini(ndev);
+free_channel:
+	xdna_mailbox_free_channel(ndev->mgmt_chann);
+	ndev->mgmt_chann = NULL;
 disable_dev:
 	pci_disable_device(pdev);
 
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 235a94047530..46d844a73a94 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -460,26 +460,49 @@ msg_id_failed:
 	return ret;
 }
 
-struct mailbox_channel *
-xdna_mailbox_create_channel(struct mailbox *mb,
-			    const struct xdna_mailbox_chann_res *x2i,
-			    const struct xdna_mailbox_chann_res *i2x,
-			    u32 iohub_int_addr,
-			    int mb_irq)
+struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb)
 {
 	struct mailbox_channel *mb_chann;
-	int ret;
-
-	if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
-		pr_err("Ring buf size must be power of 2");
-		return NULL;
-	}
 
 	mb_chann = kzalloc_obj(*mb_chann);
 	if (!mb_chann)
 		return NULL;
 
+	INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
+	mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
+	if (!mb_chann->work_q) {
+		MB_ERR(mb_chann, "Create workqueue failed");
+		goto free_chann;
+	}
 	mb_chann->mb = mb;
+
+	return mb_chann;
+
+free_chann:
+	kfree(mb_chann);
+	return NULL;
+}
+
+void xdna_mailbox_free_channel(struct mailbox_channel *mb_chann)
+{
+	destroy_workqueue(mb_chann->work_q);
+	kfree(mb_chann);
+}
+
+int
+xdna_mailbox_start_channel(struct mailbox_channel *mb_chann,
+			   const struct xdna_mailbox_chann_res *x2i,
+			   const struct xdna_mailbox_chann_res *i2x,
+			   u32 iohub_int_addr,
+			   int mb_irq)
+{
+	int ret;
+
+	if (!is_power_of_2(x2i->rb_size) || !is_power_of_2(i2x->rb_size)) {
+		pr_err("Ring buf size must be power of 2");
+		return -EINVAL;
+	}
+
 	mb_chann->msix_irq = mb_irq;
 	mb_chann->iohub_int_addr = iohub_int_addr;
 	memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
@@ -489,61 +512,37 @@ xdna_mailbox_create_channel(struct mailbox *mb,
 	mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
 	mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
 
-	INIT_WORK(&mb_chann->rx_work, mailbox_rx_worker);
-	mb_chann->work_q = create_singlethread_workqueue(MAILBOX_NAME);
-	if (!mb_chann->work_q) {
-		MB_ERR(mb_chann, "Create workqueue failed");
-		goto free_and_out;
-	}
-
 	/* Everything look good. Time to enable irq handler */
 	ret = request_irq(mb_irq, mailbox_irq_handler, 0, MAILBOX_NAME, mb_chann);
 	if (ret) {
 		MB_ERR(mb_chann, "Failed to request irq %d ret %d", mb_irq, ret);
-		goto destroy_wq;
+		return ret;
 	}
 
 	mb_chann->bad_state = false;
 	mailbox_reg_write(mb_chann, mb_chann->iohub_int_addr, 0);
 
-	MB_DBG(mb_chann, "Mailbox channel created (irq: %d)", mb_chann->msix_irq);
-	return mb_chann;
-
-destroy_wq:
-	destroy_workqueue(mb_chann->work_q);
-free_and_out:
-	kfree(mb_chann);
-	return NULL;
-}
-
-int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
-{
-	struct mailbox_msg *mb_msg;
-	unsigned long msg_id;
-
-	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
-	free_irq(mb_chann->msix_irq, mb_chann);
-	destroy_workqueue(mb_chann->work_q);
-	/* We can clean up and release resources */
-
-	xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
-		mailbox_release_msg(mb_chann, mb_msg);
-
-	xa_destroy(&mb_chann->chan_xa);
-
-	MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
-	kfree(mb_chann);
+	MB_DBG(mb_chann, "Mailbox channel started (irq: %d)", mb_chann->msix_irq);
 	return 0;
 }
 
 void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
 {
+	struct mailbox_msg *mb_msg;
+	unsigned long msg_id;
+
 	/* Disable an irq and wait. This might sleep. */
-	disable_irq(mb_chann->msix_irq);
+	free_irq(mb_chann->msix_irq, mb_chann);
 
 	/* Cancel RX work and wait for it to finish */
-	cancel_work_sync(&mb_chann->rx_work);
-	MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
+	drain_workqueue(mb_chann->work_q);
+
+	/* We can clean up and release resources */
+	xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+		mailbox_release_msg(mb_chann, mb_msg);
+	xa_destroy(&mb_chann->chan_xa);
+
+	MB_DBG(mb_chann, "Mailbox channel stopped, irq: %d", mb_chann->msix_irq);
 }
 
 struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.h b/drivers/accel/amdxdna/amdxdna_mailbox.h
index ea367f2fb738..8b1e00945da4 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.h
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.h
@@ -74,9 +74,16 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
 				     const struct xdna_mailbox_res *res);
 
 /*
- * xdna_mailbox_create_channel() -- Create a mailbox channel instance
+ * xdna_mailbox_alloc_channel() -- alloc a mailbox channel
  *
- * @mailbox: the handle return from xdna_mailbox_create()
+ * @mb: mailbox handle
+ */
+struct mailbox_channel *xdna_mailbox_alloc_channel(struct mailbox *mb);
+
+/*
+ * xdna_mailbox_start_channel() -- start a mailbox channel instance
+ *
+ * @mb_chann: the handle return from xdna_mailbox_alloc_channel()
  * @x2i: host to firmware mailbox resources
  * @i2x: firmware to host mailbox resources
  * @xdna_mailbox_intr_reg: register addr of MSI-X interrupt
@@ -84,28 +91,24 @@ struct mailbox *xdnam_mailbox_create(struct drm_device *ddev,
  *
  * Return: If success, return a handle of mailbox channel. Otherwise, return NULL.
  */
-struct mailbox_channel *
-xdna_mailbox_create_channel(struct mailbox *mailbox,
-			    const struct xdna_mailbox_chann_res *x2i,
-			    const struct xdna_mailbox_chann_res *i2x,
-			    u32 xdna_mailbox_intr_reg,
-			    int mb_irq);
+int
+xdna_mailbox_start_channel(struct mailbox_channel *mb_chann,
+			   const struct xdna_mailbox_chann_res *x2i,
+			   const struct xdna_mailbox_chann_res *i2x,
+			   u32 xdna_mailbox_intr_reg,
+			   int mb_irq);
 
 /*
- * xdna_mailbox_destroy_channel() -- destroy mailbox channel
+ * xdna_mailbox_free_channel() -- free mailbox channel
  *
  * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
- *
- * Return: if success, return 0. otherwise return error code
  */
-int xdna_mailbox_destroy_channel(struct mailbox_channel *mailbox_chann);
+void xdna_mailbox_free_channel(struct mailbox_channel *mailbox_chann);
 
 /*
  * xdna_mailbox_stop_channel() -- stop mailbox channel
  *
  * @mailbox_chann: the handle return from xdna_mailbox_create_channel()
- *
- * Return: if success, return 0. otherwise return error code
  */
 void xdna_mailbox_stop_channel(struct mailbox_channel *mailbox_chann);
 

From 150bceb3e0a4a30950279d91ea0e8cc69a736742 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 18 Feb 2026 16:21:55 -0600
Subject: [PATCH 36/38] accel: ethosu: Fix job submit error clean-up refcount
 underflows

If the job submit fails before adding the job to the scheduler queue
such as when the GEM buffer bounds checks fail, then doing a
ethosu_job_put() results in a pm_runtime_put_autosuspend() without the
corresponding pm_runtime_resume_and_get(). The dma_fence_put()'s are
also unnecessary, but seem to be harmless.

Split the ethosu_job_cleanup() function into 2 parts for the before
and after the job is queued.

Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver")
Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-1-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/accel/ethosu/ethosu_job.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/accel/ethosu/ethosu_job.c b/drivers/accel/ethosu/ethosu_job.c
index 8598a3634340..ec85f4156744 100644
--- a/drivers/accel/ethosu/ethosu_job.c
+++ b/drivers/accel/ethosu/ethosu_job.c
@@ -143,17 +143,10 @@ out:
 	return ret;
 }
 
-static void ethosu_job_cleanup(struct kref *ref)
+static void ethosu_job_err_cleanup(struct ethosu_job *job)
 {
-	struct ethosu_job *job = container_of(ref, struct ethosu_job,
-						refcount);
 	unsigned int i;
 
-	pm_runtime_put_autosuspend(job->dev->base.dev);
-
-	dma_fence_put(job->done_fence);
-	dma_fence_put(job->inference_done_fence);
-
 	for (i = 0; i < job->region_cnt; i++)
 		drm_gem_object_put(job->region_bo[i]);
 
@@ -162,6 +155,19 @@ static void ethosu_job_cleanup(struct kref *ref)
 	kfree(job);
 }
 
+static void ethosu_job_cleanup(struct kref *ref)
+{
+	struct ethosu_job *job = container_of(ref, struct ethosu_job,
+						refcount);
+
+	pm_runtime_put_autosuspend(job->dev->base.dev);
+
+	dma_fence_put(job->done_fence);
+	dma_fence_put(job->inference_done_fence);
+
+	ethosu_job_err_cleanup(job);
+}
+
 static void ethosu_job_put(struct ethosu_job *job)
 {
 	kref_put(&job->refcount, ethosu_job_cleanup);
@@ -454,12 +460,16 @@ static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file
 		}
 	}
 	ret = ethosu_job_push(ejob);
+	if (!ret) {
+		ethosu_job_put(ejob);
+		return 0;
+	}
 
 out_cleanup_job:
 	if (ret)
 		drm_sched_job_cleanup(&ejob->base);
 out_put_job:
-	ethosu_job_put(ejob);
+	ethosu_job_err_cleanup(ejob);
 
 	return ret;
 }

From 838ae99f9a77a5724ee6d4e7b7b1eb079147f888 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 18 Feb 2026 16:21:56 -0600
Subject: [PATCH 37/38] accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with
 scalar

The NPU_OP_ELEMENTWISE instruction uses a scalar value for IFM2 if the
IFM2_BROADCAST "scalar" mode is set. It is a bit (7) on the u65 and
part of a field (bits 3:0) on the u85. The driver was hardcoded to the
u85.

Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver")
Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-2-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/accel/ethosu/ethosu_gem.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
index 668c71d5ff45..ff5e58944138 100644
--- a/drivers/accel/ethosu/ethosu_gem.c
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -417,7 +417,10 @@ static int ethosu_gem_cmdstream_copy_and_validate(struct drm_device *ddev,
 				return ret;
 			break;
 		case NPU_OP_ELEMENTWISE:
-			use_ifm2 = !((st.ifm2.broadcast == 8) || (param == 5) ||
+			use_scale = ethosu_is_u65(edev) ?
+				    (st.ifm2.broadcast & 0x80) :
+				    (st.ifm2.broadcast == 8);
+			use_ifm2 = !(use_scale || (param == 5) ||
 				(param == 6) || (param == 7) || (param == 0x24));
 			use_ifm = st.ifm.broadcast != 8;
 			ret = calc_sizes_elemwise(ddev, info, cmd, &st, use_ifm, use_ifm2);

From 021f1b77f70d62351bba67b050b8d784641d817f Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 18 Feb 2026 16:21:57 -0600
Subject: [PATCH 38/38] accel: ethosu: Handle possible underflow in IFM size
 calculations

If the command stream has larger padding sizes than the IFM and OFM
diminsions, then the calculations will underflow to a negative value.
The result is a very large region bounds which is caught on submit, but
it's better to catch it earlier.

Current mesa ethosu driver has a signedness bug which resulted in
padding of 127 (the max) and triggers this issue.

Reviewed-and-Tested-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://patch.msgid.link/20260218-ethos-fixes-v1-3-be3fa3ea9a30@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/accel/ethosu/ethosu_gem.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
index ff5e58944138..7994e7073903 100644
--- a/drivers/accel/ethosu/ethosu_gem.c
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -245,11 +245,14 @@ static int calc_sizes(struct drm_device *ddev,
 			((st->ifm.stride_kernel >> 1) & 0x1) + 1;
 		u32 stride_x = ((st->ifm.stride_kernel >> 5) & 0x2) +
 			(st->ifm.stride_kernel & 0x1) + 1;
-		u32 ifm_height = st->ofm.height[2] * stride_y +
+		s32 ifm_height = st->ofm.height[2] * stride_y +
 			st->ifm.height[2] - (st->ifm.pad_top + st->ifm.pad_bottom);
-		u32 ifm_width  = st->ofm.width * stride_x +
+		s32 ifm_width = st->ofm.width * stride_x +
 			st->ifm.width - (st->ifm.pad_left + st->ifm.pad_right);
 
+		if (ifm_height < 0 || ifm_width < 0)
+			return -EINVAL;
+
 		len = feat_matrix_length(info, &st->ifm, ifm_width,
 					 ifm_height, st->ifm.depth);
 		dev_dbg(ddev->dev, "op %d: IFM:%d:0x%llx-0x%llx\n",