From 43d37df67f7770d8d261fdcb64ecc8c314e91303 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 14:30:59 -0800 Subject: [PATCH] drm/xe/wa: Steer RMW of MCR registers while building default LRC When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no MCR is necessary. Fixes: f2f90989ccff ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit 6c2e331c915ba9e774aa847921262805feb00863) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6 +++ drivers/gpu/drm/xe/xe_gt.c | 66 +++++++++++++++++++----- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 68172b0248a6..dc5a4fafa70c 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -96,6 +96,12 @@ #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) +#define SELECTIVE_READ_ADDRESSING REG_BIT(30) +#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) +#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9d090d0f2438..df6d04704823 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return ret; } +/* Dwords required to emit a RMW of a register */ +#define EMIT_RMW_DW 20 + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_hw_engine *hwe = q->hwe; + struct xe_reg_sr *sr = &hwe->reg_lrc; struct xe_reg_sr_entry *entry; - int count_rmw = 0, count = 0, ret; + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; @@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) ++count; + else if (entry->reg.mcr) + ++count_rmw_mcr; else ++count_rmw; } @@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) bb_len += count * 2 + 1; - if (count_rmw) - bb_len += count_rmw * 20 + 7; + /* + * RMW of MCR registers is the same as a normal RMW, except an + * additional LRI (3 dwords) is required per register to steer the read + * to a nom-terminated instance. + * + * We could probably shorten the batch slightly by eliding the + * steering for consecutive MCR registers that have the same + * group/instance target, but it's not worth the extra complexity to do + * so. + */ + bb_len += count_rmw * EMIT_RMW_DW; + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * After doing all RMW, we need 7 trailing dwords to clean up, + * plus an additional 3 dwords to reset steering if any of the + * registers were MCR. + */ + if (count_rmw || count_rmw_mcr) + bb_len += 7 + (count_rmw_mcr ? 3 : 0); + + if (hwe->class == XE_ENGINE_CLASS_RENDER) /* * Big enough to emit all of the context's 3DSTATE via * xe_lrc_emit_hwe_state_instructions() */ - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) @@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - if (count_rmw) { - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - + if (count_rmw || count_rmw_mcr) { xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; + if (entry->reg.mcr) { + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; + u8 group, instance; + + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; + *cs++ = SELECTIVE_READ_ADDRESSING | + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); + } + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; *cs++ = entry->reg.addr; *cs++ = CS_GPR_REG(0, 0).addr; @@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = CS_GPR_REG(0, 0).addr; *cs++ = entry->reg.addr; - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", - entry->reg.addr, entry->clr_bits, entry->set_bits); + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", + entry->reg.addr, entry->clr_bits, entry->set_bits, + entry->reg.mcr ? " (MCR)" : ""); } /* reset used GPR */ @@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = 0; *cs++ = CS_GPR_REG(0, 2).addr; *cs++ = 0; + + /* reset steering */ + if (count_rmw_mcr) { + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; + *cs++ = 0; + } } cs = xe_lrc_emit_hwe_state_instructions(q, cs);