dmaengine: idxd: Add Max SGL Size Support for DSA3.0

Certain DSA 3.0 opcodes, such as Gather copy and Gather reduce, require the
maximum SGL size to be configured for a workqueue before they can be used.

Configure the maximum scatter-gather list (SGL) size for workqueues during
setup on supported hardware. Applications can then rely on the configured SGL
size without having to set it explicitly.

Signed-off-by: Yi Sun <yi.sun@intel.com>
Co-developed-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Yi Lai <yi1.lai@intel.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Link: https://patch.msgid.link/20260107-idxd-yi-sun-dsa3-sgl-size-v2-2-dbef8f559e48@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
This commit is contained in:
Yi Sun 2026-01-07 16:02:23 -08:00 committed by Vinod Koul
parent 8308510b93
commit fe7b87d908
4 changed files with 28 additions and 1 deletions

View file

@ -390,6 +390,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
memset(wq->name, 0, WQ_NAME_SIZE);
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
idxd_wq_set_init_max_sgl_size(idxd, wq);
if (wq->opcap_bmap)
bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
}
@ -989,6 +990,8 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
/* bytes 12-15 */
wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size));
if (idxd_sgl_supported(idxd))
wq->wqcfg->max_sgl_shift = ilog2(wq->max_sgl_size);
/* bytes 32-63 */
if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) {
@ -1167,6 +1170,8 @@ static int idxd_wq_load_config(struct idxd_wq *wq)
wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift;
idxd_wq_set_max_batch_size(idxd->data->type, wq, 1U << wq->wqcfg->max_batch_shift);
if (idxd_sgl_supported(idxd))
wq->max_sgl_size = 1U << wq->wqcfg->max_sgl_shift;
for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i);

View file

@ -227,6 +227,7 @@ struct idxd_wq {
char name[WQ_NAME_SIZE + 1];
u64 max_xfer_bytes;
u32 max_batch_size;
u32 max_sgl_size;
/* Lock to protect upasid_xa access. */
struct mutex uc_lock;
@ -348,6 +349,7 @@ struct idxd_device {
u64 max_xfer_bytes;
u32 max_batch_size;
u32 max_sgl_size;
int max_groups;
int max_engines;
int max_rdbufs;
@ -692,6 +694,20 @@ static inline void idxd_wq_set_max_batch_size(int idxd_type, struct idxd_wq *wq,
wq->max_batch_size = max_batch_size;
}
/*
 * idxd_sgl_supported - report whether the device supports SGL configuration
 * @idxd: device whose type, hardware version and DSA capabilities are checked
 *
 * Returns true only when all three conditions hold: the device type is
 * IDXD_TYPE_DSA, the hardware version is at least DEVICE_VERSION_3, and the
 * sgl_formats field of the DSACAP0 capability is non-zero.
 *
 * Declared inline because this helper is defined in a header: a plain static
 * definition would leave an unused private copy (and an unused-function
 * warning) in every source file that includes the header without calling it.
 */
static inline bool idxd_sgl_supported(struct idxd_device *idxd)
{
	return idxd->data->type == IDXD_TYPE_DSA &&
	       idxd->hw.version >= DEVICE_VERSION_3 &&
	       idxd->hw.dsacap0.sgl_formats;
}
/*
 * idxd_wq_set_init_max_sgl_size - seed a workqueue's max SGL size
 * @idxd: device providing the DSACAP0 max_sgl_shift capability field
 * @wq: workqueue whose max_sgl_size is initialized
 *
 * When idxd_sgl_supported() reports support, wq->max_sgl_size is set to
 * 2^max_sgl_shift as advertised in DSACAP0. Otherwise the field is left
 * untouched.
 */
static inline void idxd_wq_set_init_max_sgl_size(struct idxd_device *idxd,
						 struct idxd_wq *wq)
{
	/* Nothing to initialize when SGL is not supported. */
	if (!idxd_sgl_supported(idxd))
		return;

	wq->max_sgl_size = 1U << idxd->hw.dsacap0.max_sgl_shift;
}
static inline void idxd_wqcfg_set_max_batch_shift(int idxd_type, union wqcfg *wqcfg,
u32 max_batch_shift)
{

View file

@ -222,6 +222,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
init_completion(&wq->wq_resurrect);
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
idxd_wq_set_init_max_sgl_size(idxd, wq);
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
if (!wq->wqcfg) {
@ -590,6 +591,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
idxd->hw.dsacap1.bits = ioread64(idxd->reg_base + IDXD_DSACAP1_OFFSET);
idxd->hw.dsacap2.bits = ioread64(idxd->reg_base + IDXD_DSACAP2_OFFSET);
}
if (idxd_sgl_supported(idxd)) {
idxd->max_sgl_size = 1U << idxd->hw.dsacap0.max_sgl_shift;
dev_dbg(dev, "max sgl size: %u\n", idxd->max_sgl_size);
}
/* read iaa cap */
if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2)

View file

@ -390,7 +390,8 @@ union wqcfg {
/* bytes 12-15 */
u32 max_xfer_shift:5;
u32 max_batch_shift:4;
u32 rsvd4:23;
u32 max_sgl_shift:4;
u32 rsvd4:19;
/* bytes 16-19 */
u16 occupancy_inth;