mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
RDMA v7.0 merge window
Usual smallish cycle:
- Various code improvements in irdma, rtrs, qedr, ocrdma, irdma, rxe
- Small driver improvements and minor bug fixes to hns, mlx5, rxe, mana,
mlx5, irdma
- Robustness improvements in completion processing for EFA
- New query_port_speed() verb to move past limited IBA defined speed steps
- Support for SG_GAPS in rtrs and many other small improvements
- Rare list corruption fix in iwcm
- Better support different page sizes in rxe
- Device memory support for mana
- Direct bio vec to kernel MR for use by NFS-RDMA
- QP rate limiting for bnxt_re
- Remote triggerable NULL pointer crash in siw
- DMA-buf exporter support for RDMA mmaps like doorbells
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCaY44vgAKCRCFwuHvBreF
YfiZAP91cMZfogN7r1FMD75xDZu55dI3Jvy8OaixyRxlWLGPcQEAjritdL0o7fZp
YrD1OXNS/1XG//rPBVw7xj+54Aa8hAU=
=AVcu
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Usual smallish cycle. The NFS biovec work to push it down into RDMA
instead of indirecting through a scatterlist is pretty nice to see,
been talked about for a long time now.
- Various code improvements in irdma, rtrs, qedr, ocrdma, irdma, rxe
- Small driver improvements and minor bug fixes to hns, mlx5, rxe,
mana, mlx5, irdma
- Robustness improvements in completion processing for EFA
- New query_port_speed() verb to move past limited IBA defined speed
steps
- Support for SG_GAPS in rtrs and many other small improvements
- Rare list corruption fix in iwcm
- Better support different page sizes in rxe
- Device memory support for mana
- Direct bio vec to kernel MR for use by NFS-RDMA
- QP rate limiting for bnxt_re
- Remote triggerable NULL pointer crash in siw
- DMA-buf exporter support for RDMA mmaps like doorbells"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (66 commits)
RDMA/mlx5: Implement DMABUF export ops
RDMA/uverbs: Add DMABUF object type and operations
RDMA/uverbs: Support external FD uobjects
RDMA/siw: Fix potential NULL pointer dereference in header processing
RDMA/umad: Reject negative data_len in ib_umad_write
IB/core: Extend rate limit support for RC QPs
RDMA/mlx5: Support rate limit only for Raw Packet QP
RDMA/bnxt_re: Report QP rate limit in debugfs
RDMA/bnxt_re: Report packet pacing capabilities when querying device
RDMA/bnxt_re: Add support for QP rate limiting
MAINTAINERS: Drop RDMA files from Hyper-V section
RDMA/uverbs: Add __GFP_NOWARN to ib_uverbs_unmarshall_recv() kmalloc
svcrdma: use bvec-based RDMA read/write API
RDMA/core: add rdma_rw_max_sge() helper for SQ sizing
RDMA/core: add MR support for bvec-based RDMA operations
RDMA/core: use IOVA-based DMA mapping for bvec RDMA operations
RDMA/core: add bio_vec based RDMA read/write API
RDMA/irdma: Use kvzalloc for paged memory DMA address array
RDMA/rxe: Fix race condition in QP timer handlers
RDMA/mana_ib: Add device-memory support
...
This commit is contained in:
commit
311aa68319
77 changed files with 2648 additions and 722 deletions
|
|
@ -11842,7 +11842,6 @@ F: arch/x86/kernel/cpu/mshyperv.c
|
|||
F: drivers/clocksource/hyperv_timer.c
|
||||
F: drivers/hid/hid-hyperv.c
|
||||
F: drivers/hv/
|
||||
F: drivers/infiniband/hw/mana/
|
||||
F: drivers/input/serio/hyperv-keyboard.c
|
||||
F: drivers/iommu/hyperv-iommu.c
|
||||
F: drivers/net/ethernet/microsoft/
|
||||
|
|
@ -11861,7 +11860,6 @@ F: include/hyperv/hvhdk_mini.h
|
|||
F: include/linux/hyperv.h
|
||||
F: include/net/mana
|
||||
F: include/uapi/linux/hyperv.h
|
||||
F: include/uapi/rdma/mana-abi.h
|
||||
F: net/vmw_vsock/hyperv_transport.c
|
||||
F: tools/hv/
|
||||
|
||||
|
|
@ -17468,6 +17466,7 @@ MICROSOFT MANA RDMA DRIVER
|
|||
M: Long Li <longli@microsoft.com>
|
||||
M: Konstantin Taranov <kotaranov@microsoft.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
L: linux-hyperv@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/infiniband/hw/mana/
|
||||
F: include/net/mana
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
|
|||
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
|
||||
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
|
||||
uverbs_std_types_cq.o \
|
||||
uverbs_std_types_dmabuf.o \
|
||||
uverbs_std_types_dmah.o \
|
||||
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
|
||||
uverbs_std_types_mr.o uverbs_std_types_counters.o \
|
||||
|
|
|
|||
|
|
@ -1537,7 +1537,8 @@ static void ib_cache_event_task(struct work_struct *_work)
|
|||
* the cache.
|
||||
*/
|
||||
ret = ib_cache_update(work->event.device, work->event.element.port_num,
|
||||
work->event.event == IB_EVENT_GID_CHANGE,
|
||||
work->event.event == IB_EVENT_GID_CHANGE ||
|
||||
work->event.event == IB_EVENT_CLIENT_REREGISTER,
|
||||
work->event.event == IB_EVENT_PKEY_CHANGE,
|
||||
work->enforce_security);
|
||||
|
||||
|
|
|
|||
|
|
@ -361,34 +361,6 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_device_get_by_name - Find an IB device by name
|
||||
* @name: The name to look for
|
||||
* @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
|
||||
*
|
||||
* Find and hold an ib_device by its name. The caller must call
|
||||
* ib_device_put() on the returned pointer.
|
||||
*/
|
||||
struct ib_device *ib_device_get_by_name(const char *name,
|
||||
enum rdma_driver_id driver_id)
|
||||
{
|
||||
struct ib_device *device;
|
||||
|
||||
down_read(&devices_rwsem);
|
||||
device = __ib_device_get_by_name(name);
|
||||
if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
|
||||
device->ops.driver_id != driver_id)
|
||||
device = NULL;
|
||||
|
||||
if (device) {
|
||||
if (!ib_device_try_get(device))
|
||||
device = NULL;
|
||||
}
|
||||
up_read(&devices_rwsem);
|
||||
return device;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_device_get_by_name);
|
||||
|
||||
static int rename_compat_devs(struct ib_device *device)
|
||||
{
|
||||
struct ib_core_device *cdev;
|
||||
|
|
@ -2793,6 +2765,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
|
|||
SET_DEVICE_OP(dev_ops, map_mr_sg);
|
||||
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
|
||||
SET_DEVICE_OP(dev_ops, mmap);
|
||||
SET_DEVICE_OP(dev_ops, mmap_get_pfns);
|
||||
SET_DEVICE_OP(dev_ops, mmap_free);
|
||||
SET_DEVICE_OP(dev_ops, modify_ah);
|
||||
SET_DEVICE_OP(dev_ops, modify_cq);
|
||||
|
|
@ -2803,6 +2776,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
|
|||
SET_DEVICE_OP(dev_ops, modify_srq);
|
||||
SET_DEVICE_OP(dev_ops, modify_wq);
|
||||
SET_DEVICE_OP(dev_ops, peek_cq);
|
||||
SET_DEVICE_OP(dev_ops, pgoff_to_mmap_entry);
|
||||
SET_DEVICE_OP(dev_ops, pre_destroy_cq);
|
||||
SET_DEVICE_OP(dev_ops, poll_cq);
|
||||
SET_DEVICE_OP(dev_ops, port_groups);
|
||||
|
|
@ -2816,6 +2790,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
|
|||
SET_DEVICE_OP(dev_ops, query_gid);
|
||||
SET_DEVICE_OP(dev_ops, query_pkey);
|
||||
SET_DEVICE_OP(dev_ops, query_port);
|
||||
SET_DEVICE_OP(dev_ops, query_port_speed);
|
||||
SET_DEVICE_OP(dev_ops, query_qp);
|
||||
SET_DEVICE_OP(dev_ops, query_srq);
|
||||
SET_DEVICE_OP(dev_ops, query_ucontext);
|
||||
|
|
@ -2875,7 +2850,6 @@ int ib_add_sub_device(struct ib_device *parent,
|
|||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_add_sub_device);
|
||||
|
||||
int ib_del_sub_device_and_put(struct ib_device *sub)
|
||||
{
|
||||
|
|
@ -2896,7 +2870,6 @@ int ib_del_sub_device_and_put(struct ib_device *sub)
|
|||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_del_sub_device_and_put);
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_VIRT_DMA
|
||||
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
|
||||
|
|
|
|||
|
|
@ -5,9 +5,13 @@
|
|||
* Copyright 2019 Marvell. All rights reserved.
|
||||
*/
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/dma-resv.h>
|
||||
#include "uverbs.h"
|
||||
#include "core_priv.h"
|
||||
|
||||
MODULE_IMPORT_NS("DMA_BUF");
|
||||
|
||||
/**
|
||||
* rdma_umap_priv_init() - Initialize the private data of a vma
|
||||
*
|
||||
|
|
@ -229,12 +233,29 @@ EXPORT_SYMBOL(rdma_user_mmap_entry_put);
|
|||
*/
|
||||
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
|
||||
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
mutex_lock(&entry->dmabufs_lock);
|
||||
xa_lock(&entry->ucontext->mmap_xa);
|
||||
entry->driver_removed = true;
|
||||
xa_unlock(&entry->ucontext->mmap_xa);
|
||||
list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
|
||||
dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
|
||||
list_del(&uverbs_dmabuf->dmabufs_elm);
|
||||
uverbs_dmabuf->revoked = true;
|
||||
dma_buf_move_notify(uverbs_dmabuf->dmabuf);
|
||||
dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
|
||||
DMA_RESV_USAGE_BOOKKEEP, false,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
|
||||
kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
|
||||
wait_for_completion(&uverbs_dmabuf->comp);
|
||||
}
|
||||
mutex_unlock(&entry->dmabufs_lock);
|
||||
|
||||
kref_put(&entry->ref, rdma_user_mmap_entry_free);
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
|
||||
|
|
@ -274,6 +295,9 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
|
|||
return -EINVAL;
|
||||
|
||||
kref_init(&entry->ref);
|
||||
INIT_LIST_HEAD(&entry->dmabufs);
|
||||
mutex_init(&entry->dmabufs_lock);
|
||||
|
||||
entry->ucontext = ucontext;
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -95,7 +95,6 @@ static struct workqueue_struct *iwcm_wq;
|
|||
struct iwcm_work {
|
||||
struct work_struct work;
|
||||
struct iwcm_id_private *cm_id;
|
||||
struct list_head list;
|
||||
struct iw_cm_event event;
|
||||
struct list_head free_list;
|
||||
};
|
||||
|
|
@ -178,7 +177,6 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
|
|||
return -ENOMEM;
|
||||
}
|
||||
work->cm_id = cm_id_priv;
|
||||
INIT_LIST_HEAD(&work->list);
|
||||
put_work(work);
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -213,7 +211,6 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
|
|||
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
|
||||
{
|
||||
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
|
||||
BUG_ON(!list_empty(&cm_id_priv->work_list));
|
||||
free_cm_id(cm_id_priv);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -260,7 +257,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
|
|||
refcount_set(&cm_id_priv->refcount, 1);
|
||||
init_waitqueue_head(&cm_id_priv->connect_wait);
|
||||
init_completion(&cm_id_priv->destroy_comp);
|
||||
INIT_LIST_HEAD(&cm_id_priv->work_list);
|
||||
INIT_LIST_HEAD(&cm_id_priv->work_free_list);
|
||||
|
||||
return &cm_id_priv->id;
|
||||
|
|
@ -1007,13 +1003,13 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
|
|||
}
|
||||
|
||||
/*
|
||||
* Process events on the work_list for the cm_id. If the callback
|
||||
* function requests that the cm_id be deleted, a flag is set in the
|
||||
* cm_id flags to indicate that when the last reference is
|
||||
* removed, the cm_id is to be destroyed. This is necessary to
|
||||
* distinguish between an object that will be destroyed by the app
|
||||
* thread asleep on the destroy_comp list vs. an object destroyed
|
||||
* here synchronously when the last reference is removed.
|
||||
* Process events for the cm_id. If the callback function requests
|
||||
* that the cm_id be deleted, a flag is set in the cm_id flags to
|
||||
* indicate that when the last reference is removed, the cm_id is
|
||||
* to be destroyed. This is necessary to distinguish between an
|
||||
* object that will be destroyed by the app thread asleep on the
|
||||
* destroy_comp list vs. an object destroyed here synchronously
|
||||
* when the last reference is removed.
|
||||
*/
|
||||
static void cm_work_handler(struct work_struct *_work)
|
||||
{
|
||||
|
|
@ -1024,35 +1020,26 @@ static void cm_work_handler(struct work_struct *_work)
|
|||
int ret = 0;
|
||||
|
||||
spin_lock_irqsave(&cm_id_priv->lock, flags);
|
||||
while (!list_empty(&cm_id_priv->work_list)) {
|
||||
work = list_first_entry(&cm_id_priv->work_list,
|
||||
struct iwcm_work, list);
|
||||
list_del_init(&work->list);
|
||||
levent = work->event;
|
||||
put_work(work);
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
|
||||
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
|
||||
ret = process_event(cm_id_priv, &levent);
|
||||
if (ret) {
|
||||
destroy_cm_id(&cm_id_priv->id);
|
||||
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
|
||||
}
|
||||
} else
|
||||
pr_debug("dropping event %d\n", levent.event);
|
||||
if (iwcm_deref_id(cm_id_priv))
|
||||
return;
|
||||
spin_lock_irqsave(&cm_id_priv->lock, flags);
|
||||
}
|
||||
levent = work->event;
|
||||
put_work(work);
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
|
||||
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
|
||||
ret = process_event(cm_id_priv, &levent);
|
||||
if (ret) {
|
||||
destroy_cm_id(&cm_id_priv->id);
|
||||
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
|
||||
}
|
||||
} else
|
||||
pr_debug("dropping event %d\n", levent.event);
|
||||
if (iwcm_deref_id(cm_id_priv))
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called on interrupt context. Schedule events on
|
||||
* the iwcm_wq thread to allow callback functions to downcall into
|
||||
* the CM and/or block. Events are queued to a per-CM_ID
|
||||
* work_list. If this is the first event on the work_list, the work
|
||||
* element is also queued on the iwcm_wq thread.
|
||||
* the CM and/or block.
|
||||
*
|
||||
* Each event holds a reference on the cm_id. Until the last posted
|
||||
* event has been delivered and processed, the cm_id cannot be
|
||||
|
|
@ -1094,7 +1081,6 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
|
|||
}
|
||||
|
||||
refcount_inc(&cm_id_priv->refcount);
|
||||
list_add_tail(&work->list, &cm_id_priv->work_list);
|
||||
queue_work(iwcm_wq, &work->work);
|
||||
out:
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ struct iwcm_id_private {
|
|||
struct ib_qp *qp;
|
||||
struct completion destroy_comp;
|
||||
wait_queue_head_t connect_wait;
|
||||
struct list_head work_list;
|
||||
spinlock_t lock;
|
||||
refcount_t refcount;
|
||||
struct list_head work_free_list;
|
||||
|
|
|
|||
|
|
@ -465,7 +465,7 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
|
|||
|
||||
fd_type =
|
||||
container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
|
||||
if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
|
||||
if (WARN_ON(fd_type->fops && fd_type->fops->release != &uverbs_uobject_fd_release &&
|
||||
fd_type->fops->release != &uverbs_async_event_release)) {
|
||||
ret = ERR_PTR(-EINVAL);
|
||||
goto err_fd;
|
||||
|
|
@ -477,14 +477,16 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
|
|||
goto err_fd;
|
||||
}
|
||||
|
||||
/* Note that uverbs_uobject_fd_release() is called during abort */
|
||||
filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
|
||||
fd_type->flags);
|
||||
if (IS_ERR(filp)) {
|
||||
ret = ERR_CAST(filp);
|
||||
goto err_getfile;
|
||||
if (fd_type->fops) {
|
||||
/* Note that uverbs_uobject_fd_release() is called during abort */
|
||||
filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
|
||||
fd_type->flags);
|
||||
if (IS_ERR(filp)) {
|
||||
ret = ERR_CAST(filp);
|
||||
goto err_getfile;
|
||||
}
|
||||
uobj->object = filp;
|
||||
}
|
||||
uobj->object = filp;
|
||||
|
||||
uobj->id = new_fd;
|
||||
return uobj;
|
||||
|
|
@ -561,7 +563,9 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
|
|||
{
|
||||
struct file *filp = uobj->object;
|
||||
|
||||
fput(filp);
|
||||
if (filp)
|
||||
fput(filp);
|
||||
|
||||
put_unused_fd(uobj->id);
|
||||
}
|
||||
|
||||
|
|
@ -628,11 +632,14 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
|
|||
/* This shouldn't be used anymore. Use the file object instead */
|
||||
uobj->id = 0;
|
||||
|
||||
/*
|
||||
* NOTE: Once we install the file we loose ownership of our kref on
|
||||
* uobj. It will be put by uverbs_uobject_fd_release()
|
||||
*/
|
||||
filp->private_data = uobj;
|
||||
if (!filp->private_data) {
|
||||
/*
|
||||
* NOTE: Once we install the file we loose ownership of our kref on
|
||||
* uobj. It will be put by uverbs_uobject_fd_release()
|
||||
*/
|
||||
filp->private_data = uobj;
|
||||
}
|
||||
|
||||
fd_install(fd, filp);
|
||||
}
|
||||
|
||||
|
|
@ -802,21 +809,10 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
|
|||
};
|
||||
EXPORT_SYMBOL(uverbs_idr_class);
|
||||
|
||||
/*
|
||||
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
|
||||
* file_operations release method.
|
||||
*/
|
||||
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
|
||||
int uverbs_uobject_release(struct ib_uobject *uobj)
|
||||
{
|
||||
struct ib_uverbs_file *ufile;
|
||||
struct ib_uobject *uobj;
|
||||
|
||||
/*
|
||||
* This can only happen if the fput came from alloc_abort_fd_uobject()
|
||||
*/
|
||||
if (!filp->private_data)
|
||||
return 0;
|
||||
uobj = filp->private_data;
|
||||
ufile = uobj->ufile;
|
||||
|
||||
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
|
||||
|
|
@ -843,6 +839,21 @@ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
|
|||
uverbs_uobject_put(uobj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
|
||||
* file_operations release method.
|
||||
*/
|
||||
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
/*
|
||||
* This can only happen if the fput came from alloc_abort_fd_uobject()
|
||||
*/
|
||||
if (!filp->private_data)
|
||||
return 0;
|
||||
|
||||
return uverbs_uobject_release(filp->private_data);
|
||||
}
|
||||
EXPORT_SYMBOL(uverbs_uobject_fd_release);
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@ extern const struct uapi_definition uverbs_def_obj_counters[];
|
|||
extern const struct uapi_definition uverbs_def_obj_cq[];
|
||||
extern const struct uapi_definition uverbs_def_obj_device[];
|
||||
extern const struct uapi_definition uverbs_def_obj_dm[];
|
||||
extern const struct uapi_definition uverbs_def_obj_dmabuf[];
|
||||
extern const struct uapi_definition uverbs_def_obj_dmah[];
|
||||
extern const struct uapi_definition uverbs_def_obj_flow_action[];
|
||||
extern const struct uapi_definition uverbs_def_obj_intf[];
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ enum {
|
|||
RDMA_RW_MULTI_WR,
|
||||
RDMA_RW_MR,
|
||||
RDMA_RW_SIG_MR,
|
||||
RDMA_RW_IOVA,
|
||||
};
|
||||
|
||||
static bool rdma_rw_force_mr;
|
||||
|
|
@ -121,6 +122,36 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
|
|||
return count;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_reg_wr(struct rdma_rw_reg_ctx *reg,
|
||||
struct rdma_rw_reg_ctx *prev, struct ib_qp *qp, u32 port_num,
|
||||
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
|
||||
{
|
||||
if (prev) {
|
||||
if (reg->mr->need_inval)
|
||||
prev->wr.wr.next = ®->inv_wr;
|
||||
else
|
||||
prev->wr.wr.next = ®->reg_wr.wr;
|
||||
}
|
||||
|
||||
reg->reg_wr.wr.next = ®->wr.wr;
|
||||
|
||||
reg->wr.wr.sg_list = ®->sge;
|
||||
reg->wr.wr.num_sge = 1;
|
||||
reg->wr.remote_addr = remote_addr;
|
||||
reg->wr.rkey = rkey;
|
||||
|
||||
if (dir == DMA_TO_DEVICE) {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
|
||||
} else if (!rdma_cap_read_inv(qp->device, port_num)) {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_READ;
|
||||
} else {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
|
||||
reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
|
||||
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
|
||||
|
|
@ -146,30 +177,8 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
if (ret < 0)
|
||||
goto out_free;
|
||||
count += ret;
|
||||
|
||||
if (prev) {
|
||||
if (reg->mr->need_inval)
|
||||
prev->wr.wr.next = ®->inv_wr;
|
||||
else
|
||||
prev->wr.wr.next = ®->reg_wr.wr;
|
||||
}
|
||||
|
||||
reg->reg_wr.wr.next = ®->wr.wr;
|
||||
|
||||
reg->wr.wr.sg_list = ®->sge;
|
||||
reg->wr.wr.num_sge = 1;
|
||||
reg->wr.remote_addr = remote_addr;
|
||||
reg->wr.rkey = rkey;
|
||||
if (dir == DMA_TO_DEVICE) {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
|
||||
} else if (!rdma_cap_read_inv(qp->device, port_num)) {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_READ;
|
||||
} else {
|
||||
reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
|
||||
reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
|
||||
}
|
||||
count++;
|
||||
|
||||
count += rdma_rw_init_reg_wr(reg, prev, qp, port_num,
|
||||
remote_addr, rkey, dir);
|
||||
remote_addr += reg->sge.length;
|
||||
sg_cnt -= nents;
|
||||
for (j = 0; j < nents; j++)
|
||||
|
|
@ -192,6 +201,92 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_mr_wrs_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
|
||||
struct bvec_iter *iter, u64 remote_addr, u32 rkey,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
struct rdma_rw_reg_ctx *prev = NULL;
|
||||
u32 pages_per_mr = rdma_rw_fr_page_list_len(dev, qp->integrity_en);
|
||||
struct scatterlist *sg;
|
||||
int i, ret, count = 0;
|
||||
u32 nents = 0;
|
||||
|
||||
ctx->reg = kcalloc(DIV_ROUND_UP(nr_bvec, pages_per_mr),
|
||||
sizeof(*ctx->reg), GFP_KERNEL);
|
||||
if (!ctx->reg)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Build scatterlist from bvecs using the iterator. This follows
|
||||
* the pattern from __blk_rq_map_sg.
|
||||
*/
|
||||
ctx->reg[0].sgt.sgl = kmalloc_array(nr_bvec,
|
||||
sizeof(*ctx->reg[0].sgt.sgl),
|
||||
GFP_KERNEL);
|
||||
if (!ctx->reg[0].sgt.sgl) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_reg;
|
||||
}
|
||||
sg_init_table(ctx->reg[0].sgt.sgl, nr_bvec);
|
||||
|
||||
for (sg = ctx->reg[0].sgt.sgl; iter->bi_size; sg = sg_next(sg)) {
|
||||
struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);
|
||||
|
||||
if (nents >= nr_bvec) {
|
||||
ret = -EINVAL;
|
||||
goto out_free_sgl;
|
||||
}
|
||||
sg_set_page(sg, bv.bv_page, bv.bv_len, bv.bv_offset);
|
||||
bvec_iter_advance(bvecs, iter, bv.bv_len);
|
||||
nents++;
|
||||
}
|
||||
sg_mark_end(sg_last(ctx->reg[0].sgt.sgl, nents));
|
||||
ctx->reg[0].sgt.orig_nents = nents;
|
||||
|
||||
/* DMA map the scatterlist */
|
||||
ret = ib_dma_map_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
|
||||
if (ret)
|
||||
goto out_free_sgl;
|
||||
|
||||
ctx->nr_ops = DIV_ROUND_UP(ctx->reg[0].sgt.nents, pages_per_mr);
|
||||
|
||||
sg = ctx->reg[0].sgt.sgl;
|
||||
nents = ctx->reg[0].sgt.nents;
|
||||
for (i = 0; i < ctx->nr_ops; i++) {
|
||||
struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
|
||||
u32 sge_cnt = min(nents, pages_per_mr);
|
||||
|
||||
ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sge_cnt, 0);
|
||||
if (ret < 0)
|
||||
goto out_free_mrs;
|
||||
count += ret;
|
||||
count += rdma_rw_init_reg_wr(reg, prev, qp, port_num,
|
||||
remote_addr, rkey, dir);
|
||||
remote_addr += reg->sge.length;
|
||||
nents -= sge_cnt;
|
||||
sg += sge_cnt;
|
||||
prev = reg;
|
||||
}
|
||||
|
||||
if (prev)
|
||||
prev->wr.wr.next = NULL;
|
||||
|
||||
ctx->type = RDMA_RW_MR;
|
||||
return count;
|
||||
|
||||
out_free_mrs:
|
||||
while (--i >= 0)
|
||||
ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
|
||||
ib_dma_unmap_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
|
||||
out_free_sgl:
|
||||
kfree(ctx->reg[0].sgt.sgl);
|
||||
out_free_reg:
|
||||
kfree(ctx->reg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
struct scatterlist *sg, u32 sg_cnt, u32 offset,
|
||||
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
|
||||
|
|
@ -274,6 +369,196 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_single_wr_bvec(struct rdma_rw_ctx *ctx,
|
||||
struct ib_qp *qp, const struct bio_vec *bvecs,
|
||||
struct bvec_iter *iter, u64 remote_addr, u32 rkey,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
|
||||
struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);
|
||||
u64 dma_addr;
|
||||
|
||||
ctx->nr_ops = 1;
|
||||
|
||||
dma_addr = ib_dma_map_bvec(dev, &bv, dir);
|
||||
if (ib_dma_mapping_error(dev, dma_addr))
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->single.sge.lkey = qp->pd->local_dma_lkey;
|
||||
ctx->single.sge.addr = dma_addr;
|
||||
ctx->single.sge.length = bv.bv_len;
|
||||
|
||||
memset(rdma_wr, 0, sizeof(*rdma_wr));
|
||||
if (dir == DMA_TO_DEVICE)
|
||||
rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
|
||||
else
|
||||
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
|
||||
rdma_wr->wr.sg_list = &ctx->single.sge;
|
||||
rdma_wr->wr.num_sge = 1;
|
||||
rdma_wr->remote_addr = remote_addr;
|
||||
rdma_wr->rkey = rkey;
|
||||
|
||||
ctx->type = RDMA_RW_SINGLE_WR;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int rdma_rw_init_map_wrs_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
const struct bio_vec *bvecs, u32 nr_bvec, struct bvec_iter *iter,
|
||||
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
|
||||
qp->max_read_sge;
|
||||
struct ib_sge *sge;
|
||||
u32 total_len = 0, i, j;
|
||||
u32 mapped_bvecs = 0;
|
||||
u32 nr_ops = DIV_ROUND_UP(nr_bvec, max_sge);
|
||||
size_t sges_size = array_size(nr_bvec, sizeof(*ctx->map.sges));
|
||||
size_t wrs_offset = ALIGN(sges_size, __alignof__(*ctx->map.wrs));
|
||||
size_t wrs_size = array_size(nr_ops, sizeof(*ctx->map.wrs));
|
||||
void *mem;
|
||||
|
||||
if (sges_size == SIZE_MAX || wrs_size == SIZE_MAX ||
|
||||
check_add_overflow(wrs_offset, wrs_size, &wrs_size))
|
||||
return -ENOMEM;
|
||||
|
||||
mem = kzalloc(wrs_size, GFP_KERNEL);
|
||||
if (!mem)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->map.sges = sge = mem;
|
||||
ctx->map.wrs = mem + wrs_offset;
|
||||
|
||||
for (i = 0; i < nr_ops; i++) {
|
||||
struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
|
||||
u32 nr_sge = min(nr_bvec - mapped_bvecs, max_sge);
|
||||
|
||||
if (dir == DMA_TO_DEVICE)
|
||||
rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
|
||||
else
|
||||
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
|
||||
rdma_wr->remote_addr = remote_addr + total_len;
|
||||
rdma_wr->rkey = rkey;
|
||||
rdma_wr->wr.num_sge = nr_sge;
|
||||
rdma_wr->wr.sg_list = sge;
|
||||
|
||||
for (j = 0; j < nr_sge; j++) {
|
||||
struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);
|
||||
u64 dma_addr;
|
||||
|
||||
dma_addr = ib_dma_map_bvec(dev, &bv, dir);
|
||||
if (ib_dma_mapping_error(dev, dma_addr))
|
||||
goto out_unmap;
|
||||
|
||||
mapped_bvecs++;
|
||||
sge->addr = dma_addr;
|
||||
sge->length = bv.bv_len;
|
||||
sge->lkey = qp->pd->local_dma_lkey;
|
||||
|
||||
total_len += bv.bv_len;
|
||||
sge++;
|
||||
|
||||
bvec_iter_advance_single(bvecs, iter, bv.bv_len);
|
||||
}
|
||||
|
||||
rdma_wr->wr.next = i + 1 < nr_ops ?
|
||||
&ctx->map.wrs[i + 1].wr : NULL;
|
||||
}
|
||||
|
||||
ctx->nr_ops = nr_ops;
|
||||
ctx->type = RDMA_RW_MULTI_WR;
|
||||
return nr_ops;
|
||||
|
||||
out_unmap:
|
||||
for (i = 0; i < mapped_bvecs; i++)
|
||||
ib_dma_unmap_bvec(dev, ctx->map.sges[i].addr,
|
||||
ctx->map.sges[i].length, dir);
|
||||
kfree(ctx->map.sges);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to use the two-step IOVA API to map bvecs into a contiguous DMA range.
|
||||
* This reduces IOTLB sync overhead by doing one sync at the end instead of
|
||||
* one per bvec, and produces a contiguous DMA address range that can be
|
||||
* described by a single SGE.
|
||||
*
|
||||
* Returns the number of WQEs (always 1) on success, -EOPNOTSUPP if IOVA
|
||||
* mapping is not available, or another negative error code on failure.
|
||||
*/
|
||||
static int rdma_rw_init_iova_wrs_bvec(struct rdma_rw_ctx *ctx,
|
||||
struct ib_qp *qp, const struct bio_vec *bvec,
|
||||
struct bvec_iter *iter, u64 remote_addr, u32 rkey,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
struct device *dma_dev = dev->dma_device;
|
||||
size_t total_len = iter->bi_size;
|
||||
struct bio_vec first_bv;
|
||||
size_t mapped_len = 0;
|
||||
int ret;
|
||||
|
||||
/* Virtual DMA devices cannot support IOVA allocators */
|
||||
if (ib_uses_virt_dma(dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Try to allocate contiguous IOVA space */
|
||||
first_bv = mp_bvec_iter_bvec(bvec, *iter);
|
||||
if (!dma_iova_try_alloc(dma_dev, &ctx->iova.state,
|
||||
bvec_phys(&first_bv), total_len))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Link all bvecs into the IOVA space */
|
||||
while (iter->bi_size) {
|
||||
struct bio_vec bv = mp_bvec_iter_bvec(bvec, *iter);
|
||||
|
||||
ret = dma_iova_link(dma_dev, &ctx->iova.state, bvec_phys(&bv),
|
||||
mapped_len, bv.bv_len, dir, 0);
|
||||
if (ret)
|
||||
goto out_destroy;
|
||||
|
||||
mapped_len += bv.bv_len;
|
||||
bvec_iter_advance(bvec, iter, bv.bv_len);
|
||||
}
|
||||
|
||||
/* Sync the IOTLB once for all linked pages */
|
||||
ret = dma_iova_sync(dma_dev, &ctx->iova.state, 0, mapped_len);
|
||||
if (ret)
|
||||
goto out_destroy;
|
||||
|
||||
ctx->iova.mapped_len = mapped_len;
|
||||
|
||||
/* Single SGE covers the entire contiguous IOVA range */
|
||||
ctx->iova.sge.addr = ctx->iova.state.addr;
|
||||
ctx->iova.sge.length = mapped_len;
|
||||
ctx->iova.sge.lkey = qp->pd->local_dma_lkey;
|
||||
|
||||
/* Single WR for the whole transfer */
|
||||
memset(&ctx->iova.wr, 0, sizeof(ctx->iova.wr));
|
||||
if (dir == DMA_TO_DEVICE)
|
||||
ctx->iova.wr.wr.opcode = IB_WR_RDMA_WRITE;
|
||||
else
|
||||
ctx->iova.wr.wr.opcode = IB_WR_RDMA_READ;
|
||||
ctx->iova.wr.wr.num_sge = 1;
|
||||
ctx->iova.wr.wr.sg_list = &ctx->iova.sge;
|
||||
ctx->iova.wr.remote_addr = remote_addr;
|
||||
ctx->iova.wr.rkey = rkey;
|
||||
|
||||
ctx->type = RDMA_RW_IOVA;
|
||||
ctx->nr_ops = 1;
|
||||
return 1;
|
||||
|
||||
out_destroy:
|
||||
/*
|
||||
* dma_iova_destroy() expects the actual mapped length, not the
|
||||
* total allocation size. It unlinks only the successfully linked
|
||||
* range and frees the entire IOVA allocation.
|
||||
*/
|
||||
dma_iova_destroy(dma_dev, &ctx->iova.state, mapped_len, dir, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
|
||||
* @ctx: context to initialize
|
||||
|
|
@ -344,6 +629,79 @@ out_unmap_sg:
|
|||
}
|
||||
EXPORT_SYMBOL(rdma_rw_ctx_init);
|
||||
|
||||
/**
|
||||
* rdma_rw_ctx_init_bvec - initialize a RDMA READ/WRITE context from bio_vec
|
||||
* @ctx: context to initialize
|
||||
* @qp: queue pair to operate on
|
||||
* @port_num: port num to which the connection is bound
|
||||
* @bvecs: bio_vec array to READ/WRITE from/to
|
||||
* @nr_bvec: number of entries in @bvecs
|
||||
* @iter: bvec iterator describing offset and length
|
||||
* @remote_addr: remote address to read/write (relative to @rkey)
|
||||
* @rkey: remote key to operate on
|
||||
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
|
||||
*
|
||||
* Maps the bio_vec array directly, avoiding intermediate scatterlist
|
||||
* conversion. Supports MR registration for iWARP devices and force_mr mode.
|
||||
*
|
||||
* Returns the number of WQEs that will be needed on the workqueue if
|
||||
* successful, or a negative error code:
|
||||
*
|
||||
* * -EINVAL - @nr_bvec is zero or @iter.bi_size is zero
|
||||
* * -ENOMEM - DMA mapping or memory allocation failed
|
||||
*/
|
||||
int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
|
||||
struct bvec_iter iter, u64 remote_addr, u32 rkey,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
int ret;
|
||||
|
||||
if (nr_bvec == 0 || iter.bi_size == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* iWARP requires MR registration for all RDMA READs. The force_mr
|
||||
* debug option also mandates MR usage.
|
||||
*/
|
||||
if (dir == DMA_FROM_DEVICE && rdma_protocol_iwarp(dev, port_num))
|
||||
return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
|
||||
nr_bvec, &iter, remote_addr,
|
||||
rkey, dir);
|
||||
if (unlikely(rdma_rw_force_mr))
|
||||
return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
|
||||
nr_bvec, &iter, remote_addr,
|
||||
rkey, dir);
|
||||
|
||||
if (nr_bvec == 1)
|
||||
return rdma_rw_init_single_wr_bvec(ctx, qp, bvecs, &iter,
|
||||
remote_addr, rkey, dir);
|
||||
|
||||
/*
|
||||
* Try IOVA-based mapping first for multi-bvec transfers.
|
||||
* IOVA coalesces bvecs into a single DMA-contiguous region,
|
||||
* reducing the number of WRs needed and avoiding MR overhead.
|
||||
*/
|
||||
ret = rdma_rw_init_iova_wrs_bvec(ctx, qp, bvecs, &iter, remote_addr,
|
||||
rkey, dir);
|
||||
if (ret != -EOPNOTSUPP)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* IOVA mapping not available. Check if MR registration provides
|
||||
* better performance than multiple SGE entries.
|
||||
*/
|
||||
if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec))
|
||||
return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
|
||||
nr_bvec, &iter, remote_addr,
|
||||
rkey, dir);
|
||||
|
||||
return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter,
|
||||
remote_addr, rkey, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_rw_ctx_init_bvec);
|
||||
|
||||
/**
|
||||
* rdma_rw_ctx_signature_init - initialize a RW context with signature offload
|
||||
* @ctx: context to initialize
|
||||
|
|
@ -515,6 +873,10 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
first_wr = &ctx->reg[0].reg_wr.wr;
|
||||
last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
|
||||
break;
|
||||
case RDMA_RW_IOVA:
|
||||
first_wr = &ctx->iova.wr.wr;
|
||||
last_wr = &ctx->iova.wr.wr;
|
||||
break;
|
||||
case RDMA_RW_MULTI_WR:
|
||||
first_wr = &ctx->map.wrs[0].wr;
|
||||
last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
|
||||
|
|
@ -579,6 +941,8 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
|
||||
switch (ctx->type) {
|
||||
case RDMA_RW_MR:
|
||||
/* Bvec MR contexts must use rdma_rw_ctx_destroy_bvec() */
|
||||
WARN_ON_ONCE(ctx->reg[0].sgt.sgl);
|
||||
for (i = 0; i < ctx->nr_ops; i++)
|
||||
ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
|
||||
kfree(ctx->reg);
|
||||
|
|
@ -589,6 +953,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
break;
|
||||
case RDMA_RW_SINGLE_WR:
|
||||
break;
|
||||
case RDMA_RW_IOVA:
|
||||
/* IOVA contexts must use rdma_rw_ctx_destroy_bvec() */
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
default:
|
||||
BUG();
|
||||
break;
|
||||
|
|
@ -598,6 +966,58 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
}
|
||||
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
|
||||
|
||||
/**
|
||||
* rdma_rw_ctx_destroy_bvec - release resources from rdma_rw_ctx_init_bvec
|
||||
* @ctx: context to release
|
||||
* @qp: queue pair to operate on
|
||||
* @port_num: port num to which the connection is bound (unused)
|
||||
* @bvecs: bio_vec array that was used for the READ/WRITE (unused)
|
||||
* @nr_bvec: number of entries in @bvecs
|
||||
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
|
||||
*
|
||||
* Releases all resources allocated by a successful rdma_rw_ctx_init_bvec()
|
||||
* call. Must not be called if rdma_rw_ctx_init_bvec() returned an error.
|
||||
*
|
||||
* The @port_num and @bvecs parameters are unused but present for API
|
||||
* symmetry with rdma_rw_ctx_destroy().
|
||||
*/
|
||||
void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 __maybe_unused port_num,
|
||||
const struct bio_vec __maybe_unused *bvecs,
|
||||
u32 nr_bvec, enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_device *dev = qp->pd->device;
|
||||
u32 i;
|
||||
|
||||
switch (ctx->type) {
|
||||
case RDMA_RW_MR:
|
||||
for (i = 0; i < ctx->nr_ops; i++)
|
||||
ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
|
||||
ib_dma_unmap_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
|
||||
kfree(ctx->reg[0].sgt.sgl);
|
||||
kfree(ctx->reg);
|
||||
break;
|
||||
case RDMA_RW_IOVA:
|
||||
dma_iova_destroy(dev->dma_device, &ctx->iova.state,
|
||||
ctx->iova.mapped_len, dir, 0);
|
||||
break;
|
||||
case RDMA_RW_MULTI_WR:
|
||||
for (i = 0; i < nr_bvec; i++)
|
||||
ib_dma_unmap_bvec(dev, ctx->map.sges[i].addr,
|
||||
ctx->map.sges[i].length, dir);
|
||||
kfree(ctx->map.sges);
|
||||
break;
|
||||
case RDMA_RW_SINGLE_WR:
|
||||
ib_dma_unmap_bvec(dev, ctx->single.sge.addr,
|
||||
ctx->single.sge.length, dir);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_rw_ctx_destroy_bvec);
|
||||
|
||||
/**
|
||||
* rdma_rw_ctx_destroy_signature - release all resources allocated by
|
||||
* rdma_rw_ctx_signature_init
|
||||
|
|
@ -651,34 +1071,57 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
|
|||
}
|
||||
EXPORT_SYMBOL(rdma_rw_mr_factor);
|
||||
|
||||
/**
|
||||
* rdma_rw_max_send_wr - compute max Send WRs needed for RDMA R/W contexts
|
||||
* @dev: RDMA device
|
||||
* @port_num: port number
|
||||
* @max_rdma_ctxs: number of rdma_rw_ctx structures
|
||||
* @create_flags: QP create flags (pass IB_QP_CREATE_INTEGRITY_EN if
|
||||
* data integrity will be enabled on the QP)
|
||||
*
|
||||
* Returns the total number of Send Queue entries needed for
|
||||
* @max_rdma_ctxs. The result accounts for memory registration and
|
||||
* invalidation work requests when the device requires them.
|
||||
*
|
||||
* ULPs use this to size Send Queues and Send CQs before creating a
|
||||
* Queue Pair.
|
||||
*/
|
||||
unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
|
||||
unsigned int max_rdma_ctxs, u32 create_flags)
|
||||
{
|
||||
unsigned int factor = 1;
|
||||
unsigned int result;
|
||||
|
||||
if (create_flags & IB_QP_CREATE_INTEGRITY_EN ||
|
||||
rdma_rw_can_use_mr(dev, port_num))
|
||||
factor += 2; /* reg + inv */
|
||||
|
||||
if (check_mul_overflow(factor, max_rdma_ctxs, &result))
|
||||
return UINT_MAX;
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL(rdma_rw_max_send_wr);
|
||||
|
||||
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
|
||||
{
|
||||
u32 factor;
|
||||
unsigned int factor = 1;
|
||||
|
||||
WARN_ON_ONCE(attr->port_num == 0);
|
||||
|
||||
/*
|
||||
* Each context needs at least one RDMA READ or WRITE WR.
|
||||
*
|
||||
* For some hardware we might need more, eventually we should ask the
|
||||
* HCA driver for a multiplier here.
|
||||
*/
|
||||
factor = 1;
|
||||
|
||||
/*
|
||||
* If the device needs MRs to perform RDMA READ or WRITE operations,
|
||||
* we'll need two additional MRs for the registrations and the
|
||||
* invalidation.
|
||||
* If the device uses MRs to perform RDMA READ or WRITE operations,
|
||||
* or if data integrity is enabled, account for registration and
|
||||
* invalidation work requests.
|
||||
*/
|
||||
if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
|
||||
rdma_rw_can_use_mr(dev, attr->port_num))
|
||||
factor += 2; /* inv + reg */
|
||||
factor += 2; /* reg + inv */
|
||||
|
||||
attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
|
||||
|
||||
/*
|
||||
* But maybe we were just too high in the sky and the device doesn't
|
||||
* even support all we need, and we'll have to live with what we get..
|
||||
* The device might not support all we need, and we'll have to
|
||||
* live with what we get.
|
||||
*/
|
||||
attr->cap.max_send_wr =
|
||||
min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
|
||||
|
|
|
|||
|
|
@ -292,62 +292,22 @@ static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num,
|
|||
static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
|
||||
struct ib_port_attribute *unused, char *buf)
|
||||
{
|
||||
struct ib_port_speed_info speed_info;
|
||||
struct ib_port_attr attr;
|
||||
char *speed = "";
|
||||
int rate; /* in deci-Gb/sec */
|
||||
ssize_t ret;
|
||||
|
||||
ret = ib_query_port(ibdev, port_num, &attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
switch (attr.active_speed) {
|
||||
case IB_SPEED_DDR:
|
||||
speed = " DDR";
|
||||
rate = 50;
|
||||
break;
|
||||
case IB_SPEED_QDR:
|
||||
speed = " QDR";
|
||||
rate = 100;
|
||||
break;
|
||||
case IB_SPEED_FDR10:
|
||||
speed = " FDR10";
|
||||
rate = 100;
|
||||
break;
|
||||
case IB_SPEED_FDR:
|
||||
speed = " FDR";
|
||||
rate = 140;
|
||||
break;
|
||||
case IB_SPEED_EDR:
|
||||
speed = " EDR";
|
||||
rate = 250;
|
||||
break;
|
||||
case IB_SPEED_HDR:
|
||||
speed = " HDR";
|
||||
rate = 500;
|
||||
break;
|
||||
case IB_SPEED_NDR:
|
||||
speed = " NDR";
|
||||
rate = 1000;
|
||||
break;
|
||||
case IB_SPEED_XDR:
|
||||
speed = " XDR";
|
||||
rate = 2000;
|
||||
break;
|
||||
case IB_SPEED_SDR:
|
||||
default: /* default to SDR for invalid rates */
|
||||
speed = " SDR";
|
||||
rate = 25;
|
||||
break;
|
||||
}
|
||||
ret = ib_port_attr_to_speed_info(&attr, &speed_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
rate *= ib_width_enum_to_int(attr.active_width);
|
||||
if (rate < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
|
||||
rate % 10 ? ".5" : "",
|
||||
ib_width_enum_to_int(attr.active_width), speed);
|
||||
return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", speed_info.rate / 10,
|
||||
speed_info.rate % 10 ? ".5" : "",
|
||||
ib_width_enum_to_int(attr.active_width),
|
||||
speed_info.str);
|
||||
}
|
||||
|
||||
static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
|
||||
|
|
|
|||
|
|
@ -129,9 +129,6 @@ ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
|
|||
if (check_add_overflow(offset, (unsigned long)size, &end))
|
||||
return ret;
|
||||
|
||||
if (unlikely(!ops || !ops->move_notify))
|
||||
return ret;
|
||||
|
||||
dmabuf = dma_buf_get(fd);
|
||||
if (IS_ERR(dmabuf))
|
||||
return ERR_CAST(dmabuf);
|
||||
|
|
|
|||
|
|
@ -514,7 +514,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
|
|||
struct rdma_ah_attr ah_attr;
|
||||
struct ib_ah *ah;
|
||||
__be64 *tid;
|
||||
int ret, data_len, hdr_len, copy_offset, rmpp_active;
|
||||
int ret, hdr_len, copy_offset, rmpp_active;
|
||||
size_t data_len;
|
||||
u8 base_version;
|
||||
|
||||
if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
|
||||
|
|
@ -588,7 +589,10 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
|
|||
}
|
||||
|
||||
base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version;
|
||||
data_len = count - hdr_size(file) - hdr_len;
|
||||
if (check_sub_overflow(count, hdr_size(file) + hdr_len, &data_len)) {
|
||||
ret = -EINVAL;
|
||||
goto err_ah;
|
||||
}
|
||||
packet->msg = ib_create_send_mad(agent,
|
||||
be32_to_cpu(packet->mad.hdr.qpn),
|
||||
packet->mad.hdr.pkey_index, rmpp_active,
|
||||
|
|
|
|||
|
|
@ -133,6 +133,18 @@ struct ib_uverbs_completion_event_file {
|
|||
struct ib_uverbs_event_queue ev_queue;
|
||||
};
|
||||
|
||||
struct ib_uverbs_dmabuf_file {
|
||||
struct ib_uobject uobj;
|
||||
struct dma_buf *dmabuf;
|
||||
struct list_head dmabufs_elm;
|
||||
struct rdma_user_mmap_entry *mmap_entry;
|
||||
struct phys_vec phys_vec;
|
||||
struct p2pdma_provider *provider;
|
||||
struct kref kref;
|
||||
struct completion comp;
|
||||
u8 revoked :1;
|
||||
};
|
||||
|
||||
struct ib_uverbs_event {
|
||||
union {
|
||||
struct ib_uverbs_async_event_desc async;
|
||||
|
|
@ -290,4 +302,13 @@ ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
|
|||
void copy_port_attr_to_resp(struct ib_port_attr *attr,
|
||||
struct ib_uverbs_query_port_resp *resp,
|
||||
struct ib_device *ib_dev, u8 port_num);
|
||||
|
||||
static inline void ib_uverbs_dmabuf_done(struct kref *kref)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *priv =
|
||||
container_of(kref, struct ib_uverbs_dmabuf_file, kref);
|
||||
|
||||
complete(&priv->comp);
|
||||
}
|
||||
|
||||
#endif /* UVERBS_H */
|
||||
|
|
|
|||
|
|
@ -2049,7 +2049,10 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
|
||||
if (cmd.wqe_size < sizeof(struct ib_uverbs_send_wr))
|
||||
return -EINVAL;
|
||||
|
||||
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!user_wr)
|
||||
return -ENOMEM;
|
||||
|
||||
|
|
@ -2239,7 +2242,7 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
|
|||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
user_wr = kmalloc(wqe_size, GFP_KERNEL);
|
||||
user_wr = kmalloc(wqe_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!user_wr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
|
|
|
|||
|
|
@ -209,6 +209,39 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
|
|||
&resp, sizeof(resp));
|
||||
}
|
||||
|
||||
static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT_SPEED)(
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct ib_ucontext *ucontext;
|
||||
struct ib_device *ib_dev;
|
||||
u32 port_num;
|
||||
u64 speed;
|
||||
int ret;
|
||||
|
||||
ucontext = ib_uverbs_get_ucontext(attrs);
|
||||
if (IS_ERR(ucontext))
|
||||
return PTR_ERR(ucontext);
|
||||
ib_dev = ucontext->device;
|
||||
|
||||
if (!ib_dev->ops.query_port_speed)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ret = uverbs_get_const(&port_num, attrs,
|
||||
UVERBS_ATTR_QUERY_PORT_SPEED_PORT_NUM);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!rdma_is_port_valid(ib_dev, port_num))
|
||||
return -EINVAL;
|
||||
|
||||
ret = ib_dev->ops.query_port_speed(ib_dev, port_num, &speed);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_PORT_SPEED_RESP,
|
||||
&speed, sizeof(speed));
|
||||
}
|
||||
|
||||
static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
|
|
@ -469,6 +502,14 @@ DECLARE_UVERBS_NAMED_METHOD(
|
|||
active_speed_ex),
|
||||
UA_MANDATORY));
|
||||
|
||||
DECLARE_UVERBS_NAMED_METHOD(
|
||||
UVERBS_METHOD_QUERY_PORT_SPEED,
|
||||
UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_SPEED_PORT_NUM, u32,
|
||||
UA_MANDATORY),
|
||||
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_PORT_SPEED_RESP,
|
||||
UVERBS_ATTR_TYPE(u64),
|
||||
UA_MANDATORY));
|
||||
|
||||
DECLARE_UVERBS_NAMED_METHOD(
|
||||
UVERBS_METHOD_QUERY_GID_TABLE,
|
||||
UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
|
||||
|
|
@ -498,6 +539,7 @@ DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
|
|||
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT_SPEED),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
|
||||
|
|
|
|||
200
drivers/infiniband/core/uverbs_std_types_dmabuf.c
Normal file
200
drivers/infiniband/core/uverbs_std_types_dmabuf.c
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
|
||||
*/
|
||||
|
||||
#include <linux/dma-buf-mapping.h>
|
||||
#include <linux/pci-p2pdma.h>
|
||||
#include <linux/dma-resv.h>
|
||||
#include <rdma/uverbs_std_types.h>
|
||||
#include "rdma_core.h"
|
||||
#include "uverbs.h"
|
||||
|
||||
static int uverbs_dmabuf_attach(struct dma_buf *dmabuf,
|
||||
struct dma_buf_attachment *attachment)
|
||||
{
|
||||
if (!attachment->peer2peer)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct sg_table *
|
||||
uverbs_dmabuf_map(struct dma_buf_attachment *attachment,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *priv = attachment->dmabuf->priv;
|
||||
struct sg_table *ret;
|
||||
|
||||
dma_resv_assert_held(priv->dmabuf->resv);
|
||||
|
||||
if (priv->revoked)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
|
||||
&priv->phys_vec, 1, priv->phys_vec.len,
|
||||
dir);
|
||||
if (IS_ERR(ret))
|
||||
return ret;
|
||||
|
||||
kref_get(&priv->kref);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void uverbs_dmabuf_unmap(struct dma_buf_attachment *attachment,
|
||||
struct sg_table *sgt,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *priv = attachment->dmabuf->priv;
|
||||
|
||||
dma_resv_assert_held(priv->dmabuf->resv);
|
||||
dma_buf_free_sgt(attachment, sgt, dir);
|
||||
kref_put(&priv->kref, ib_uverbs_dmabuf_done);
|
||||
}
|
||||
|
||||
static int uverbs_dmabuf_pin(struct dma_buf_attachment *attach)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static void uverbs_dmabuf_unpin(struct dma_buf_attachment *attach)
|
||||
{
|
||||
}
|
||||
|
||||
static void uverbs_dmabuf_release(struct dma_buf *dmabuf)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *priv = dmabuf->priv;
|
||||
|
||||
/*
|
||||
* This can only happen if the fput came from alloc_abort_fd_uobject()
|
||||
*/
|
||||
if (!priv->uobj.context)
|
||||
return;
|
||||
|
||||
uverbs_uobject_release(&priv->uobj);
|
||||
}
|
||||
|
||||
static const struct dma_buf_ops uverbs_dmabuf_ops = {
|
||||
.attach = uverbs_dmabuf_attach,
|
||||
.map_dma_buf = uverbs_dmabuf_map,
|
||||
.unmap_dma_buf = uverbs_dmabuf_unmap,
|
||||
.pin = uverbs_dmabuf_pin,
|
||||
.unpin = uverbs_dmabuf_unpin,
|
||||
.release = uverbs_dmabuf_release,
|
||||
};
|
||||
|
||||
static int UVERBS_HANDLER(UVERBS_METHOD_DMABUF_ALLOC)(
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct ib_uobject *uobj =
|
||||
uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE)
|
||||
->obj_attr.uobject;
|
||||
struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
|
||||
container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
|
||||
struct ib_device *ib_dev = attrs->context->device;
|
||||
struct rdma_user_mmap_entry *mmap_entry;
|
||||
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
|
||||
off_t pg_off;
|
||||
int ret;
|
||||
|
||||
ret = uverbs_get_const(&pg_off, attrs, UVERBS_ATTR_ALLOC_DMABUF_PGOFF);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mmap_entry = ib_dev->ops.pgoff_to_mmap_entry(attrs->context, pg_off);
|
||||
if (!mmap_entry)
|
||||
return -EINVAL;
|
||||
|
||||
ret = ib_dev->ops.mmap_get_pfns(mmap_entry, &uverbs_dmabuf->phys_vec,
|
||||
&uverbs_dmabuf->provider);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
exp_info.ops = &uverbs_dmabuf_ops;
|
||||
exp_info.size = uverbs_dmabuf->phys_vec.len;
|
||||
exp_info.flags = O_CLOEXEC;
|
||||
exp_info.priv = uverbs_dmabuf;
|
||||
|
||||
uverbs_dmabuf->dmabuf = dma_buf_export(&exp_info);
|
||||
if (IS_ERR(uverbs_dmabuf->dmabuf)) {
|
||||
ret = PTR_ERR(uverbs_dmabuf->dmabuf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
kref_init(&uverbs_dmabuf->kref);
|
||||
init_completion(&uverbs_dmabuf->comp);
|
||||
INIT_LIST_HEAD(&uverbs_dmabuf->dmabufs_elm);
|
||||
mutex_lock(&mmap_entry->dmabufs_lock);
|
||||
if (mmap_entry->driver_removed)
|
||||
ret = -EIO;
|
||||
else
|
||||
list_add_tail(&uverbs_dmabuf->dmabufs_elm, &mmap_entry->dmabufs);
|
||||
mutex_unlock(&mmap_entry->dmabufs_lock);
|
||||
if (ret)
|
||||
goto err_revoked;
|
||||
|
||||
uobj->object = uverbs_dmabuf->dmabuf->file;
|
||||
uverbs_dmabuf->mmap_entry = mmap_entry;
|
||||
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE);
|
||||
return 0;
|
||||
|
||||
err_revoked:
|
||||
dma_buf_put(uverbs_dmabuf->dmabuf);
|
||||
err:
|
||||
rdma_user_mmap_entry_put(mmap_entry);
|
||||
return ret;
|
||||
}
|
||||
|
||||
DECLARE_UVERBS_NAMED_METHOD(
|
||||
UVERBS_METHOD_DMABUF_ALLOC,
|
||||
UVERBS_ATTR_FD(UVERBS_ATTR_ALLOC_DMABUF_HANDLE,
|
||||
UVERBS_OBJECT_DMABUF,
|
||||
UVERBS_ACCESS_NEW,
|
||||
UA_MANDATORY),
|
||||
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMABUF_PGOFF,
|
||||
UVERBS_ATTR_TYPE(u64),
|
||||
UA_MANDATORY));
|
||||
|
||||
static void uverbs_dmabuf_fd_destroy_uobj(struct ib_uobject *uobj,
|
||||
enum rdma_remove_reason why)
|
||||
{
|
||||
struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
|
||||
container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
|
||||
bool wait_for_comp = false;
|
||||
|
||||
mutex_lock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
|
||||
dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
|
||||
if (!uverbs_dmabuf->revoked) {
|
||||
uverbs_dmabuf->revoked = true;
|
||||
list_del(&uverbs_dmabuf->dmabufs_elm);
|
||||
dma_buf_move_notify(uverbs_dmabuf->dmabuf);
|
||||
dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
|
||||
DMA_RESV_USAGE_BOOKKEEP, false,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
wait_for_comp = true;
|
||||
}
|
||||
dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
|
||||
if (wait_for_comp) {
|
||||
kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
|
||||
/* Let's wait till all DMA unmap are completed. */
|
||||
wait_for_completion(&uverbs_dmabuf->comp);
|
||||
}
|
||||
mutex_unlock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
|
||||
|
||||
/* Matches the get done as part of pgoff_to_mmap_entry() */
|
||||
rdma_user_mmap_entry_put(uverbs_dmabuf->mmap_entry);
|
||||
}
|
||||
|
||||
DECLARE_UVERBS_NAMED_OBJECT(
|
||||
UVERBS_OBJECT_DMABUF,
|
||||
UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_dmabuf_file),
|
||||
uverbs_dmabuf_fd_destroy_uobj,
|
||||
NULL, NULL, O_RDONLY),
|
||||
&UVERBS_METHOD(UVERBS_METHOD_DMABUF_ALLOC));
|
||||
|
||||
const struct uapi_definition uverbs_def_obj_dmabuf[] = {
|
||||
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMABUF),
|
||||
UAPI_DEF_OBJ_NEEDS_FN(mmap_get_pfns),
|
||||
UAPI_DEF_OBJ_NEEDS_FN(pgoff_to_mmap_entry),
|
||||
{}
|
||||
};
|
||||
|
|
@ -631,6 +631,7 @@ static const struct uapi_definition uverbs_core_api[] = {
|
|||
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_device),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_dmabuf),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
|
||||
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ static const char * const ib_events[] = {
|
|||
[IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached",
|
||||
[IB_EVENT_CLIENT_REREGISTER] = "client reregister",
|
||||
[IB_EVENT_GID_CHANGE] = "GID changed",
|
||||
[IB_EVENT_DEVICE_SPEED_CHANGE] = "device speed change"
|
||||
};
|
||||
|
||||
const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
|
||||
|
|
@ -216,6 +217,57 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
|
|||
}
|
||||
EXPORT_SYMBOL(ib_rate_to_mbps);
|
||||
|
||||
struct ib_speed_attr {
|
||||
const char *str;
|
||||
int speed;
|
||||
};
|
||||
|
||||
#define IB_SPEED_ATTR(speed_type, _str, _speed) \
|
||||
[speed_type] = {.str = _str, .speed = _speed}
|
||||
|
||||
static const struct ib_speed_attr ib_speed_attrs[] = {
|
||||
IB_SPEED_ATTR(IB_SPEED_SDR, " SDR", 25),
|
||||
IB_SPEED_ATTR(IB_SPEED_DDR, " DDR", 50),
|
||||
IB_SPEED_ATTR(IB_SPEED_QDR, " QDR", 100),
|
||||
IB_SPEED_ATTR(IB_SPEED_FDR10, " FDR10", 100),
|
||||
IB_SPEED_ATTR(IB_SPEED_FDR, " FDR", 140),
|
||||
IB_SPEED_ATTR(IB_SPEED_EDR, " EDR", 250),
|
||||
IB_SPEED_ATTR(IB_SPEED_HDR, " HDR", 500),
|
||||
IB_SPEED_ATTR(IB_SPEED_NDR, " NDR", 1000),
|
||||
IB_SPEED_ATTR(IB_SPEED_XDR, " XDR", 2000),
|
||||
};
|
||||
|
||||
int ib_port_attr_to_speed_info(struct ib_port_attr *attr,
|
||||
struct ib_port_speed_info *speed_info)
|
||||
{
|
||||
int speed_idx = attr->active_speed;
|
||||
|
||||
switch (attr->active_speed) {
|
||||
case IB_SPEED_DDR:
|
||||
case IB_SPEED_QDR:
|
||||
case IB_SPEED_FDR10:
|
||||
case IB_SPEED_FDR:
|
||||
case IB_SPEED_EDR:
|
||||
case IB_SPEED_HDR:
|
||||
case IB_SPEED_NDR:
|
||||
case IB_SPEED_XDR:
|
||||
case IB_SPEED_SDR:
|
||||
break;
|
||||
default:
|
||||
speed_idx = IB_SPEED_SDR; /* Default to SDR for invalid rates */
|
||||
break;
|
||||
}
|
||||
|
||||
speed_info->str = ib_speed_attrs[speed_idx].str;
|
||||
speed_info->rate = ib_speed_attrs[speed_idx].speed;
|
||||
speed_info->rate *= ib_width_enum_to_int(attr->active_width);
|
||||
if (speed_info->rate < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ib_port_attr_to_speed_info);
|
||||
|
||||
__attribute_const__ enum rdma_transport_type
|
||||
rdma_node_get_transport(unsigned int node_type)
|
||||
{
|
||||
|
|
@ -1485,7 +1537,8 @@ static const struct {
|
|||
IB_QP_PKEY_INDEX),
|
||||
[IB_QPT_RC] = (IB_QP_ALT_PATH |
|
||||
IB_QP_ACCESS_FLAGS |
|
||||
IB_QP_PKEY_INDEX),
|
||||
IB_QP_PKEY_INDEX |
|
||||
IB_QP_RATE_LIMIT),
|
||||
[IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
|
||||
IB_QP_ACCESS_FLAGS |
|
||||
IB_QP_PKEY_INDEX),
|
||||
|
|
@ -1533,7 +1586,8 @@ static const struct {
|
|||
IB_QP_ALT_PATH |
|
||||
IB_QP_ACCESS_FLAGS |
|
||||
IB_QP_MIN_RNR_TIMER |
|
||||
IB_QP_PATH_MIG_STATE),
|
||||
IB_QP_PATH_MIG_STATE |
|
||||
IB_QP_RATE_LIMIT),
|
||||
[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
|
||||
IB_QP_ALT_PATH |
|
||||
IB_QP_ACCESS_FLAGS |
|
||||
|
|
@ -1567,7 +1621,8 @@ static const struct {
|
|||
IB_QP_ACCESS_FLAGS |
|
||||
IB_QP_ALT_PATH |
|
||||
IB_QP_PATH_MIG_STATE |
|
||||
IB_QP_MIN_RNR_TIMER),
|
||||
IB_QP_MIN_RNR_TIMER |
|
||||
IB_QP_RATE_LIMIT),
|
||||
[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
|
||||
IB_QP_ACCESS_FLAGS |
|
||||
IB_QP_ALT_PATH |
|
||||
|
|
|
|||
|
|
@ -87,25 +87,35 @@ static ssize_t qp_info_read(struct file *filep,
|
|||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct bnxt_re_qp *qp = filep->private_data;
|
||||
struct bnxt_qplib_qp *qplib_qp;
|
||||
u32 rate_limit = 0;
|
||||
char *buf;
|
||||
int len;
|
||||
|
||||
if (*ppos)
|
||||
return 0;
|
||||
|
||||
qplib_qp = &qp->qplib_qp;
|
||||
if (qplib_qp->shaper_allocation_status)
|
||||
rate_limit = qplib_qp->rate_limit;
|
||||
|
||||
buf = kasprintf(GFP_KERNEL,
|
||||
"QPN\t\t: %d\n"
|
||||
"transport\t: %s\n"
|
||||
"state\t\t: %s\n"
|
||||
"mtu\t\t: %d\n"
|
||||
"timeout\t\t: %d\n"
|
||||
"remote QPN\t: %d\n",
|
||||
"remote QPN\t: %d\n"
|
||||
"shaper allocated : %d\n"
|
||||
"rate limit\t: %d kbps\n",
|
||||
qp->qplib_qp.id,
|
||||
bnxt_re_qp_type_str(qp->qplib_qp.type),
|
||||
bnxt_re_qp_state_str(qp->qplib_qp.state),
|
||||
qp->qplib_qp.mtu,
|
||||
qp->qplib_qp.timeout,
|
||||
qp->qplib_qp.dest_qpn);
|
||||
qp->qplib_qp.dest_qpn,
|
||||
qplib_qp->shaper_allocation_status,
|
||||
rate_limit);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
if (count < strlen(buf)) {
|
||||
|
|
|
|||
|
|
@ -186,6 +186,9 @@ int bnxt_re_query_device(struct ib_device *ibdev,
|
|||
{
|
||||
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
|
||||
struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
|
||||
struct bnxt_re_query_device_ex_resp resp = {};
|
||||
size_t outlen = (udata) ? udata->outlen : 0;
|
||||
int rc = 0;
|
||||
|
||||
memset(ib_attr, 0, sizeof(*ib_attr));
|
||||
memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
|
||||
|
|
@ -250,7 +253,21 @@ int bnxt_re_query_device(struct ib_device *ibdev,
|
|||
|
||||
ib_attr->max_pkeys = 1;
|
||||
ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY;
|
||||
return 0;
|
||||
|
||||
if ((offsetofend(typeof(resp), packet_pacing_caps) <= outlen) &&
|
||||
_is_modify_qp_rate_limit_supported(dev_attr->dev_cap_flags2)) {
|
||||
resp.packet_pacing_caps.qp_rate_limit_min =
|
||||
dev_attr->rate_limit_min;
|
||||
resp.packet_pacing_caps.qp_rate_limit_max =
|
||||
dev_attr->rate_limit_max;
|
||||
resp.packet_pacing_caps.supported_qpts =
|
||||
1 << IB_QPT_RC;
|
||||
}
|
||||
if (outlen)
|
||||
rc = ib_copy_to_udata(udata, &resp,
|
||||
min(sizeof(resp), outlen));
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int bnxt_re_modify_device(struct ib_device *ibdev,
|
||||
|
|
@ -2089,10 +2106,11 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
|
|||
unsigned int flags;
|
||||
u8 nw_type;
|
||||
|
||||
if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
|
||||
if (qp_attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
qp->qplib_qp.modify_flags = 0;
|
||||
qp->qplib_qp.ext_modify_flags = 0;
|
||||
if (qp_attr_mask & IB_QP_STATE) {
|
||||
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
|
||||
new_qp_state = qp_attr->qp_state;
|
||||
|
|
@ -2129,6 +2147,15 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
|
|||
bnxt_re_unlock_cqs(qp, flags);
|
||||
}
|
||||
}
|
||||
|
||||
if (qp_attr_mask & IB_QP_RATE_LIMIT) {
|
||||
if (qp->qplib_qp.type != IB_QPT_RC ||
|
||||
!_is_modify_qp_rate_limit_supported(dev_attr->dev_cap_flags2))
|
||||
return -EOPNOTSUPP;
|
||||
qp->qplib_qp.ext_modify_flags |=
|
||||
CMDQ_MODIFY_QP_EXT_MODIFY_MASK_RATE_LIMIT_VALID;
|
||||
qp->qplib_qp.rate_limit = qp_attr->rate_limit;
|
||||
}
|
||||
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
|
||||
qp->qplib_qp.modify_flags |=
|
||||
CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY;
|
||||
|
|
@ -4386,6 +4413,9 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
|
|||
if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
|
||||
resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED;
|
||||
|
||||
if (_is_modify_qp_rate_limit_supported(dev_attr->dev_cap_flags2))
|
||||
resp.comp_mask |= BNXT_RE_UCNTX_CMASK_QP_RATE_LIMIT_ENABLED;
|
||||
|
||||
if (udata->inlen >= sizeof(ureq)) {
|
||||
rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq)));
|
||||
if (rc)
|
||||
|
|
|
|||
|
|
@ -1313,8 +1313,8 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
|
|||
struct bnxt_qplib_cmdqmsg msg = {};
|
||||
struct cmdq_modify_qp req = {};
|
||||
u16 vlan_pcp_vlan_dei_vlan_id;
|
||||
u32 bmask, bmask_ext;
|
||||
u32 temp32[4];
|
||||
u32 bmask;
|
||||
int rc;
|
||||
|
||||
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
|
||||
|
|
@ -1329,9 +1329,16 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
|
|||
is_optimized_state_transition(qp))
|
||||
bnxt_set_mandatory_attributes(res, qp, &req);
|
||||
}
|
||||
|
||||
bmask = qp->modify_flags;
|
||||
req.modify_mask = cpu_to_le32(qp->modify_flags);
|
||||
bmask_ext = qp->ext_modify_flags;
|
||||
req.ext_modify_mask = cpu_to_le32(qp->ext_modify_flags);
|
||||
req.qp_cid = cpu_to_le32(qp->id);
|
||||
|
||||
if (bmask_ext & CMDQ_MODIFY_QP_EXT_MODIFY_MASK_RATE_LIMIT_VALID)
|
||||
req.rate_limit = cpu_to_le32(qp->rate_limit);
|
||||
|
||||
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
|
||||
req.network_type_en_sqd_async_notify_new_state =
|
||||
(qp->state & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
|
||||
|
|
@ -1429,6 +1436,9 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
|
|||
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (bmask_ext & CMDQ_MODIFY_QP_EXT_MODIFY_MASK_RATE_LIMIT_VALID)
|
||||
qp->shaper_allocation_status = resp.shaper_allocation_status;
|
||||
qp->cur_qp_state = qp->state;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -280,6 +280,7 @@ struct bnxt_qplib_qp {
|
|||
u8 state;
|
||||
u8 cur_qp_state;
|
||||
u64 modify_flags;
|
||||
u32 ext_modify_flags;
|
||||
u32 max_inline_data;
|
||||
u32 mtu;
|
||||
u8 path_mtu;
|
||||
|
|
@ -346,6 +347,8 @@ struct bnxt_qplib_qp {
|
|||
bool is_host_msn_tbl;
|
||||
u8 tos_dscp;
|
||||
u32 ugid_index;
|
||||
u32 rate_limit;
|
||||
u8 shaper_allocation_status;
|
||||
};
|
||||
|
||||
#define BNXT_RE_MAX_MSG_SIZE 0x80000000
|
||||
|
|
|
|||
|
|
@ -623,4 +623,10 @@ static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
|
|||
return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
|
||||
}
|
||||
|
||||
static inline bool _is_modify_qp_rate_limit_supported(u16 dev_cap_ext_flags2)
|
||||
{
|
||||
return dev_cap_ext_flags2 &
|
||||
CREQ_QUERY_FUNC_RESP_SB_MODIFY_QP_RATE_LIMIT_SUPPORTED;
|
||||
}
|
||||
|
||||
#endif /* __BNXT_QPLIB_RES_H__ */
|
||||
|
|
|
|||
|
|
@ -193,6 +193,11 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
|
|||
attr->max_dpi = le32_to_cpu(sb->max_dpi);
|
||||
|
||||
attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
|
||||
|
||||
if (_is_modify_qp_rate_limit_supported(attr->dev_cap_flags2)) {
|
||||
attr->rate_limit_min = le16_to_cpu(sb->rate_limit_min);
|
||||
attr->rate_limit_max = le32_to_cpu(sb->rate_limit_max);
|
||||
}
|
||||
bail:
|
||||
dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
|
||||
sbuf.sb, sbuf.dma_addr);
|
||||
|
|
|
|||
|
|
@ -76,6 +76,8 @@ struct bnxt_qplib_dev_attr {
|
|||
u16 dev_cap_flags;
|
||||
u16 dev_cap_flags2;
|
||||
u32 max_dpi;
|
||||
u16 rate_limit_min;
|
||||
u32 rate_limit_max;
|
||||
};
|
||||
|
||||
struct bnxt_qplib_pd {
|
||||
|
|
|
|||
|
|
@ -690,10 +690,11 @@ struct cmdq_modify_qp {
|
|||
__le32 ext_modify_mask;
|
||||
#define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL
|
||||
#define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL
|
||||
#define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_RATE_LIMIT_VALID 0x8UL
|
||||
__le32 ext_stats_ctx_id;
|
||||
__le16 schq_id;
|
||||
__le16 unused_0;
|
||||
__le32 reserved32;
|
||||
__le32 rate_limit;
|
||||
};
|
||||
|
||||
/* creq_modify_qp_resp (size:128b/16B) */
|
||||
|
|
@ -716,7 +717,8 @@ struct creq_modify_qp_resp {
|
|||
#define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_MASK 0xeUL
|
||||
#define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_SFT 1
|
||||
#define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_STATE 0x10UL
|
||||
u8 reserved8;
|
||||
u8 shaper_allocation_status;
|
||||
#define CREQ_MODIFY_QP_RESP_SHAPER_ALLOCATED 0x1UL
|
||||
__le32 lag_src_mac;
|
||||
};
|
||||
|
||||
|
|
@ -2179,7 +2181,7 @@ struct creq_query_func_resp {
|
|||
u8 reserved48[6];
|
||||
};
|
||||
|
||||
/* creq_query_func_resp_sb (size:1088b/136B) */
|
||||
/* creq_query_func_resp_sb (size:1280b/160B) */
|
||||
struct creq_query_func_resp_sb {
|
||||
u8 opcode;
|
||||
#define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
|
||||
|
|
@ -2256,12 +2258,15 @@ struct creq_query_func_resp_sb {
|
|||
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
|
||||
CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
|
||||
#define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
|
||||
#define CREQ_QUERY_FUNC_RESP_SB_MODIFY_QP_RATE_LIMIT_SUPPORTED 0x400UL
|
||||
#define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
|
||||
__le16 max_xp_qp_size;
|
||||
__le16 create_qp_batch_size;
|
||||
__le16 destroy_qp_batch_size;
|
||||
__le16 max_srq_ext;
|
||||
__le64 reserved64;
|
||||
__le16 reserved16;
|
||||
__le16 rate_limit_min;
|
||||
__le32 rate_limit_max;
|
||||
};
|
||||
|
||||
/* cmdq_set_func_resources (size:448b/56B) */
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
* Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/log2.h>
|
||||
|
||||
#include "efa_com.h"
|
||||
#include "efa_regs_defs.h"
|
||||
|
||||
|
|
@ -21,6 +23,8 @@
|
|||
#define EFA_CTRL_SUB_MINOR 1
|
||||
|
||||
enum efa_cmd_status {
|
||||
EFA_CMD_UNUSED,
|
||||
EFA_CMD_ALLOCATED,
|
||||
EFA_CMD_SUBMITTED,
|
||||
EFA_CMD_COMPLETED,
|
||||
};
|
||||
|
|
@ -32,7 +36,6 @@ struct efa_comp_ctx {
|
|||
enum efa_cmd_status status;
|
||||
u16 cmd_id;
|
||||
u8 cmd_opcode;
|
||||
u8 occupied;
|
||||
};
|
||||
|
||||
static const char *efa_com_cmd_str(u8 cmd)
|
||||
|
|
@ -241,7 +244,6 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* ID to be used with efa_com_get_comp_ctx */
|
||||
static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
|
||||
{
|
||||
u16 ctx_id;
|
||||
|
|
@ -263,36 +265,47 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
|
|||
spin_unlock(&aq->comp_ctx_lock);
|
||||
}
|
||||
|
||||
static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
|
||||
struct efa_comp_ctx *comp_ctx)
|
||||
static struct efa_comp_ctx *efa_com_alloc_comp_ctx(struct efa_com_admin_queue *aq)
|
||||
{
|
||||
u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
|
||||
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
|
||||
u16 ctx_id = cmd_id & (aq->depth - 1);
|
||||
struct efa_comp_ctx *comp_ctx;
|
||||
u16 ctx_id;
|
||||
|
||||
ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
|
||||
comp_ctx->occupied = 0;
|
||||
efa_com_dealloc_ctx_id(aq, ctx_id);
|
||||
}
|
||||
ctx_id = efa_com_alloc_ctx_id(aq);
|
||||
|
||||
static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
|
||||
u16 cmd_id, bool capture)
|
||||
{
|
||||
u16 ctx_id = cmd_id & (aq->depth - 1);
|
||||
|
||||
if (aq->comp_ctx[ctx_id].occupied && capture) {
|
||||
ibdev_err_ratelimited(
|
||||
aq->efa_dev,
|
||||
"Completion context for command_id %#x is occupied\n",
|
||||
cmd_id);
|
||||
comp_ctx = &aq->comp_ctx[ctx_id];
|
||||
if (comp_ctx->status != EFA_CMD_UNUSED) {
|
||||
efa_com_dealloc_ctx_id(aq, ctx_id);
|
||||
ibdev_err_ratelimited(aq->efa_dev,
|
||||
"Completion context[%u] is used[%u]\n",
|
||||
ctx_id, comp_ctx->status);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (capture) {
|
||||
aq->comp_ctx[ctx_id].occupied = 1;
|
||||
ibdev_dbg(aq->efa_dev,
|
||||
"Take completion ctxt for command_id %#x\n", cmd_id);
|
||||
}
|
||||
comp_ctx->status = EFA_CMD_ALLOCATED;
|
||||
ibdev_dbg(aq->efa_dev, "Take completion context[%u]\n", ctx_id);
|
||||
return comp_ctx;
|
||||
}
|
||||
|
||||
static inline u16 efa_com_get_comp_ctx_id(struct efa_com_admin_queue *aq,
|
||||
struct efa_comp_ctx *comp_ctx)
|
||||
{
|
||||
return comp_ctx - aq->comp_ctx;
|
||||
}
|
||||
|
||||
static inline void efa_com_dealloc_comp_ctx(struct efa_com_admin_queue *aq,
|
||||
struct efa_comp_ctx *comp_ctx)
|
||||
{
|
||||
u16 ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx);
|
||||
|
||||
ibdev_dbg(aq->efa_dev, "Put completion context[%u]\n", ctx_id);
|
||||
comp_ctx->status = EFA_CMD_UNUSED;
|
||||
efa_com_dealloc_ctx_id(aq, ctx_id);
|
||||
}
|
||||
|
||||
static inline struct efa_comp_ctx *efa_com_get_comp_ctx_by_cmd_id(struct efa_com_admin_queue *aq,
|
||||
u16 cmd_id)
|
||||
{
|
||||
u16 ctx_id = cmd_id & (aq->depth - 1);
|
||||
|
||||
return &aq->comp_ctx[ctx_id];
|
||||
}
|
||||
|
|
@ -310,26 +323,23 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
|
|||
u16 ctx_id;
|
||||
u16 pi;
|
||||
|
||||
comp_ctx = efa_com_alloc_comp_ctx(aq);
|
||||
if (!comp_ctx)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
queue_size_mask = aq->depth - 1;
|
||||
pi = aq->sq.pc & queue_size_mask;
|
||||
|
||||
ctx_id = efa_com_alloc_ctx_id(aq);
|
||||
ctx_id = efa_com_get_comp_ctx_id(aq, comp_ctx);
|
||||
|
||||
/* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
|
||||
cmd_id = ctx_id & queue_size_mask;
|
||||
cmd_id |= aq->sq.pc & ~queue_size_mask;
|
||||
cmd_id |= aq->sq.pc << ilog2(aq->depth);
|
||||
cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
|
||||
|
||||
cmd->aq_common_descriptor.command_id = cmd_id;
|
||||
EFA_SET(&cmd->aq_common_descriptor.flags,
|
||||
EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
|
||||
|
||||
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
|
||||
if (!comp_ctx) {
|
||||
efa_com_dealloc_ctx_id(aq, ctx_id);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
comp_ctx->status = EFA_CMD_SUBMITTED;
|
||||
comp_ctx->comp_size = comp_size_in_bytes;
|
||||
comp_ctx->user_cqe = comp;
|
||||
|
|
@ -370,9 +380,9 @@ static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
|
|||
}
|
||||
|
||||
for (i = 0; i < aq->depth; i++) {
|
||||
comp_ctx = efa_com_get_comp_ctx(aq, i, false);
|
||||
if (comp_ctx)
|
||||
init_completion(&comp_ctx->wait_event);
|
||||
comp_ctx = &aq->comp_ctx[i];
|
||||
comp_ctx->status = EFA_CMD_UNUSED;
|
||||
init_completion(&comp_ctx->wait_event);
|
||||
|
||||
aq->comp_ctx_pool[i] = i;
|
||||
}
|
||||
|
|
@ -417,11 +427,12 @@ static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq
|
|||
cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
|
||||
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
|
||||
|
||||
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
|
||||
if (comp_ctx->status != EFA_CMD_SUBMITTED) {
|
||||
comp_ctx = efa_com_get_comp_ctx_by_cmd_id(aq, cmd_id);
|
||||
if (comp_ctx->status != EFA_CMD_SUBMITTED || comp_ctx->cmd_id != cmd_id) {
|
||||
ibdev_err(aq->efa_dev,
|
||||
"Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
|
||||
cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
|
||||
"Received completion with unexpected command id[%x], status[%d] sq producer[%d], sq consumer[%d], cq consumer[%d]\n",
|
||||
cmd_id, comp_ctx->status, aq->sq.pc, aq->sq.cc,
|
||||
aq->cq.cc);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
|
@ -530,7 +541,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
|
|||
|
||||
err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
|
||||
out:
|
||||
efa_com_put_comp_ctx(aq, comp_ctx);
|
||||
efa_com_dealloc_comp_ctx(aq, comp_ctx);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
@ -580,7 +591,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
|
|||
|
||||
err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
|
||||
out:
|
||||
efa_com_put_comp_ctx(aq, comp_ctx);
|
||||
efa_com_dealloc_comp_ctx(aq, comp_ctx);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
|
|||
u8 tclass = get_tclass(grh);
|
||||
u8 priority = 0;
|
||||
u8 tc_mode = 0;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
|
||||
ret = -EOPNOTSUPP;
|
||||
|
|
@ -77,19 +77,18 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
|
|||
ah->av.flowlabel = grh->flow_label;
|
||||
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
|
||||
ah->av.tclass = tclass;
|
||||
ah->av.sl = rdma_ah_get_sl(ah_attr);
|
||||
|
||||
ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
|
||||
if (ret == -EOPNOTSUPP)
|
||||
ret = 0;
|
||||
if (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
|
||||
ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
|
||||
if (ret == -EOPNOTSUPP)
|
||||
ret = 0;
|
||||
else if (ret)
|
||||
goto err_out;
|
||||
|
||||
if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
|
||||
goto err_out;
|
||||
|
||||
if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
|
||||
grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
|
||||
ah->av.sl = priority;
|
||||
else
|
||||
ah->av.sl = rdma_ah_get_sl(ah_attr);
|
||||
if (tc_mode == HNAE3_TC_MAP_MODE_DSCP)
|
||||
ah->av.sl = priority;
|
||||
}
|
||||
|
||||
if (!check_sl_valid(hr_dev, ah->av.sl)) {
|
||||
ret = -EINVAL;
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
|
|||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
|
||||
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
|
||||
u32 least_load = cq_table->ctx_num[0];
|
||||
u32 least_load = U32_MAX;
|
||||
u8 bankid = 0;
|
||||
u8 i;
|
||||
|
||||
|
|
@ -63,7 +63,10 @@ void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
|
|||
return;
|
||||
|
||||
mutex_lock(&cq_table->bank_mutex);
|
||||
for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
|
||||
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
|
||||
if (!(cq_table->valid_cq_bank_mask & BIT(i)))
|
||||
continue;
|
||||
|
||||
if (cq_table->ctx_num[i] < least_load) {
|
||||
least_load = cq_table->ctx_num[i];
|
||||
bankid = i;
|
||||
|
|
@ -581,6 +584,11 @@ void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
|
|||
cq_table->bank[i].max = hr_dev->caps.num_cqs /
|
||||
HNS_ROCE_CQ_BANK_NUM - 1;
|
||||
}
|
||||
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK)
|
||||
cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_LIMIT;
|
||||
else
|
||||
cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_DEFAULT;
|
||||
}
|
||||
|
||||
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
|
||||
|
|
|
|||
|
|
@ -103,6 +103,10 @@
|
|||
|
||||
#define CQ_BANKID_SHIFT 2
|
||||
#define CQ_BANKID_MASK GENMASK(1, 0)
|
||||
#define VALID_CQ_BANK_MASK_DEFAULT 0xF
|
||||
#define VALID_CQ_BANK_MASK_LIMIT 0x9
|
||||
|
||||
#define VALID_EXT_SGE_QP_BANK_MASK_LIMIT 0x42
|
||||
|
||||
#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
|
||||
#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
|
||||
|
|
@ -156,6 +160,7 @@ enum {
|
|||
HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
|
||||
HNS_ROCE_CAP_FLAG_BOND = BIT(21),
|
||||
HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
|
||||
HNS_ROCE_CAP_FLAG_LIMIT_BANK = BIT(23),
|
||||
};
|
||||
|
||||
#define HNS_ROCE_DB_TYPE_COUNT 2
|
||||
|
|
@ -500,6 +505,7 @@ struct hns_roce_cq_table {
|
|||
struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
|
||||
struct mutex bank_mutex;
|
||||
u32 ctx_num[HNS_ROCE_CQ_BANK_NUM];
|
||||
u8 valid_cq_bank_mask;
|
||||
};
|
||||
|
||||
struct hns_roce_srq_table {
|
||||
|
|
|
|||
|
|
@ -876,6 +876,170 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int hns_roce_push_drain_wr(struct hns_roce_wq *wq, struct ib_cq *cq,
|
||||
u64 wr_id)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock_irqsave(&wq->lock, flags);
|
||||
if (hns_roce_wq_overflow(wq, 1, cq)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
wq->wrid[wq->head & (wq->wqe_cnt - 1)] = wr_id;
|
||||
wq->head++;
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&wq->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct hns_roce_drain_cqe {
|
||||
struct ib_cqe cqe;
|
||||
struct completion done;
|
||||
};
|
||||
|
||||
static void hns_roce_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
|
||||
{
|
||||
struct hns_roce_drain_cqe *cqe = container_of(wc->wr_cqe,
|
||||
struct hns_roce_drain_cqe,
|
||||
cqe);
|
||||
complete(&cqe->done);
|
||||
}
|
||||
|
||||
static void handle_drain_completion(struct ib_cq *ibcq,
|
||||
struct hns_roce_drain_cqe *drain,
|
||||
struct hns_roce_dev *hr_dev)
|
||||
{
|
||||
#define TIMEOUT (HZ / 10)
|
||||
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
|
||||
unsigned long flags;
|
||||
bool triggered;
|
||||
|
||||
if (ibcq->poll_ctx == IB_POLL_DIRECT) {
|
||||
while (wait_for_completion_timeout(&drain->done, TIMEOUT) <= 0)
|
||||
ib_process_cq_direct(ibcq, -1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN)
|
||||
goto waiting_done;
|
||||
|
||||
spin_lock_irqsave(&hr_cq->lock, flags);
|
||||
triggered = hr_cq->is_armed;
|
||||
hr_cq->is_armed = 1;
|
||||
spin_unlock_irqrestore(&hr_cq->lock, flags);
|
||||
|
||||
/* Triggered means this cq is processing or has been processed
|
||||
* by hns_roce_handle_device_err() or this function. We need to
|
||||
* cancel the already invoked comp_handler() to avoid concurrency.
|
||||
* If it has not been triggered, we can directly invoke
|
||||
* comp_handler().
|
||||
*/
|
||||
if (triggered) {
|
||||
switch (ibcq->poll_ctx) {
|
||||
case IB_POLL_SOFTIRQ:
|
||||
irq_poll_disable(&ibcq->iop);
|
||||
irq_poll_enable(&ibcq->iop);
|
||||
break;
|
||||
case IB_POLL_WORKQUEUE:
|
||||
case IB_POLL_UNBOUND_WORKQUEUE:
|
||||
cancel_work_sync(&ibcq->work);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (ibcq->comp_handler)
|
||||
ibcq->comp_handler(ibcq, ibcq->cq_context);
|
||||
|
||||
waiting_done:
|
||||
if (ibcq->comp_handler)
|
||||
wait_for_completion(&drain->done);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_drain_rq(struct ib_qp *ibqp)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
|
||||
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
|
||||
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
|
||||
struct hns_roce_drain_cqe rdrain = {};
|
||||
const struct ib_recv_wr *bad_rwr;
|
||||
struct ib_cq *cq = ibqp->recv_cq;
|
||||
struct ib_recv_wr rwr = {};
|
||||
int ret;
|
||||
|
||||
ret = ib_modify_qp(ibqp, &attr, IB_QP_STATE);
|
||||
if (ret && hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN) {
|
||||
ibdev_err_ratelimited(&hr_dev->ib_dev,
|
||||
"failed to modify qp during drain rq, ret = %d.\n",
|
||||
ret);
|
||||
return;
|
||||
}
|
||||
|
||||
rwr.wr_cqe = &rdrain.cqe;
|
||||
rdrain.cqe.done = hns_roce_drain_qp_done;
|
||||
init_completion(&rdrain.done);
|
||||
|
||||
if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)
|
||||
ret = hns_roce_push_drain_wr(&hr_qp->rq, cq, rwr.wr_id);
|
||||
else
|
||||
ret = hns_roce_v2_post_recv(ibqp, &rwr, &bad_rwr);
|
||||
if (ret) {
|
||||
ibdev_err_ratelimited(&hr_dev->ib_dev,
|
||||
"failed to post recv for drain rq, ret = %d.\n",
|
||||
ret);
|
||||
return;
|
||||
}
|
||||
|
||||
handle_drain_completion(cq, &rdrain, hr_dev);
|
||||
}
|
||||
|
||||
static void hns_roce_v2_drain_sq(struct ib_qp *ibqp)
|
||||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
|
||||
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
|
||||
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
|
||||
struct hns_roce_drain_cqe sdrain = {};
|
||||
const struct ib_send_wr *bad_swr;
|
||||
struct ib_cq *cq = ibqp->send_cq;
|
||||
struct ib_rdma_wr swr = {
|
||||
.wr = {
|
||||
.next = NULL,
|
||||
{ .wr_cqe = &sdrain.cqe, },
|
||||
.opcode = IB_WR_RDMA_WRITE,
|
||||
},
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = ib_modify_qp(ibqp, &attr, IB_QP_STATE);
|
||||
if (ret && hr_dev->state < HNS_ROCE_DEVICE_STATE_RST_DOWN) {
|
||||
ibdev_err_ratelimited(&hr_dev->ib_dev,
|
||||
"failed to modify qp during drain sq, ret = %d.\n",
|
||||
ret);
|
||||
return;
|
||||
}
|
||||
|
||||
sdrain.cqe.done = hns_roce_drain_qp_done;
|
||||
init_completion(&sdrain.done);
|
||||
|
||||
if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)
|
||||
ret = hns_roce_push_drain_wr(&hr_qp->sq, cq, swr.wr.wr_id);
|
||||
else
|
||||
ret = hns_roce_v2_post_send(ibqp, &swr.wr, &bad_swr);
|
||||
if (ret) {
|
||||
ibdev_err_ratelimited(&hr_dev->ib_dev,
|
||||
"failed to post send for drain sq, ret = %d.\n",
|
||||
ret);
|
||||
return;
|
||||
}
|
||||
|
||||
handle_drain_completion(cq, &sdrain, hr_dev);
|
||||
}
|
||||
|
||||
static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
|
||||
{
|
||||
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
|
||||
|
|
@ -3739,6 +3903,23 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
|
|||
HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
|
||||
}
|
||||
|
||||
static bool left_sw_wc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
|
||||
{
|
||||
struct hns_roce_qp *hr_qp;
|
||||
|
||||
list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
|
||||
if (hr_qp->sq.head != hr_qp->sq.tail)
|
||||
return true;
|
||||
}
|
||||
|
||||
list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
|
||||
if (hr_qp->rq.head != hr_qp->rq.tail)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
|
||||
enum ib_cq_notify_flags flags)
|
||||
{
|
||||
|
|
@ -3747,6 +3928,12 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
|
|||
struct hns_roce_v2_db cq_db = {};
|
||||
u32 notify_flag;
|
||||
|
||||
if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN) {
|
||||
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) &&
|
||||
left_sw_wc(hr_dev, hr_cq))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* flags = 0, then notify_flag : next
|
||||
* flags = 1, then notify flag : solocited
|
||||
|
|
@ -5053,20 +5240,22 @@ static int hns_roce_set_sl(struct ib_qp *ibqp,
|
|||
struct ib_device *ibdev = &hr_dev->ib_dev;
|
||||
int ret;
|
||||
|
||||
ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
|
||||
&hr_qp->tc_mode, &hr_qp->priority);
|
||||
if (ret && ret != -EOPNOTSUPP &&
|
||||
grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
|
||||
ibdev_err_ratelimited(ibdev,
|
||||
"failed to get dscp, ret = %d.\n", ret);
|
||||
return ret;
|
||||
}
|
||||
hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
|
||||
|
||||
if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
|
||||
grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
|
||||
hr_qp->sl = hr_qp->priority;
|
||||
else
|
||||
hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
|
||||
if (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
|
||||
ret = hns_roce_hw_v2_get_dscp(hr_dev,
|
||||
get_tclass(&attr->ah_attr.grh),
|
||||
&hr_qp->tc_mode, &hr_qp->priority);
|
||||
if (ret && ret != -EOPNOTSUPP) {
|
||||
ibdev_err_ratelimited(ibdev,
|
||||
"failed to get dscp, ret = %d.\n",
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP)
|
||||
hr_qp->sl = hr_qp->priority;
|
||||
}
|
||||
|
||||
if (!check_sl_valid(hr_dev, hr_qp->sl))
|
||||
return -EINVAL;
|
||||
|
|
@ -6956,7 +7145,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
|
|||
|
||||
INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
|
||||
|
||||
hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
|
||||
hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq",
|
||||
WQ_MEM_RECLAIM);
|
||||
if (!hr_dev->irq_workq) {
|
||||
dev_err(dev, "failed to create irq workqueue.\n");
|
||||
ret = -ENOMEM;
|
||||
|
|
@ -7014,6 +7204,8 @@ static const struct ib_device_ops hns_roce_v2_dev_ops = {
|
|||
.post_send = hns_roce_v2_post_send,
|
||||
.query_qp = hns_roce_v2_query_qp,
|
||||
.req_notify_cq = hns_roce_v2_req_notify_cq,
|
||||
.drain_rq = hns_roce_v2_drain_rq,
|
||||
.drain_sq = hns_roce_v2_drain_sq,
|
||||
};
|
||||
|
||||
static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {
|
||||
|
|
|
|||
|
|
@ -259,6 +259,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
|
|||
props->max_srq_sge = hr_dev->caps.max_srq_sges;
|
||||
}
|
||||
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) {
|
||||
props->max_cq >>= 1;
|
||||
props->max_qp >>= 1;
|
||||
}
|
||||
|
||||
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
|
||||
hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
|
||||
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
|
||||
|
|
|
|||
|
|
@ -197,22 +197,16 @@ static u8 get_affinity_cq_bank(u8 qp_bank)
|
|||
return (qp_bank >> 1) & CQ_BANKID_MASK;
|
||||
}
|
||||
|
||||
static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
|
||||
struct hns_roce_bank *bank)
|
||||
static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank, u8 valid_qp_bank_mask)
|
||||
{
|
||||
#define INVALID_LOAD_QPNUM 0xFFFFFFFF
|
||||
struct ib_cq *scq = init_attr->send_cq;
|
||||
u32 least_load = INVALID_LOAD_QPNUM;
|
||||
unsigned long cqn = 0;
|
||||
u8 bankid = 0;
|
||||
u32 bankcnt;
|
||||
u8 i;
|
||||
|
||||
if (scq)
|
||||
cqn = to_hr_cq(scq)->cqn;
|
||||
|
||||
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
|
||||
if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
|
||||
if (!(valid_qp_bank_mask & BIT(i)))
|
||||
continue;
|
||||
|
||||
bankcnt = bank[i].inuse;
|
||||
|
|
@ -246,6 +240,42 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool use_ext_sge(struct ib_qp_init_attr *init_attr)
|
||||
{
|
||||
return init_attr->cap.max_send_sge > HNS_ROCE_SGE_IN_WQE ||
|
||||
init_attr->qp_type == IB_QPT_UD ||
|
||||
init_attr->qp_type == IB_QPT_GSI;
|
||||
}
|
||||
|
||||
static u8 select_qp_bankid(struct hns_roce_dev *hr_dev,
|
||||
struct ib_qp_init_attr *init_attr)
|
||||
{
|
||||
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
|
||||
struct hns_roce_bank *bank = qp_table->bank;
|
||||
struct ib_cq *scq = init_attr->send_cq;
|
||||
u8 valid_qp_bank_mask = 0;
|
||||
unsigned long cqn = 0;
|
||||
u8 i;
|
||||
|
||||
if (scq)
|
||||
cqn = to_hr_cq(scq)->cqn;
|
||||
|
||||
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
|
||||
if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
|
||||
continue;
|
||||
|
||||
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) &&
|
||||
use_ext_sge(init_attr) &&
|
||||
!(VALID_EXT_SGE_QP_BANK_MASK_LIMIT & BIT(i)))
|
||||
continue;
|
||||
|
||||
valid_qp_bank_mask |= BIT(i);
|
||||
}
|
||||
|
||||
return get_least_load_bankid_for_qp(bank, valid_qp_bank_mask);
|
||||
}
|
||||
|
||||
static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
|
||||
struct ib_qp_init_attr *init_attr)
|
||||
{
|
||||
|
|
@ -258,8 +288,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
|
|||
num = 1;
|
||||
} else {
|
||||
mutex_lock(&qp_table->bank_mutex);
|
||||
bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
|
||||
|
||||
bankid = select_qp_bankid(hr_dev, init_attr);
|
||||
ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
|
||||
&num);
|
||||
if (ret) {
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
|
|||
|
||||
ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
return ret;
|
||||
|
||||
ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
|
|||
|
||||
ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
return ret;
|
||||
|
||||
ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
|
||||
|
||||
|
|
|
|||
|
|
@ -2886,15 +2886,6 @@ static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_sc_cq_ack - acknowledge completion q
|
||||
* @cq: cq struct
|
||||
*/
|
||||
static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq)
|
||||
{
|
||||
writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_sc_cq_init - initialize completion q
|
||||
* @cq: cq struct
|
||||
|
|
@ -2956,7 +2947,7 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
|
|||
return -ENOMEM;
|
||||
|
||||
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
|
||||
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
|
||||
set_64bit_val(wqe, 8, cq->cq_uk.cq_id);
|
||||
set_64bit_val(wqe, 16,
|
||||
FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold));
|
||||
set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
|
||||
|
|
@ -3013,7 +3004,7 @@ int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq)
|
|||
return -ENOMEM;
|
||||
|
||||
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
|
||||
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
|
||||
set_64bit_val(wqe, 8, cq->cq_uk.cq_id);
|
||||
set_64bit_val(wqe, 40, cq->shadow_area_pa);
|
||||
set_64bit_val(wqe, 48,
|
||||
(cq->virtual_map ? cq->first_pm_pbl_idx : 0));
|
||||
|
|
@ -3082,7 +3073,7 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
|
|||
return -ENOMEM;
|
||||
|
||||
set_64bit_val(wqe, 0, info->cq_size);
|
||||
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
|
||||
set_64bit_val(wqe, 8, cq->cq_uk.cq_id);
|
||||
set_64bit_val(wqe, 16,
|
||||
FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, info->shadow_read_threshold));
|
||||
set_64bit_val(wqe, 32, info->cq_pa);
|
||||
|
|
@ -3887,8 +3878,6 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
|
|||
set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
|
||||
spin_unlock_irqrestore(&ccq->dev->cqp_lock, flags);
|
||||
|
||||
dma_wmb(); /* make sure shadow area is updated before arming */
|
||||
|
||||
writel(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db);
|
||||
}
|
||||
|
||||
|
|
@ -4460,47 +4449,38 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq)
|
|||
* irdma_sc_process_ceq - process ceq
|
||||
* @dev: sc device struct
|
||||
* @ceq: ceq sc structure
|
||||
* @cq_idx: Pointer to a CQ ID that will be populated.
|
||||
*
|
||||
* It is expected caller serializes this function with cleanup_ceqes()
|
||||
* because these functions manipulate the same ceq
|
||||
*
|
||||
* Return: True if cq_idx has been populated with a CQ ID.
|
||||
*/
|
||||
void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
|
||||
bool irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq,
|
||||
u32 *cq_idx)
|
||||
{
|
||||
u64 temp;
|
||||
__le64 *ceqe;
|
||||
struct irdma_sc_cq *cq = NULL;
|
||||
struct irdma_sc_cq *temp_cq;
|
||||
u8 polarity;
|
||||
u32 cq_idx;
|
||||
|
||||
do {
|
||||
cq_idx = 0;
|
||||
ceqe = IRDMA_GET_CURRENT_CEQ_ELEM(ceq);
|
||||
get_64bit_val(ceqe, 0, &temp);
|
||||
polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp);
|
||||
if (polarity != ceq->polarity)
|
||||
return NULL;
|
||||
return false;
|
||||
|
||||
temp_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
|
||||
if (!temp_cq) {
|
||||
cq_idx = IRDMA_INVALID_CQ_IDX;
|
||||
IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
|
||||
|
||||
if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
|
||||
ceq->polarity ^= 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
cq = temp_cq;
|
||||
/* Truncate. Discard valid bit which is MSb of temp. */
|
||||
*cq_idx = temp;
|
||||
if (*cq_idx >= dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt)
|
||||
*cq_idx = IRDMA_INVALID_CQ_IDX;
|
||||
|
||||
IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
|
||||
if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
|
||||
ceq->polarity ^= 1;
|
||||
} while (cq_idx == IRDMA_INVALID_CQ_IDX);
|
||||
} while (*cq_idx == IRDMA_INVALID_CQ_IDX);
|
||||
|
||||
if (cq)
|
||||
irdma_sc_cq_ack(cq);
|
||||
return cq;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -4514,10 +4494,10 @@ void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
|
|||
*/
|
||||
void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq)
|
||||
{
|
||||
struct irdma_sc_cq *next_cq;
|
||||
u8 ceq_polarity = ceq->polarity;
|
||||
__le64 *ceqe;
|
||||
u8 polarity;
|
||||
u32 cq_idx;
|
||||
u64 temp;
|
||||
int next;
|
||||
u32 i;
|
||||
|
|
@ -4532,9 +4512,10 @@ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq)
|
|||
if (polarity != ceq_polarity)
|
||||
return;
|
||||
|
||||
next_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
|
||||
if (cq == next_cq)
|
||||
set_64bit_val(ceqe, 0, temp & IRDMA_CEQE_VALID);
|
||||
cq_idx = temp;
|
||||
if (cq_idx == cq->cq_uk.cq_id)
|
||||
set_64bit_val(ceqe, 0, (temp & IRDMA_CEQE_VALID) |
|
||||
IRDMA_INVALID_CQ_IDX);
|
||||
|
||||
next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, i);
|
||||
if (!next)
|
||||
|
|
@ -4975,7 +4956,7 @@ int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq)
|
|||
return -ENOMEM;
|
||||
|
||||
set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
|
||||
set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1);
|
||||
set_64bit_val(wqe, 8, ccq->cq_uk.cq_id);
|
||||
set_64bit_val(wqe, 40, ccq->shadow_area_pa);
|
||||
|
||||
hdr = ccq->cq_uk.cq_id |
|
||||
|
|
@ -5788,8 +5769,7 @@ static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev,
|
|||
bool is_mrte_loc_mem;
|
||||
|
||||
loc_mem_pages = hmc_fpm_misc->loc_mem_pages;
|
||||
is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds ?
|
||||
true : false;
|
||||
is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds;
|
||||
|
||||
irdma_get_rsrc_mem_config(dev, is_mrte_loc_mem);
|
||||
mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
|
||||
|
|
@ -6462,6 +6442,9 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
|
|||
int ret_code = 0;
|
||||
u8 db_size;
|
||||
|
||||
spin_lock_init(&dev->puda_cq_lock);
|
||||
dev->ilq_cq = NULL;
|
||||
dev->ieq_cq = NULL;
|
||||
INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */
|
||||
mutex_init(&dev->ws_mutex);
|
||||
dev->hmc_fn_id = info->hmc_fn_id;
|
||||
|
|
|
|||
|
|
@ -98,6 +98,74 @@ static void irdma_puda_ce_handler(struct irdma_pci_f *rf,
|
|||
irdma_sc_ccq_arm(cq);
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_process_normal_ceqe - Handle a CEQE for a normal CQ.
|
||||
* @rf: RDMA PCI function.
|
||||
* @dev: iWARP device.
|
||||
* @cq_idx: CQ ID. Must be in table bounds.
|
||||
*
|
||||
* Context: Atomic (CEQ lock must be held)
|
||||
*/
|
||||
static void irdma_process_normal_ceqe(struct irdma_pci_f *rf,
|
||||
struct irdma_sc_dev *dev, u32 cq_idx)
|
||||
{
|
||||
/* cq_idx bounds validated in irdma_sc_process_ceq. */
|
||||
struct irdma_cq *icq = READ_ONCE(rf->cq_table[cq_idx]);
|
||||
struct irdma_sc_cq *cq;
|
||||
|
||||
if (unlikely(!icq)) {
|
||||
/* Should not happen since CEQ is scrubbed upon CQ delete. */
|
||||
ibdev_warn_ratelimited(to_ibdev(dev), "Stale CEQE for CQ %u",
|
||||
cq_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
cq = &icq->sc_cq;
|
||||
|
||||
if (unlikely(cq->cq_type != IRDMA_CQ_TYPE_IWARP)) {
|
||||
ibdev_warn_ratelimited(to_ibdev(dev), "Unexpected CQ type %u",
|
||||
cq->cq_type);
|
||||
return;
|
||||
}
|
||||
|
||||
writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
|
||||
irdma_iwarp_ce_handler(cq);
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_process_reserved_ceqe - Handle a CEQE for a reserved CQ.
|
||||
* @rf: RDMA PCI function.
|
||||
* @dev: iWARP device.
|
||||
* @cq_idx: CQ ID.
|
||||
*
|
||||
* Context: Atomic
|
||||
*/
|
||||
static void irdma_process_reserved_ceqe(struct irdma_pci_f *rf,
|
||||
struct irdma_sc_dev *dev, u32 cq_idx)
|
||||
{
|
||||
struct irdma_sc_cq *cq;
|
||||
|
||||
if (cq_idx == IRDMA_RSVD_CQ_ID_CQP) {
|
||||
cq = &rf->ccq.sc_cq;
|
||||
/* CQP CQ lifetime > CEQ. */
|
||||
writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
|
||||
queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work);
|
||||
} else if (cq_idx == IRDMA_RSVD_CQ_ID_ILQ ||
|
||||
cq_idx == IRDMA_RSVD_CQ_ID_IEQ) {
|
||||
scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) {
|
||||
cq = (cq_idx == IRDMA_RSVD_CQ_ID_ILQ) ?
|
||||
dev->ilq_cq : dev->ieq_cq;
|
||||
if (!cq) {
|
||||
ibdev_warn_ratelimited(to_ibdev(dev),
|
||||
"Stale ILQ/IEQ CEQE");
|
||||
return;
|
||||
}
|
||||
writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
|
||||
irdma_puda_ce_handler(rf, cq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* irdma_process_ceq - handle ceq for completions
|
||||
* @rf: RDMA PCI function
|
||||
|
|
@ -107,28 +175,28 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
|
|||
{
|
||||
struct irdma_sc_dev *dev = &rf->sc_dev;
|
||||
struct irdma_sc_ceq *sc_ceq;
|
||||
struct irdma_sc_cq *cq;
|
||||
unsigned long flags;
|
||||
u32 cq_idx;
|
||||
|
||||
sc_ceq = &ceq->sc_ceq;
|
||||
do {
|
||||
spin_lock_irqsave(&ceq->ce_lock, flags);
|
||||
cq = irdma_sc_process_ceq(dev, sc_ceq);
|
||||
if (!cq) {
|
||||
|
||||
if (!irdma_sc_process_ceq(dev, sc_ceq, &cq_idx)) {
|
||||
spin_unlock_irqrestore(&ceq->ce_lock, flags);
|
||||
break;
|
||||
}
|
||||
|
||||
if (cq->cq_type == IRDMA_CQ_TYPE_IWARP)
|
||||
irdma_iwarp_ce_handler(cq);
|
||||
/* Normal CQs must be handled while holding CEQ lock. */
|
||||
if (likely(cq_idx > IRDMA_RSVD_CQ_ID_IEQ)) {
|
||||
irdma_process_normal_ceqe(rf, dev, cq_idx);
|
||||
spin_unlock_irqrestore(&ceq->ce_lock, flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&ceq->ce_lock, flags);
|
||||
|
||||
if (cq->cq_type == IRDMA_CQ_TYPE_CQP)
|
||||
queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work);
|
||||
else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ ||
|
||||
cq->cq_type == IRDMA_CQ_TYPE_IEQ)
|
||||
irdma_puda_ce_handler(rf, cq);
|
||||
irdma_process_reserved_ceqe(rf, dev, cq_idx);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
|
|
@ -1532,8 +1600,8 @@ static int irdma_initialize_ilq(struct irdma_device *iwdev)
|
|||
int status;
|
||||
|
||||
info.type = IRDMA_PUDA_RSRC_TYPE_ILQ;
|
||||
info.cq_id = 1;
|
||||
info.qp_id = 1;
|
||||
info.cq_id = IRDMA_RSVD_CQ_ID_ILQ;
|
||||
info.qp_id = IRDMA_RSVD_QP_ID_GSI_ILQ;
|
||||
info.count = 1;
|
||||
info.pd_id = 1;
|
||||
info.abi_ver = IRDMA_ABI_VER;
|
||||
|
|
@ -1562,7 +1630,7 @@ static int irdma_initialize_ieq(struct irdma_device *iwdev)
|
|||
int status;
|
||||
|
||||
info.type = IRDMA_PUDA_RSRC_TYPE_IEQ;
|
||||
info.cq_id = 2;
|
||||
info.cq_id = IRDMA_RSVD_CQ_ID_IEQ;
|
||||
info.qp_id = iwdev->vsi.exception_lan_q;
|
||||
info.count = 1;
|
||||
info.pd_id = 2;
|
||||
|
|
@ -1868,7 +1936,7 @@ int irdma_rt_init_hw(struct irdma_device *iwdev,
|
|||
vsi_info.pf_data_vsi_num = iwdev->vsi_num;
|
||||
vsi_info.register_qset = rf->gen_ops.register_qset;
|
||||
vsi_info.unregister_qset = rf->gen_ops.unregister_qset;
|
||||
vsi_info.exception_lan_q = 2;
|
||||
vsi_info.exception_lan_q = IRDMA_RSVD_QP_ID_IEQ;
|
||||
irdma_sc_vsi_init(&iwdev->vsi, &vsi_info);
|
||||
|
||||
status = irdma_setup_cm_core(iwdev, rf->rdma_ver);
|
||||
|
|
@ -2099,18 +2167,18 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
|
|||
irdma_set_hw_rsrc(rf);
|
||||
|
||||
set_bit(0, rf->allocated_mrs);
|
||||
set_bit(0, rf->allocated_qps);
|
||||
set_bit(0, rf->allocated_cqs);
|
||||
set_bit(IRDMA_RSVD_QP_ID_0, rf->allocated_qps);
|
||||
set_bit(IRDMA_RSVD_CQ_ID_CQP, rf->allocated_cqs);
|
||||
set_bit(0, rf->allocated_srqs);
|
||||
set_bit(0, rf->allocated_pds);
|
||||
set_bit(0, rf->allocated_arps);
|
||||
set_bit(0, rf->allocated_ahs);
|
||||
set_bit(0, rf->allocated_mcgs);
|
||||
set_bit(2, rf->allocated_qps); /* qp 2 IEQ */
|
||||
set_bit(1, rf->allocated_qps); /* qp 1 ILQ */
|
||||
set_bit(1, rf->allocated_cqs);
|
||||
set_bit(IRDMA_RSVD_QP_ID_IEQ, rf->allocated_qps);
|
||||
set_bit(IRDMA_RSVD_QP_ID_GSI_ILQ, rf->allocated_qps);
|
||||
set_bit(IRDMA_RSVD_CQ_ID_ILQ, rf->allocated_cqs);
|
||||
set_bit(1, rf->allocated_pds);
|
||||
set_bit(2, rf->allocated_cqs);
|
||||
set_bit(IRDMA_RSVD_CQ_ID_IEQ, rf->allocated_cqs);
|
||||
set_bit(2, rf->allocated_pds);
|
||||
|
||||
INIT_LIST_HEAD(&rf->mc_qht_list.list);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include <linux/workqueue.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/iopoll.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/kthread.h>
|
||||
#ifndef CONFIG_64BIT
|
||||
|
|
@ -528,6 +529,7 @@ void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
|
|||
void irdma_srq_event(struct irdma_sc_srq *srq);
|
||||
void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq);
|
||||
void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
|
||||
int irdma_get_timeout_threshold(struct irdma_sc_dev *dev);
|
||||
int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
|
||||
struct irdma_modify_qp_info *info, bool wait);
|
||||
int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend);
|
||||
|
|
|
|||
|
|
@ -809,6 +809,13 @@ error:
|
|||
dma_free_coherent(dev->hw->device, rsrc->cqmem.size,
|
||||
rsrc->cqmem.va, rsrc->cqmem.pa);
|
||||
rsrc->cqmem.va = NULL;
|
||||
} else {
|
||||
scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) {
|
||||
if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
|
||||
dev->ilq_cq = cq;
|
||||
else
|
||||
dev->ieq_cq = cq;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
@ -856,6 +863,13 @@ static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc)
|
|||
struct irdma_ccq_cqe_info compl_info;
|
||||
struct irdma_sc_dev *dev = rsrc->dev;
|
||||
|
||||
scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) {
|
||||
if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
|
||||
dev->ilq_cq = NULL;
|
||||
else
|
||||
dev->ieq_cq = NULL;
|
||||
}
|
||||
|
||||
if (rsrc->dev->ceq_valid) {
|
||||
irdma_cqp_cq_destroy_cmd(dev, &rsrc->cq);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -239,6 +239,18 @@ enum irdma_queue_type {
|
|||
IRDMA_QUEUE_TYPE_SRQ,
|
||||
};
|
||||
|
||||
enum irdma_rsvd_cq_id {
|
||||
IRDMA_RSVD_CQ_ID_CQP,
|
||||
IRDMA_RSVD_CQ_ID_ILQ,
|
||||
IRDMA_RSVD_CQ_ID_IEQ,
|
||||
};
|
||||
|
||||
enum irdma_rsvd_qp_id {
|
||||
IRDMA_RSVD_QP_ID_0,
|
||||
IRDMA_RSVD_QP_ID_GSI_ILQ,
|
||||
IRDMA_RSVD_QP_ID_IEQ,
|
||||
};
|
||||
|
||||
struct irdma_sc_dev;
|
||||
struct irdma_vsi_pestat;
|
||||
|
||||
|
|
@ -695,6 +707,9 @@ struct irdma_sc_dev {
|
|||
struct irdma_sc_aeq *aeq;
|
||||
struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT];
|
||||
struct irdma_sc_cq *ccq;
|
||||
spinlock_t puda_cq_lock;
|
||||
struct irdma_sc_cq *ilq_cq;
|
||||
struct irdma_sc_cq *ieq_cq;
|
||||
const struct irdma_irq_ops *irq_ops;
|
||||
struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
|
||||
struct irdma_hmc_fpm_misc hmc_fpm_misc;
|
||||
|
|
@ -1332,7 +1347,8 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq);
|
|||
int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
|
||||
struct irdma_ceq_init_info *info);
|
||||
void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq);
|
||||
void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq);
|
||||
bool irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq,
|
||||
u32 *cq_idx);
|
||||
|
||||
int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
|
||||
struct irdma_aeq_init_info *info);
|
||||
|
|
|
|||
|
|
@ -114,7 +114,6 @@ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
|
|||
*/
|
||||
void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
|
||||
{
|
||||
dma_wmb();
|
||||
writel(qp->qp_id, qp->wqe_alloc_db);
|
||||
}
|
||||
|
||||
|
|
@ -1107,8 +1106,6 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
|
|||
|
||||
set_64bit_val(cq->shadow_area, 32, temp_val);
|
||||
|
||||
dma_wmb(); /* make sure WQE is populated before valid bit is set */
|
||||
|
||||
writel(cq->cq_id, cq->cqe_alloc_db);
|
||||
}
|
||||
|
||||
|
|
@ -1408,8 +1405,7 @@ exit:
|
|||
* from SW for all unprocessed WQEs. For GEN3 and beyond
|
||||
* FW will generate/flush these CQEs so move to the next CQE
|
||||
*/
|
||||
move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
|
||||
false : true;
|
||||
move_cq_head = qp->uk_attrs->hw_rev > IRDMA_GEN_2;
|
||||
}
|
||||
|
||||
if (move_cq_head) {
|
||||
|
|
|
|||
|
|
@ -573,7 +573,7 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
|
|||
}
|
||||
}
|
||||
|
||||
static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev)
|
||||
int irdma_get_timeout_threshold(struct irdma_sc_dev *dev)
|
||||
{
|
||||
u16 time_s = dev->vc_caps.cqp_timeout_s;
|
||||
|
||||
|
|
@ -830,7 +830,8 @@ void irdma_cq_rem_ref(struct ib_cq *ibcq)
|
|||
return;
|
||||
}
|
||||
|
||||
iwdev->rf->cq_table[iwcq->cq_num] = NULL;
|
||||
/* May be asynchronously sampled by CEQ ISR without holding tbl lock. */
|
||||
WRITE_ONCE(iwdev->rf->cq_table[iwcq->cq_num], NULL);
|
||||
spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
|
||||
complete(&iwcq->free_cq);
|
||||
}
|
||||
|
|
@ -2239,7 +2240,7 @@ void irdma_pble_free_paged_mem(struct irdma_chunk *chunk)
|
|||
chunk->pg_cnt);
|
||||
|
||||
done:
|
||||
kfree(chunk->dmainfo.dmaaddrs);
|
||||
kvfree(chunk->dmainfo.dmaaddrs);
|
||||
chunk->dmainfo.dmaaddrs = NULL;
|
||||
vfree(chunk->vaddr);
|
||||
chunk->vaddr = NULL;
|
||||
|
|
@ -2256,7 +2257,7 @@ int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt)
|
|||
u32 size;
|
||||
void *va;
|
||||
|
||||
chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
|
||||
chunk->dmainfo.dmaaddrs = kvzalloc(pg_cnt << 3, GFP_KERNEL);
|
||||
if (!chunk->dmainfo.dmaaddrs)
|
||||
return -ENOMEM;
|
||||
|
||||
|
|
@ -2277,7 +2278,7 @@ int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt)
|
|||
|
||||
return 0;
|
||||
err:
|
||||
kfree(chunk->dmainfo.dmaaddrs);
|
||||
kvfree(chunk->dmainfo.dmaaddrs);
|
||||
chunk->dmainfo.dmaaddrs = NULL;
|
||||
|
||||
return -ENOMEM;
|
||||
|
|
|
|||
|
|
@ -2669,9 +2669,12 @@ static int irdma_create_cq(struct ib_cq *ibcq,
|
|||
goto cq_destroy;
|
||||
}
|
||||
}
|
||||
rf->cq_table[cq_num] = iwcq;
|
||||
|
||||
init_completion(&iwcq->free_cq);
|
||||
|
||||
/* Populate table entry after CQ is fully created. */
|
||||
smp_store_release(&rf->cq_table[cq_num], iwcq);
|
||||
|
||||
return 0;
|
||||
cq_destroy:
|
||||
irdma_cq_wq_destroy(rf, cq);
|
||||
|
|
@ -5027,15 +5030,15 @@ static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, b
|
|||
}
|
||||
|
||||
if (!sleep) {
|
||||
int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
|
||||
const u64 tmout_ms = irdma_get_timeout_threshold(&rf->sc_dev) *
|
||||
CQP_COMPL_WAIT_TIME_MS;
|
||||
|
||||
do {
|
||||
irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
|
||||
mdelay(1);
|
||||
} while (!ah->sc_ah.ah_info.ah_valid && --cnt);
|
||||
|
||||
if (!cnt) {
|
||||
ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out");
|
||||
if (poll_timeout_us_atomic(irdma_cqp_ce_handler(rf,
|
||||
&rf->ccq.sc_cq),
|
||||
ah->sc_ah.ah_info.ah_valid, 1,
|
||||
tmout_ms * USEC_PER_MSEC, false)) {
|
||||
ibdev_dbg(&iwdev->ibdev,
|
||||
"VERBS: CQP create AH timed out");
|
||||
err = -ETIMEDOUT;
|
||||
goto err_ah_create;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
|||
|
||||
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
|
||||
cq->cq_handle = INVALID_MANA_HANDLE;
|
||||
is_rnic_cq = mana_ib_is_rnic(mdev);
|
||||
|
||||
if (udata) {
|
||||
if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
|
||||
|
|
@ -35,8 +36,6 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
|||
return err;
|
||||
}
|
||||
|
||||
is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
|
||||
|
||||
if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
|
||||
attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
|
||||
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
|
||||
|
|
@ -55,7 +54,6 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
|
|||
ibucontext);
|
||||
doorbell = mana_ucontext->doorbell;
|
||||
} else {
|
||||
is_rnic_cq = true;
|
||||
if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
|
||||
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
|
||||
return -EINVAL;
|
||||
|
|
|
|||
|
|
@ -69,6 +69,12 @@ static const struct ib_device_ops mana_ib_device_stats_ops = {
|
|||
.alloc_hw_device_stats = mana_ib_alloc_hw_device_stats,
|
||||
};
|
||||
|
||||
const struct ib_device_ops mana_ib_dev_dm_ops = {
|
||||
.alloc_dm = mana_ib_alloc_dm,
|
||||
.dealloc_dm = mana_ib_dealloc_dm,
|
||||
.reg_dm_mr = mana_ib_reg_dm_mr,
|
||||
};
|
||||
|
||||
static int mana_ib_netdev_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
|
|
@ -139,6 +145,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
|
|||
ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
|
||||
if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT)
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_device_stats_ops);
|
||||
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_dm_ops);
|
||||
|
||||
ret = mana_ib_create_eqs(dev);
|
||||
if (ret) {
|
||||
|
|
|
|||
|
|
@ -131,6 +131,11 @@ struct mana_ib_mr {
|
|||
mana_handle_t mr_handle;
|
||||
};
|
||||
|
||||
struct mana_ib_dm {
|
||||
struct ib_dm ibdm;
|
||||
mana_handle_t dm_handle;
|
||||
};
|
||||
|
||||
struct mana_ib_cq {
|
||||
struct ib_cq ibcq;
|
||||
struct mana_ib_queue queue;
|
||||
|
|
@ -735,4 +740,11 @@ struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 leng
|
|||
u64 iova, int fd, int mr_access_flags,
|
||||
struct ib_dmah *dmah,
|
||||
struct uverbs_attr_bundle *attrs);
|
||||
|
||||
struct ib_dm *mana_ib_alloc_dm(struct ib_device *dev, struct ib_ucontext *context,
|
||||
struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs);
|
||||
int mana_ib_dealloc_dm(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
|
||||
struct ib_mr *mana_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr,
|
||||
struct uverbs_attr_bundle *attrs);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
|
|||
|
||||
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
|
||||
sizeof(resp));
|
||||
req.hdr.req.msg_version = GDMA_MESSAGE_V2;
|
||||
req.pd_handle = mr_params->pd_handle;
|
||||
req.mr_type = mr_params->mr_type;
|
||||
|
||||
|
|
@ -55,6 +56,12 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
|
|||
req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
|
||||
req.zbva.access_flags = mr_params->zbva.access_flags;
|
||||
break;
|
||||
case GDMA_MR_TYPE_DM:
|
||||
req.da_ext.length = mr_params->da.length;
|
||||
req.da.dm_handle = mr_params->da.dm_handle;
|
||||
req.da.offset = mr_params->da.offset;
|
||||
req.da.access_flags = mr_params->da.access_flags;
|
||||
break;
|
||||
default:
|
||||
ibdev_dbg(&dev->ib_dev,
|
||||
"invalid param (GDMA_MR_TYPE) passed, type %d\n",
|
||||
|
|
@ -317,3 +324,126 @@ int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mana_ib_gd_alloc_dm(struct mana_ib_dev *mdev, struct mana_ib_dm *dm,
|
||||
struct ib_dm_alloc_attr *attr)
|
||||
{
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
struct gdma_alloc_dm_resp resp = {};
|
||||
struct gdma_alloc_dm_req req = {};
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOC_DM, sizeof(req), sizeof(resp));
|
||||
req.length = attr->length;
|
||||
req.alignment = attr->alignment;
|
||||
req.flags = attr->flags;
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err || resp.hdr.status) {
|
||||
if (!err)
|
||||
err = -EPROTO;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
dm->dm_handle = resp.dm_handle;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ib_dm *mana_ib_alloc_dm(struct ib_device *ibdev,
|
||||
struct ib_ucontext *context,
|
||||
struct ib_dm_alloc_attr *attr,
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_dm *dm;
|
||||
int err;
|
||||
|
||||
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
|
||||
if (!dm)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = mana_ib_gd_alloc_dm(dev, dm, attr);
|
||||
if (err)
|
||||
goto err_free;
|
||||
|
||||
return &dm->ibdm;
|
||||
|
||||
err_free:
|
||||
kfree(dm);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int mana_ib_gd_destroy_dm(struct mana_ib_dev *mdev, struct mana_ib_dm *dm)
|
||||
{
|
||||
struct gdma_context *gc = mdev_to_gc(mdev);
|
||||
struct gdma_destroy_dm_resp resp = {};
|
||||
struct gdma_destroy_dm_req req = {};
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DM, sizeof(req), sizeof(resp));
|
||||
req.dm_handle = dm->dm_handle;
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err || resp.hdr.status) {
|
||||
if (!err)
|
||||
err = -EPROTO;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mana_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibdm->device, struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_dm *dm = container_of(ibdm, struct mana_ib_dm, ibdm);
|
||||
int err;
|
||||
|
||||
err = mana_ib_gd_destroy_dm(dev, dm);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kfree(dm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ib_mr *mana_ib_reg_dm_mr(struct ib_pd *ibpd, struct ib_dm *ibdm,
|
||||
struct ib_dm_mr_attr *attr,
|
||||
struct uverbs_attr_bundle *attrs)
|
||||
{
|
||||
struct mana_ib_dev *dev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
|
||||
struct mana_ib_dm *mana_dm = container_of(ibdm, struct mana_ib_dm, ibdm);
|
||||
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
|
||||
struct gdma_create_mr_params mr_params = {};
|
||||
struct mana_ib_mr *mr;
|
||||
int err;
|
||||
|
||||
attr->access_flags &= ~IB_ACCESS_OPTIONAL;
|
||||
if (attr->access_flags & ~VALID_MR_FLAGS)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
|
||||
if (!mr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mr_params.pd_handle = pd->pd_handle;
|
||||
mr_params.mr_type = GDMA_MR_TYPE_DM;
|
||||
mr_params.da.dm_handle = mana_dm->dm_handle;
|
||||
mr_params.da.offset = attr->offset;
|
||||
mr_params.da.length = attr->length;
|
||||
mr_params.da.access_flags =
|
||||
mana_ib_verbs_to_gdma_access_flags(attr->access_flags);
|
||||
|
||||
err = mana_ib_gd_create_mr(dev, mr, &mr_params);
|
||||
if (err)
|
||||
goto err_free;
|
||||
|
||||
return &mr->ibmr;
|
||||
|
||||
err_free:
|
||||
kfree(mr);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -561,12 +561,20 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
|
|||
* of an error it will still be zeroed out.
|
||||
* Use native port in case of reps
|
||||
*/
|
||||
if (dev->is_rep)
|
||||
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
|
||||
1, 0);
|
||||
else
|
||||
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
|
||||
mdev_port_num, 0);
|
||||
if (dev->is_rep) {
|
||||
struct mlx5_eswitch_rep *rep;
|
||||
|
||||
rep = dev->port[port_num - 1].rep;
|
||||
if (rep) {
|
||||
mdev = mlx5_eswitch_get_core_dev(rep->esw);
|
||||
WARN_ON(!mdev);
|
||||
}
|
||||
mdev_port_num = 1;
|
||||
}
|
||||
|
||||
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
|
||||
mdev_port_num, 0);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
|
||||
|
|
@ -1581,6 +1589,129 @@ static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_port_speed_from_port(struct mlx5_ib_dev *dev,
|
||||
u32 port_num, u64 *speed)
|
||||
{
|
||||
struct ib_port_speed_info speed_info;
|
||||
struct ib_port_attr attr = {};
|
||||
int err;
|
||||
|
||||
err = mlx5_ib_query_port(&dev->ib_dev, port_num, &attr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (attr.state == IB_PORT_DOWN) {
|
||||
*speed = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = ib_port_attr_to_speed_info(&attr, &speed_info);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
*speed = speed_info.rate;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_port_speed_from_vport(struct mlx5_core_dev *mdev,
|
||||
u8 op_mod, u16 vport,
|
||||
u8 other_vport, u64 *speed,
|
||||
struct mlx5_ib_dev *dev,
|
||||
u32 port_num)
|
||||
{
|
||||
u32 max_tx_speed;
|
||||
int err;
|
||||
|
||||
err = mlx5_query_vport_max_tx_speed(mdev, op_mod, vport, other_vport,
|
||||
&max_tx_speed);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (max_tx_speed == 0)
|
||||
/* Value 0 indicates field not supported, fallback */
|
||||
return mlx5_ib_query_port_speed_from_port(dev, port_num,
|
||||
speed);
|
||||
|
||||
*speed = max_tx_speed;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_port_speed_from_bond(struct mlx5_ib_dev *dev,
|
||||
u32 port_num, u64 *speed)
|
||||
{
|
||||
struct mlx5_core_dev *mdev = dev->mdev;
|
||||
u32 bond_speed;
|
||||
int err;
|
||||
|
||||
err = mlx5_lag_query_bond_speed(mdev, &bond_speed);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
*speed = bond_speed / MLX5_MAX_TX_SPEED_UNIT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_port_speed_non_rep(struct mlx5_ib_dev *dev,
|
||||
u32 port_num, u64 *speed)
|
||||
{
|
||||
u16 op_mod = MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT;
|
||||
|
||||
if (mlx5_lag_is_roce(dev->mdev))
|
||||
return mlx5_ib_query_port_speed_from_bond(dev, port_num,
|
||||
speed);
|
||||
|
||||
return mlx5_ib_query_port_speed_from_vport(dev->mdev, op_mod, 0, false,
|
||||
speed, dev, port_num);
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_port_speed_rep(struct mlx5_ib_dev *dev, u32 port_num,
|
||||
u64 *speed)
|
||||
{
|
||||
struct mlx5_eswitch_rep *rep;
|
||||
struct mlx5_core_dev *mdev;
|
||||
u16 op_mod;
|
||||
|
||||
if (!dev->port[port_num - 1].rep) {
|
||||
mlx5_ib_warn(dev, "Representor doesn't exist for port %u\n",
|
||||
port_num);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rep = dev->port[port_num - 1].rep;
|
||||
mdev = mlx5_eswitch_get_core_dev(rep->esw);
|
||||
if (!mdev)
|
||||
return -ENODEV;
|
||||
|
||||
if (rep->vport == MLX5_VPORT_UPLINK) {
|
||||
if (mlx5_lag_is_sriov(mdev))
|
||||
return mlx5_ib_query_port_speed_from_bond(dev,
|
||||
port_num,
|
||||
speed);
|
||||
|
||||
return mlx5_ib_query_port_speed_from_port(dev, port_num,
|
||||
speed);
|
||||
}
|
||||
|
||||
op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
|
||||
return mlx5_ib_query_port_speed_from_vport(dev->mdev, op_mod,
|
||||
rep->vport, true, speed, dev,
|
||||
port_num);
|
||||
}
|
||||
|
||||
int mlx5_ib_query_port_speed(struct ib_device *ibdev, u32 port_num, u64 *speed)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(ibdev);
|
||||
|
||||
if (mlx5_ib_port_link_layer(ibdev, port_num) ==
|
||||
IB_LINK_LAYER_INFINIBAND || mlx5_core_mp_enabled(dev->mdev))
|
||||
return mlx5_ib_query_port_speed_from_port(dev, port_num, speed);
|
||||
else if (!dev->is_rep)
|
||||
return mlx5_ib_query_port_speed_non_rep(dev, port_num, speed);
|
||||
else
|
||||
return mlx5_ib_query_port_speed_rep(dev, port_num, speed);
|
||||
}
|
||||
|
||||
static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
|
||||
union ib_gid *gid)
|
||||
{
|
||||
|
|
@ -2323,6 +2454,70 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
|
|||
virt_to_page(dev->mdev->clock_info));
|
||||
}
|
||||
|
||||
static int phys_addr_to_bar(struct pci_dev *pdev, phys_addr_t pa)
|
||||
{
|
||||
resource_size_t start, end;
|
||||
int bar;
|
||||
|
||||
for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
|
||||
/* Skip BARs not present or not memory-mapped */
|
||||
if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
|
||||
continue;
|
||||
|
||||
start = pci_resource_start(pdev, bar);
|
||||
end = pci_resource_end(pdev, bar);
|
||||
|
||||
if (!start || !end)
|
||||
continue;
|
||||
|
||||
if (pa >= start && pa <= end)
|
||||
return bar;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int mlx5_ib_mmap_get_pfns(struct rdma_user_mmap_entry *entry,
|
||||
struct phys_vec *phys_vec,
|
||||
struct p2pdma_provider **provider)
|
||||
{
|
||||
struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
|
||||
struct pci_dev *pdev = to_mdev(entry->ucontext->device)->mdev->pdev;
|
||||
int bar;
|
||||
|
||||
phys_vec->paddr = mentry->address;
|
||||
phys_vec->len = entry->npages * PAGE_SIZE;
|
||||
|
||||
bar = phys_addr_to_bar(pdev, phys_vec->paddr);
|
||||
if (bar < 0)
|
||||
return -EINVAL;
|
||||
|
||||
*provider = pcim_p2pdma_provider(pdev, bar);
|
||||
/* If the kernel was not compiled with CONFIG_PCI_P2PDMA the
|
||||
* functionality is not supported.
|
||||
*/
|
||||
if (!*provider)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct rdma_user_mmap_entry *
|
||||
mlx5_ib_pgoff_to_mmap_entry(struct ib_ucontext *ucontext, off_t pg_off)
|
||||
{
|
||||
unsigned long entry_pgoff;
|
||||
unsigned long idx;
|
||||
u8 command;
|
||||
|
||||
pg_off = pg_off >> PAGE_SHIFT;
|
||||
command = get_command(pg_off);
|
||||
idx = get_extended_index(pg_off);
|
||||
|
||||
entry_pgoff = command << 16 | idx;
|
||||
|
||||
return rdma_user_mmap_entry_get_pgoff(ucontext, entry_pgoff);
|
||||
}
|
||||
|
||||
static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
|
||||
{
|
||||
struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
|
||||
|
|
@ -2838,6 +3033,14 @@ static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
|
|||
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
|
||||
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
|
||||
case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
|
||||
if (ibdev->ib_active) {
|
||||
struct ib_event speed_event = {};
|
||||
|
||||
speed_event.device = &ibdev->ib_dev;
|
||||
speed_event.event = IB_EVENT_DEVICE_SPEED_CHANGE;
|
||||
ib_dispatch_event(&speed_event);
|
||||
}
|
||||
|
||||
/* In RoCE, port up/down events are handled in
|
||||
* mlx5_netdev_event().
|
||||
*/
|
||||
|
|
@ -2878,7 +3081,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
|
|||
container_of(_work, struct mlx5_ib_event_work, work);
|
||||
struct mlx5_ib_dev *ibdev;
|
||||
struct ib_event ibev;
|
||||
bool fatal = false;
|
||||
|
||||
if (work->is_slave) {
|
||||
ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
|
||||
|
|
@ -2889,12 +3091,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
|
|||
}
|
||||
|
||||
switch (work->event) {
|
||||
case MLX5_DEV_EVENT_SYS_ERROR:
|
||||
ibev.event = IB_EVENT_DEVICE_FATAL;
|
||||
mlx5_ib_handle_internal_error(ibdev);
|
||||
ibev.element.port_num = (u8)(unsigned long)work->param;
|
||||
fatal = true;
|
||||
break;
|
||||
case MLX5_EVENT_TYPE_PORT_CHANGE:
|
||||
if (handle_port_change(ibdev, work->param, &ibev))
|
||||
goto out;
|
||||
|
|
@ -2916,8 +3112,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
|
|||
if (ibdev->ib_active)
|
||||
ib_dispatch_event(&ibev);
|
||||
|
||||
if (fatal)
|
||||
ibdev->ib_active = false;
|
||||
out:
|
||||
kfree(work);
|
||||
}
|
||||
|
|
@ -2961,6 +3155,66 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb,
|
|||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static void mlx5_ib_handle_sys_error_event(struct work_struct *_work)
|
||||
{
|
||||
struct mlx5_ib_event_work *work =
|
||||
container_of(_work, struct mlx5_ib_event_work, work);
|
||||
struct mlx5_ib_dev *ibdev = work->dev;
|
||||
struct ib_event ibev;
|
||||
|
||||
ibev.event = IB_EVENT_DEVICE_FATAL;
|
||||
mlx5_ib_handle_internal_error(ibdev);
|
||||
ibev.element.port_num = (u8)(unsigned long)work->param;
|
||||
ibev.device = &ibdev->ib_dev;
|
||||
|
||||
if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
|
||||
mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ibdev->ib_active)
|
||||
ib_dispatch_event(&ibev);
|
||||
|
||||
ibdev->ib_active = false;
|
||||
out:
|
||||
kfree(work);
|
||||
}
|
||||
|
||||
static int mlx5_ib_sys_error_event(struct notifier_block *nb,
|
||||
unsigned long event, void *param)
|
||||
{
|
||||
struct mlx5_ib_event_work *work;
|
||||
|
||||
if (event != MLX5_DEV_EVENT_SYS_ERROR)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
work = kmalloc(sizeof(*work), GFP_ATOMIC);
|
||||
if (!work)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
INIT_WORK(&work->work, mlx5_ib_handle_sys_error_event);
|
||||
work->dev = container_of(nb, struct mlx5_ib_dev, sys_error_events);
|
||||
work->is_slave = false;
|
||||
work->param = param;
|
||||
work->event = event;
|
||||
|
||||
queue_work(mlx5_ib_event_wq, &work->work);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static int mlx5_ib_stage_sys_error_notifier_init(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
dev->sys_error_events.notifier_call = mlx5_ib_sys_error_event;
|
||||
mlx5_notifier_register(dev->mdev, &dev->sys_error_events);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5_ib_stage_sys_error_notifier_cleanup(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
mlx5_notifier_unregister(dev->mdev, &dev->sys_error_events);
|
||||
}
|
||||
|
||||
static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
|
||||
{
|
||||
struct mlx5_hca_vport_context vport_ctx;
|
||||
|
|
@ -4229,7 +4483,13 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
|
|||
if (err)
|
||||
goto err_mp;
|
||||
|
||||
err = pcim_p2pdma_init(mdev->pdev);
|
||||
if (err && err != -EOPNOTSUPP)
|
||||
goto err_dd;
|
||||
|
||||
return 0;
|
||||
err_dd:
|
||||
mlx5_ib_data_direct_cleanup(dev);
|
||||
err_mp:
|
||||
mlx5_ib_cleanup_multiport_master(dev);
|
||||
err:
|
||||
|
|
@ -4281,11 +4541,13 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
|
|||
.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
|
||||
.mmap = mlx5_ib_mmap,
|
||||
.mmap_free = mlx5_ib_mmap_free,
|
||||
.mmap_get_pfns = mlx5_ib_mmap_get_pfns,
|
||||
.modify_cq = mlx5_ib_modify_cq,
|
||||
.modify_device = mlx5_ib_modify_device,
|
||||
.modify_port = mlx5_ib_modify_port,
|
||||
.modify_qp = mlx5_ib_modify_qp,
|
||||
.modify_srq = mlx5_ib_modify_srq,
|
||||
.pgoff_to_mmap_entry = mlx5_ib_pgoff_to_mmap_entry,
|
||||
.pre_destroy_cq = mlx5_ib_pre_destroy_cq,
|
||||
.poll_cq = mlx5_ib_poll_cq,
|
||||
.post_destroy_cq = mlx5_ib_post_destroy_cq,
|
||||
|
|
@ -4297,6 +4559,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
|
|||
.query_device = mlx5_ib_query_device,
|
||||
.query_gid = mlx5_ib_query_gid,
|
||||
.query_pkey = mlx5_ib_query_pkey,
|
||||
.query_port_speed = mlx5_ib_query_port_speed,
|
||||
.query_qp = mlx5_ib_query_qp,
|
||||
.query_srq = mlx5_ib_query_srq,
|
||||
.query_ucontext = mlx5_ib_query_ucontext,
|
||||
|
|
@ -4466,12 +4729,16 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
|
|||
MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
|
||||
err = mlx5_ib_init_ucaps(dev);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_ucaps;
|
||||
}
|
||||
|
||||
dev->ib_dev.use_cq_dim = true;
|
||||
|
||||
return 0;
|
||||
|
||||
err_ucaps:
|
||||
bitmap_free(dev->var_table.bitmap);
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct ib_device_ops mlx5_ib_dev_port_ops = {
|
||||
|
|
@ -4807,6 +5074,9 @@ static const struct mlx5_ib_profile pf_profile = {
|
|||
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
|
||||
mlx5_ib_devx_init,
|
||||
mlx5_ib_devx_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
|
||||
mlx5_ib_stage_sys_error_notifier_init,
|
||||
mlx5_ib_stage_sys_error_notifier_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
|
||||
mlx5_ib_stage_ib_reg_init,
|
||||
mlx5_ib_stage_ib_reg_cleanup),
|
||||
|
|
@ -4864,6 +5134,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
|
|||
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
|
||||
mlx5_ib_devx_init,
|
||||
mlx5_ib_devx_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
|
||||
mlx5_ib_stage_sys_error_notifier_init,
|
||||
mlx5_ib_stage_sys_error_notifier_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
|
||||
mlx5_ib_stage_ib_reg_init,
|
||||
mlx5_ib_stage_ib_reg_cleanup),
|
||||
|
|
|
|||
|
|
@ -1007,6 +1007,7 @@ enum mlx5_ib_stages {
|
|||
MLX5_IB_STAGE_BFREG,
|
||||
MLX5_IB_STAGE_PRE_IB_REG_UMR,
|
||||
MLX5_IB_STAGE_WHITELIST_UID,
|
||||
MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
|
||||
MLX5_IB_STAGE_IB_REG,
|
||||
MLX5_IB_STAGE_DEVICE_NOTIFIER,
|
||||
MLX5_IB_STAGE_POST_IB_REG_UMR,
|
||||
|
|
@ -1165,6 +1166,7 @@ struct mlx5_ib_dev {
|
|||
/* protect accessing data_direct_dev */
|
||||
struct mutex data_direct_lock;
|
||||
struct notifier_block mdev_events;
|
||||
struct notifier_block sys_error_events;
|
||||
struct notifier_block lag_events;
|
||||
int num_ports;
|
||||
/* serialize update of capability mask
|
||||
|
|
@ -1435,6 +1437,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
|
|||
struct ib_port_attr *props);
|
||||
int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
|
||||
struct ib_port_attr *props);
|
||||
int mlx5_ib_query_port_speed(struct ib_device *ibdev, u32 port_num,
|
||||
u64 *speed);
|
||||
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
|
||||
u64 access_flags);
|
||||
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
|
||||
|
|
|
|||
|
|
@ -1646,10 +1646,13 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
|
|||
offset, length, fd,
|
||||
access_flags,
|
||||
&mlx5_ib_dmabuf_attach_ops);
|
||||
else
|
||||
else if (dma_device)
|
||||
umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
|
||||
dma_device, offset, length,
|
||||
fd, access_flags);
|
||||
else
|
||||
umem_dmabuf = ib_umem_dmabuf_get_pinned(
|
||||
&dev->ib_dev, offset, length, fd, access_flags);
|
||||
|
||||
if (IS_ERR(umem_dmabuf)) {
|
||||
mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
|
||||
|
|
@ -1782,10 +1785,8 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
|
|||
return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr,
|
||||
fd, access_flags);
|
||||
|
||||
return reg_user_mr_dmabuf(pd, pd->device->dma_device,
|
||||
offset, length, virt_addr,
|
||||
fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
|
||||
dmah);
|
||||
return reg_user_mr_dmabuf(pd, NULL, offset, length, virt_addr, fd,
|
||||
access_flags, MLX5_MKC_ACCESS_MODE_MTT, dmah);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -4362,6 +4362,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
|
|||
optpar |= ib_mask_to_mlx5_opt(attr_mask);
|
||||
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
|
||||
|
||||
if (attr_mask & IB_QP_RATE_LIMIT && qp->type != IB_QPT_RAW_PACKET) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (qp->type == IB_QPT_RAW_PACKET ||
|
||||
qp->flags & IB_QP_CREATE_SOURCE_QPN) {
|
||||
struct mlx5_modify_raw_qp_param raw_qp_param = {};
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(
|
|||
int out_len = uverbs_attr_get_len(attrs,
|
||||
MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH);
|
||||
u32 dev_path_len;
|
||||
char *dev_path;
|
||||
char *dev_path = NULL;
|
||||
int ret;
|
||||
|
||||
c = to_mucontext(ib_uverbs_get_ucontext(attrs));
|
||||
|
|
@ -223,9 +223,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(
|
|||
|
||||
ret = uverbs_copy_to(attrs, MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH, dev_path,
|
||||
dev_path_len);
|
||||
kfree(dev_path);
|
||||
|
||||
end:
|
||||
kfree(dev_path);
|
||||
mutex_unlock(&dev->data_direct_lock);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,8 +67,6 @@
|
|||
#define OC_SKH_DEVICE_VF 0x728
|
||||
#define OCRDMA_MAX_AH 512
|
||||
|
||||
#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
|
||||
|
||||
#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo)
|
||||
#define EQ_INTR_PER_SEC_THRSH_HI 150000
|
||||
#define EQ_INTR_PER_SEC_THRSH_LOW 100000
|
||||
|
|
|
|||
|
|
@ -53,11 +53,8 @@
|
|||
DP_NAME(dev) ? DP_NAME(dev) : "", ## __VA_ARGS__)
|
||||
|
||||
#define QEDR_MSG_INIT "INIT"
|
||||
#define QEDR_MSG_MISC "MISC"
|
||||
#define QEDR_MSG_CQ " CQ"
|
||||
#define QEDR_MSG_MR " MR"
|
||||
#define QEDR_MSG_RQ " RQ"
|
||||
#define QEDR_MSG_SQ " SQ"
|
||||
#define QEDR_MSG_QP " QP"
|
||||
#define QEDR_MSG_SRQ " SRQ"
|
||||
#define QEDR_MSG_GSI " GSI"
|
||||
|
|
@ -65,7 +62,6 @@
|
|||
|
||||
#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
|
||||
|
||||
#define FW_PAGE_SIZE (RDMA_RING_PAGE_SIZE)
|
||||
#define FW_PAGE_SHIFT (12)
|
||||
|
||||
struct qedr_dev;
|
||||
|
|
@ -178,24 +174,18 @@ struct qedr_dev {
|
|||
u8 user_dpm_enabled;
|
||||
};
|
||||
|
||||
#define QEDR_MAX_SQ_PBL (0x8000)
|
||||
#define QEDR_MAX_SQ_PBL_ENTRIES (0x10000 / sizeof(void *))
|
||||
#define QEDR_SQE_ELEMENT_SIZE (sizeof(struct rdma_sq_sge))
|
||||
#define QEDR_MAX_SQE_ELEMENTS_PER_SQE (ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE / \
|
||||
QEDR_SQE_ELEMENT_SIZE)
|
||||
#define QEDR_MAX_SQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \
|
||||
QEDR_SQE_ELEMENT_SIZE)
|
||||
#define QEDR_MAX_SQE ((QEDR_MAX_SQ_PBL_ENTRIES) *\
|
||||
(RDMA_RING_PAGE_SIZE) / \
|
||||
(QEDR_SQE_ELEMENT_SIZE) /\
|
||||
(QEDR_MAX_SQE_ELEMENTS_PER_SQE))
|
||||
/* RQ */
|
||||
#define QEDR_MAX_RQ_PBL (0x2000)
|
||||
#define QEDR_MAX_RQ_PBL_ENTRIES (0x10000 / sizeof(void *))
|
||||
#define QEDR_RQE_ELEMENT_SIZE (sizeof(struct rdma_rq_sge))
|
||||
#define QEDR_MAX_RQE_ELEMENTS_PER_RQE (RDMA_MAX_SGE_PER_RQ_WQE)
|
||||
#define QEDR_MAX_RQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \
|
||||
QEDR_RQE_ELEMENT_SIZE)
|
||||
#define QEDR_MAX_RQE ((QEDR_MAX_RQ_PBL_ENTRIES) *\
|
||||
(RDMA_RING_PAGE_SIZE) / \
|
||||
(QEDR_RQE_ELEMENT_SIZE) /\
|
||||
|
|
@ -210,12 +200,8 @@ struct qedr_dev {
|
|||
|
||||
#define QEDR_ROCE_MAX_CNQ_SIZE (0x4000)
|
||||
|
||||
#define QEDR_MAX_PORT (1)
|
||||
#define QEDR_PORT (1)
|
||||
|
||||
#define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
|
||||
|
||||
#define QEDR_ROCE_PKEY_MAX 1
|
||||
#define QEDR_ROCE_PKEY_TABLE_LEN 1
|
||||
#define QEDR_ROCE_PKEY_DEFAULT 0xffff
|
||||
|
||||
|
|
@ -336,12 +322,6 @@ struct qedr_qp_hwq_info {
|
|||
union db_prod32 iwarp_db2_data;
|
||||
};
|
||||
|
||||
#define QEDR_INC_SW_IDX(p_info, index) \
|
||||
do { \
|
||||
p_info->index = (p_info->index + 1) & \
|
||||
qed_chain_get_capacity(p_info->pbl) \
|
||||
} while (0)
|
||||
|
||||
struct qedr_srq_hwq_info {
|
||||
u32 max_sges;
|
||||
u32 max_wr;
|
||||
|
|
|
|||
|
|
@ -119,12 +119,15 @@ void retransmit_timer(struct timer_list *t)
|
|||
|
||||
rxe_dbg_qp(qp, "retransmit timer fired\n");
|
||||
|
||||
if (!rxe_get(qp))
|
||||
return;
|
||||
spin_lock_irqsave(&qp->state_lock, flags);
|
||||
if (qp->valid) {
|
||||
qp->comp.timeout = 1;
|
||||
rxe_sched_task(&qp->send_task);
|
||||
}
|
||||
spin_unlock_irqrestore(&qp->state_lock, flags);
|
||||
rxe_put(qp);
|
||||
}
|
||||
|
||||
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
|
||||
|
|
|
|||
|
|
@ -72,14 +72,46 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
|
|||
mr->ibmr.type = IB_MR_TYPE_DMA;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert iova to page_info index. The page_info stores pages of size
|
||||
* PAGE_SIZE, but MRs can have different page sizes. This function
|
||||
* handles the conversion for all cases:
|
||||
*
|
||||
* 1. mr->page_size > PAGE_SIZE:
|
||||
* The MR's iova may not be aligned to mr->page_size. We use the
|
||||
* aligned base (iova & page_mask) as reference, then calculate
|
||||
* which PAGE_SIZE sub-page the iova falls into.
|
||||
*
|
||||
* 2. mr->page_size <= PAGE_SIZE:
|
||||
* Use simple shift arithmetic since each page_info entry corresponds
|
||||
* to one or more MR pages.
|
||||
*/
|
||||
/*
 * Convert an iova to its page_info index. page_info stores PAGE_SIZE
 * pages, while the MR's own page size may differ:
 *
 *  - mr->page_size > PAGE_SIZE: the MR iova may not be aligned to
 *    mr->page_size, so use the aligned base (iova & page_mask) as the
 *    reference and compute which PAGE_SIZE sub-page the iova hits.
 *  - mr->page_size <= PAGE_SIZE: plain shift arithmetic, since each
 *    page_info entry covers one or more MR pages.
 */
static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
{
	int idx;

	if (mr_page_size(mr) > PAGE_SIZE)
		idx = (iova - (mr->ibmr.iova & mr->page_mask)) >> PAGE_SHIFT;
	else
		idx = (iova >> mr->page_shift) -
			(mr->ibmr.iova >> mr->page_shift);

	/* Callers must have range-checked the iova already. */
	WARN_ON(idx >= mr->nbuf);
	return idx;
}
|
||||
|
||||
/*
|
||||
* Convert iova to offset within the page_info entry.
|
||||
*
|
||||
* For mr_page_size > PAGE_SIZE, the offset is within the system page.
|
||||
* For mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
|
||||
*/
|
||||
/*
 * Convert an iova to the byte offset within its page_info entry.
 *
 * For mr_page_size > PAGE_SIZE the offset is within the system page;
 * for mr_page_size <= PAGE_SIZE it is within the MR page size.
 */
static unsigned long rxe_mr_iova_to_page_offset(struct rxe_mr *mr, u64 iova)
{
	if (mr_page_size(mr) > PAGE_SIZE)
		return iova & (PAGE_SIZE - 1);
	else
		return iova & (mr_page_size(mr) - 1);
}
|
||||
|
||||
static bool is_pmem_page(struct page *pg)
|
||||
|
|
@ -93,37 +125,69 @@ static bool is_pmem_page(struct page *pg)
|
|||
|
||||
static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
|
||||
{
|
||||
XA_STATE(xas, &mr->page_list, 0);
|
||||
struct sg_page_iter sg_iter;
|
||||
struct page *page;
|
||||
bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
|
||||
|
||||
WARN_ON(mr_page_size(mr) != PAGE_SIZE);
|
||||
|
||||
__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
|
||||
if (!__sg_page_iter_next(&sg_iter))
|
||||
return 0;
|
||||
|
||||
do {
|
||||
xas_lock(&xas);
|
||||
while (true) {
|
||||
page = sg_page_iter_page(&sg_iter);
|
||||
while (true) {
|
||||
page = sg_page_iter_page(&sg_iter);
|
||||
|
||||
if (persistent && !is_pmem_page(page)) {
|
||||
rxe_dbg_mr(mr, "Page can't be persistent\n");
|
||||
xas_set_err(&xas, -EINVAL);
|
||||
break;
|
||||
}
|
||||
|
||||
xas_store(&xas, page);
|
||||
if (xas_error(&xas))
|
||||
break;
|
||||
xas_next(&xas);
|
||||
if (!__sg_page_iter_next(&sg_iter))
|
||||
break;
|
||||
if (persistent && !is_pmem_page(page)) {
|
||||
rxe_dbg_mr(mr, "Page can't be persistent\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
xas_unlock(&xas);
|
||||
} while (xas_nomem(&xas, GFP_KERNEL));
|
||||
|
||||
return xas_error(&xas);
|
||||
mr->page_info[mr->nbuf].page = page;
|
||||
mr->page_info[mr->nbuf].offset = 0;
|
||||
mr->nbuf++;
|
||||
|
||||
if (!__sg_page_iter_next(&sg_iter))
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __alloc_mr_page_info(struct rxe_mr *mr, int num_pages)
|
||||
{
|
||||
mr->page_info = kcalloc(num_pages, sizeof(struct rxe_mr_page),
|
||||
GFP_KERNEL);
|
||||
if (!mr->page_info)
|
||||
return -ENOMEM;
|
||||
|
||||
mr->max_allowed_buf = num_pages;
|
||||
mr->nbuf = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int alloc_mr_page_info(struct rxe_mr *mr, int num_pages)
|
||||
{
|
||||
int ret;
|
||||
|
||||
WARN_ON(mr->num_buf);
|
||||
ret = __alloc_mr_page_info(mr, num_pages);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mr->num_buf = num_pages;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_mr_page_info(struct rxe_mr *mr)
|
||||
{
|
||||
if (!mr->page_info)
|
||||
return;
|
||||
|
||||
kfree(mr->page_info);
|
||||
mr->page_info = NULL;
|
||||
}
|
||||
|
||||
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
||||
|
|
@ -134,8 +198,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
|||
|
||||
rxe_mr_init(access, mr);
|
||||
|
||||
xa_init(&mr->page_list);
|
||||
|
||||
umem = ib_umem_get(&rxe->ib_dev, start, length, access);
|
||||
if (IS_ERR(umem)) {
|
||||
rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
|
||||
|
|
@ -143,46 +205,24 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
|||
return PTR_ERR(umem);
|
||||
}
|
||||
|
||||
err = alloc_mr_page_info(mr, ib_umem_num_pages(umem));
|
||||
if (err)
|
||||
goto err2;
|
||||
|
||||
err = rxe_mr_fill_pages_from_sgt(mr, &umem->sgt_append.sgt);
|
||||
if (err) {
|
||||
ib_umem_release(umem);
|
||||
return err;
|
||||
}
|
||||
if (err)
|
||||
goto err1;
|
||||
|
||||
mr->umem = umem;
|
||||
mr->ibmr.type = IB_MR_TYPE_USER;
|
||||
mr->state = RXE_MR_STATE_VALID;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
|
||||
{
|
||||
XA_STATE(xas, &mr->page_list, 0);
|
||||
int i = 0;
|
||||
int err;
|
||||
|
||||
xa_init(&mr->page_list);
|
||||
|
||||
do {
|
||||
xas_lock(&xas);
|
||||
while (i != num_buf) {
|
||||
xas_store(&xas, XA_ZERO_ENTRY);
|
||||
if (xas_error(&xas))
|
||||
break;
|
||||
xas_next(&xas);
|
||||
i++;
|
||||
}
|
||||
xas_unlock(&xas);
|
||||
} while (xas_nomem(&xas, GFP_KERNEL));
|
||||
|
||||
err = xas_error(&xas);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mr->num_buf = num_buf;
|
||||
|
||||
return 0;
|
||||
err1:
|
||||
free_mr_page_info(mr);
|
||||
err2:
|
||||
ib_umem_release(umem);
|
||||
return err;
|
||||
}
|
||||
|
||||
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
|
||||
|
|
@ -192,7 +232,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
|
|||
/* always allow remote access for FMRs */
|
||||
rxe_mr_init(RXE_ACCESS_REMOTE, mr);
|
||||
|
||||
err = rxe_mr_alloc(mr, max_pages);
|
||||
err = alloc_mr_page_info(mr, max_pages);
|
||||
if (err)
|
||||
goto err1;
|
||||
|
||||
|
|
@ -205,26 +245,43 @@ err1:
|
|||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* I) MRs with page_size >= PAGE_SIZE,
|
||||
* Split a large MR page (mr->page_size) into multiple PAGE_SIZE
|
||||
* sub-pages and store them in page_info, offset is always 0.
|
||||
*
|
||||
* Called when mr->page_size > PAGE_SIZE. Each call to rxe_set_page()
|
||||
* represents one mr->page_size region, which we must split into
|
||||
* (mr->page_size >> PAGE_SHIFT) individual pages.
|
||||
*
|
||||
* II) MRs with page_size < PAGE_SIZE,
|
||||
* Save each PAGE_SIZE page and its offset within the system page in page_info.
|
||||
*/
|
||||
static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
|
||||
{
|
||||
struct rxe_mr *mr = to_rmr(ibmr);
|
||||
struct page *page = ib_virt_dma_to_page(dma_addr);
|
||||
bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
|
||||
int err;
|
||||
u32 i, pages_per_mr = mr_page_size(mr) >> PAGE_SHIFT;
|
||||
|
||||
if (persistent && !is_pmem_page(page)) {
|
||||
rxe_dbg_mr(mr, "Page cannot be persistent\n");
|
||||
return -EINVAL;
|
||||
pages_per_mr = MAX(1, pages_per_mr);
|
||||
|
||||
for (i = 0; i < pages_per_mr; i++) {
|
||||
u64 addr = dma_addr + i * PAGE_SIZE;
|
||||
struct page *sub_page = ib_virt_dma_to_page(addr);
|
||||
|
||||
if (unlikely(mr->nbuf >= mr->max_allowed_buf))
|
||||
return -ENOMEM;
|
||||
|
||||
if (persistent && !is_pmem_page(sub_page)) {
|
||||
rxe_dbg_mr(mr, "Page cannot be persistent\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mr->page_info[mr->nbuf].page = sub_page;
|
||||
mr->page_info[mr->nbuf].offset = addr & (PAGE_SIZE - 1);
|
||||
mr->nbuf++;
|
||||
}
|
||||
|
||||
if (unlikely(mr->nbuf == mr->num_buf))
|
||||
return -ENOMEM;
|
||||
|
||||
err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mr->nbuf++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -234,10 +291,34 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
|
|||
struct rxe_mr *mr = to_rmr(ibmr);
|
||||
unsigned int page_size = mr_page_size(mr);
|
||||
|
||||
/*
|
||||
* Ensure page_size and PAGE_SIZE are compatible for mapping.
|
||||
* We require one to be a multiple of the other for correct
|
||||
* iova-to-page conversion.
|
||||
*/
|
||||
if (!IS_ALIGNED(page_size, PAGE_SIZE) &&
|
||||
!IS_ALIGNED(PAGE_SIZE, page_size)) {
|
||||
rxe_dbg_mr(mr, "MR page size %u must be compatible with PAGE_SIZE %lu\n",
|
||||
page_size, PAGE_SIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mr_page_size(mr) > PAGE_SIZE) {
|
||||
/* resize page_info if needed */
|
||||
u32 map_mr_pages = (page_size >> PAGE_SHIFT) * mr->num_buf;
|
||||
|
||||
if (map_mr_pages > mr->max_allowed_buf) {
|
||||
rxe_dbg_mr(mr, "requested pages %u exceed max %u\n",
|
||||
map_mr_pages, mr->max_allowed_buf);
|
||||
free_mr_page_info(mr);
|
||||
if (__alloc_mr_page_info(mr, map_mr_pages))
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
mr->nbuf = 0;
|
||||
mr->page_shift = ilog2(page_size);
|
||||
mr->page_mask = ~((u64)page_size - 1);
|
||||
mr->page_offset = mr->ibmr.iova & (page_size - 1);
|
||||
|
||||
return ib_sg_to_pages(ibmr, sgl, sg_nents, sg_offset, rxe_set_page);
|
||||
}
|
||||
|
|
@ -245,30 +326,30 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
|
|||
static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr,
|
||||
unsigned int length, enum rxe_mr_copy_dir dir)
|
||||
{
|
||||
unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
unsigned long index = rxe_mr_iova_to_index(mr, iova);
|
||||
unsigned int bytes;
|
||||
struct page *page;
|
||||
void *va;
|
||||
u8 *va;
|
||||
|
||||
while (length) {
|
||||
page = xa_load(&mr->page_list, index);
|
||||
if (!page)
|
||||
unsigned long index = rxe_mr_iova_to_index(mr, iova);
|
||||
struct rxe_mr_page *info = &mr->page_info[index];
|
||||
unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
|
||||
if (!info->page)
|
||||
return -EFAULT;
|
||||
|
||||
bytes = min_t(unsigned int, length,
|
||||
mr_page_size(mr) - page_offset);
|
||||
va = kmap_local_page(page);
|
||||
page_offset += info->offset;
|
||||
bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset);
|
||||
va = kmap_local_page(info->page);
|
||||
|
||||
if (dir == RXE_FROM_MR_OBJ)
|
||||
memcpy(addr, va + page_offset, bytes);
|
||||
else
|
||||
memcpy(va + page_offset, addr, bytes);
|
||||
kunmap_local(va);
|
||||
|
||||
page_offset = 0;
|
||||
addr += bytes;
|
||||
iova += bytes;
|
||||
length -= bytes;
|
||||
index++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -426,9 +507,6 @@ err1:
|
|||
|
||||
static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
|
||||
{
|
||||
unsigned int page_offset;
|
||||
unsigned long index;
|
||||
struct page *page;
|
||||
unsigned int bytes;
|
||||
int err;
|
||||
u8 *va;
|
||||
|
|
@ -438,15 +516,17 @@ static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int leng
|
|||
return err;
|
||||
|
||||
while (length > 0) {
|
||||
index = rxe_mr_iova_to_index(mr, iova);
|
||||
page = xa_load(&mr->page_list, index);
|
||||
page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
if (!page)
|
||||
return -EFAULT;
|
||||
bytes = min_t(unsigned int, length,
|
||||
mr_page_size(mr) - page_offset);
|
||||
unsigned long index = rxe_mr_iova_to_index(mr, iova);
|
||||
struct rxe_mr_page *info = &mr->page_info[index];
|
||||
unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
|
||||
va = kmap_local_page(page);
|
||||
if (!info->page)
|
||||
return -EFAULT;
|
||||
|
||||
page_offset += info->offset;
|
||||
bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset);
|
||||
|
||||
va = kmap_local_page(info->page);
|
||||
arch_wb_cache_pmem(va + page_offset, bytes);
|
||||
kunmap_local(va);
|
||||
|
||||
|
|
@ -501,6 +581,7 @@ enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
} else {
|
||||
unsigned long index;
|
||||
int err;
|
||||
struct rxe_mr_page *info;
|
||||
|
||||
err = mr_check_range(mr, iova, sizeof(value));
|
||||
if (err) {
|
||||
|
|
@ -509,9 +590,12 @@ enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
|
|||
}
|
||||
page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
index = rxe_mr_iova_to_index(mr, iova);
|
||||
page = xa_load(&mr->page_list, index);
|
||||
if (!page)
|
||||
info = &mr->page_info[index];
|
||||
if (!info->page)
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
|
||||
page_offset += info->offset;
|
||||
page = info->page;
|
||||
}
|
||||
|
||||
if (unlikely(page_offset & 0x7)) {
|
||||
|
|
@ -550,6 +634,7 @@ enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
|||
} else {
|
||||
unsigned long index;
|
||||
int err;
|
||||
struct rxe_mr_page *info;
|
||||
|
||||
/* See IBA oA19-28 */
|
||||
err = mr_check_range(mr, iova, sizeof(value));
|
||||
|
|
@ -559,9 +644,12 @@ enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
|
|||
}
|
||||
page_offset = rxe_mr_iova_to_page_offset(mr, iova);
|
||||
index = rxe_mr_iova_to_index(mr, iova);
|
||||
page = xa_load(&mr->page_list, index);
|
||||
if (!page)
|
||||
info = &mr->page_info[index];
|
||||
if (!info->page)
|
||||
return RESPST_ERR_RKEY_VIOLATION;
|
||||
|
||||
page_offset += info->offset;
|
||||
page = info->page;
|
||||
}
|
||||
|
||||
/* See IBA A19.4.2 */
|
||||
|
|
@ -725,5 +813,5 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
|
|||
ib_umem_release(mr->umem);
|
||||
|
||||
if (mr->ibmr.type != IB_MR_TYPE_DMA)
|
||||
xa_destroy(&mr->page_list);
|
||||
free_mr_page_info(mr);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,7 +110,6 @@ int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
|
|||
mr->access = access_flags;
|
||||
mr->ibmr.length = length;
|
||||
mr->ibmr.iova = iova;
|
||||
mr->page_offset = ib_umem_offset(&umem_odp->umem);
|
||||
|
||||
err = rxe_odp_init_pages(mr);
|
||||
if (err) {
|
||||
|
|
|
|||
|
|
@ -102,6 +102,8 @@ void rnr_nak_timer(struct timer_list *t)
|
|||
|
||||
rxe_dbg_qp(qp, "nak timer fired\n");
|
||||
|
||||
if (!rxe_get(qp))
|
||||
return;
|
||||
spin_lock_irqsave(&qp->state_lock, flags);
|
||||
if (qp->valid) {
|
||||
/* request a send queue retry */
|
||||
|
|
@ -110,6 +112,7 @@ void rnr_nak_timer(struct timer_list *t)
|
|||
rxe_sched_task(&qp->send_task);
|
||||
}
|
||||
spin_unlock_irqrestore(&qp->state_lock, flags);
|
||||
rxe_put(qp);
|
||||
}
|
||||
|
||||
static void req_check_sq_drain_done(struct rxe_qp *qp)
|
||||
|
|
|
|||
|
|
@ -77,9 +77,6 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
|
|||
goto err_free;
|
||||
}
|
||||
|
||||
srq->rq.queue = q;
|
||||
init->attr.max_wr = srq->rq.max_wr;
|
||||
|
||||
if (uresp) {
|
||||
if (copy_to_user(&uresp->srq_num, &srq->srq_num,
|
||||
sizeof(uresp->srq_num))) {
|
||||
|
|
@ -88,6 +85,9 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
|
|||
}
|
||||
}
|
||||
|
||||
srq->rq.queue = q;
|
||||
init->attr.max_wr = srq->rq.max_wr;
|
||||
|
||||
return 0;
|
||||
|
||||
err_free:
|
||||
|
|
|
|||
|
|
@ -335,6 +335,11 @@ static inline int rkey_is_mw(u32 rkey)
|
|||
return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX);
|
||||
}
|
||||
|
||||
struct rxe_mr_page {
|
||||
struct page *page;
|
||||
unsigned int offset; /* offset in system page */
|
||||
};
|
||||
|
||||
struct rxe_mr {
|
||||
struct rxe_pool_elem elem;
|
||||
struct ib_mr ibmr;
|
||||
|
|
@ -347,14 +352,16 @@ struct rxe_mr {
|
|||
int access;
|
||||
atomic_t num_mw;
|
||||
|
||||
unsigned int page_offset;
|
||||
unsigned int page_shift;
|
||||
u64 page_mask;
|
||||
|
||||
/* size of page_info when mr allocated */
|
||||
u32 num_buf;
|
||||
/* real size of page_info */
|
||||
u32 max_allowed_buf;
|
||||
u32 nbuf;
|
||||
|
||||
struct xarray page_list;
|
||||
struct rxe_mr_page *page_info;
|
||||
};
|
||||
|
||||
static inline unsigned int mr_page_size(struct rxe_mr *mr)
|
||||
|
|
|
|||
|
|
@ -1435,7 +1435,8 @@ int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
|
|||
}
|
||||
if (unlikely(rv != 0 && rv != -EAGAIN)) {
|
||||
if ((srx->state > SIW_GET_HDR ||
|
||||
qp->rx_fpdu->more_ddp_segs) && run_completion)
|
||||
(qp->rx_fpdu && qp->rx_fpdu->more_ddp_segs)) &&
|
||||
run_completion)
|
||||
siw_rdmap_complete(qp, rv);
|
||||
|
||||
siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
|
||||
|
|
|
|||
|
|
@ -439,19 +439,19 @@ int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path)
|
|||
clt->kobj_paths,
|
||||
"%s", str);
|
||||
if (err) {
|
||||
pr_err("kobject_init_and_add: %d\n", err);
|
||||
pr_err("kobject_init_and_add: %pe\n", ERR_PTR(err));
|
||||
kobject_put(&clt_path->kobj);
|
||||
return err;
|
||||
}
|
||||
err = sysfs_create_group(&clt_path->kobj, &rtrs_clt_path_attr_group);
|
||||
if (err) {
|
||||
pr_err("sysfs_create_group(): %d\n", err);
|
||||
pr_err("sysfs_create_group(): %pe\n", ERR_PTR(err));
|
||||
goto put_kobj;
|
||||
}
|
||||
err = kobject_init_and_add(&clt_path->stats->kobj_stats, &ktype_stats,
|
||||
&clt_path->kobj, "stats");
|
||||
if (err) {
|
||||
pr_err("kobject_init_and_add: %d\n", err);
|
||||
pr_err("kobject_init_and_add: %pe\n", ERR_PTR(err));
|
||||
kobject_put(&clt_path->stats->kobj_stats);
|
||||
goto remove_group;
|
||||
}
|
||||
|
|
@ -459,7 +459,7 @@ int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path)
|
|||
err = sysfs_create_group(&clt_path->stats->kobj_stats,
|
||||
&rtrs_clt_stats_attr_group);
|
||||
if (err) {
|
||||
pr_err("failed to create stats sysfs group, err: %d\n", err);
|
||||
pr_err("failed to create stats sysfs group, err: %pe\n", ERR_PTR(err));
|
||||
goto put_kobj_stats;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -422,8 +422,8 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
|
|||
refcount_inc(&req->ref);
|
||||
err = rtrs_inv_rkey(req);
|
||||
if (err) {
|
||||
rtrs_err_rl(con->c.path, "Send INV WR key=%#x: %d\n",
|
||||
req->mr->rkey, err);
|
||||
rtrs_err_rl(con->c.path, "Send INV WR key=%#x: %pe\n",
|
||||
req->mr->rkey, ERR_PTR(err));
|
||||
} else if (can_wait) {
|
||||
wait_for_completion(&req->inv_comp);
|
||||
}
|
||||
|
|
@ -443,8 +443,8 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
|
|||
|
||||
if (errno) {
|
||||
rtrs_err_rl(con->c.path,
|
||||
"IO %s request failed: error=%d path=%s [%s:%u] notify=%d\n",
|
||||
req->dir == DMA_TO_DEVICE ? "write" : "read", errno,
|
||||
"IO %s request failed: error=%pe path=%s [%s:%u] notify=%d\n",
|
||||
req->dir == DMA_TO_DEVICE ? "write" : "read", ERR_PTR(errno),
|
||||
kobject_name(&clt_path->kobj), clt_path->hca_name,
|
||||
clt_path->hca_port, notify);
|
||||
}
|
||||
|
|
@ -514,7 +514,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc)
|
|||
cqe);
|
||||
err = rtrs_iu_post_recv(&con->c, iu);
|
||||
if (err) {
|
||||
rtrs_err(con->c.path, "post iu failed %d\n", err);
|
||||
rtrs_err(con->c.path, "post iu failed %pe\n", ERR_PTR(err));
|
||||
rtrs_rdma_error_recovery(con);
|
||||
}
|
||||
}
|
||||
|
|
@ -659,8 +659,8 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
|
|||
else
|
||||
err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
|
||||
if (err) {
|
||||
rtrs_err(con->c.path, "rtrs_post_recv_empty(): %d\n",
|
||||
err);
|
||||
rtrs_err(con->c.path, "rtrs_post_recv_empty(): %pe\n",
|
||||
ERR_PTR(err));
|
||||
rtrs_rdma_error_recovery(con);
|
||||
}
|
||||
break;
|
||||
|
|
@ -731,8 +731,8 @@ static int post_recv_path(struct rtrs_clt_path *clt_path)
|
|||
|
||||
err = post_recv_io(to_clt_con(clt_path->s.con[cid]), q_size);
|
||||
if (err) {
|
||||
rtrs_err(clt_path->clt, "post_recv_io(), err: %d\n",
|
||||
err);
|
||||
rtrs_err(clt_path->clt, "post_recv_io(), err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
|
@ -1122,8 +1122,8 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
|
|||
ret = rtrs_map_sg_fr(req, count);
|
||||
if (ret < 0) {
|
||||
rtrs_err_rl(s,
|
||||
"Write request failed, failed to map fast reg. data, err: %d\n",
|
||||
ret);
|
||||
"Write request failed, failed to map fast reg. data, err: %pe\n",
|
||||
ERR_PTR(ret));
|
||||
ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist,
|
||||
req->sg_cnt, req->dir);
|
||||
return ret;
|
||||
|
|
@ -1150,9 +1150,9 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
|
|||
imm, wr, NULL);
|
||||
if (ret) {
|
||||
rtrs_err_rl(s,
|
||||
"Write request failed: error=%d path=%s [%s:%u]\n",
|
||||
ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
|
||||
clt_path->hca_port);
|
||||
"Write request failed: error=%pe path=%s [%s:%u]\n",
|
||||
ERR_PTR(ret), kobject_name(&clt_path->kobj),
|
||||
clt_path->hca_name, clt_path->hca_port);
|
||||
if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
|
||||
atomic_dec(&clt_path->stats->inflight);
|
||||
if (req->mr->need_inval) {
|
||||
|
|
@ -1208,8 +1208,8 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
|
|||
ret = rtrs_map_sg_fr(req, count);
|
||||
if (ret < 0) {
|
||||
rtrs_err_rl(s,
|
||||
"Read request failed, failed to map fast reg. data, err: %d\n",
|
||||
ret);
|
||||
"Read request failed, failed to map fast reg. data, err: %pe\n",
|
||||
ERR_PTR(ret));
|
||||
ib_dma_unmap_sg(dev->ib_dev, req->sglist, req->sg_cnt,
|
||||
req->dir);
|
||||
return ret;
|
||||
|
|
@ -1260,9 +1260,9 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
|
|||
req->data_len, imm, wr);
|
||||
if (ret) {
|
||||
rtrs_err_rl(s,
|
||||
"Read request failed: error=%d path=%s [%s:%u]\n",
|
||||
ret, kobject_name(&clt_path->kobj), clt_path->hca_name,
|
||||
clt_path->hca_port);
|
||||
"Read request failed: error=%pe path=%s [%s:%u]\n",
|
||||
ERR_PTR(ret), kobject_name(&clt_path->kobj),
|
||||
clt_path->hca_name, clt_path->hca_port);
|
||||
if (req->mp_policy == MP_POLICY_MIN_INFLIGHT)
|
||||
atomic_dec(&clt_path->stats->inflight);
|
||||
req->mr->need_inval = false;
|
||||
|
|
@ -1359,7 +1359,9 @@ static void free_path_reqs(struct rtrs_clt_path *clt_path)
|
|||
|
||||
static int alloc_path_reqs(struct rtrs_clt_path *clt_path)
|
||||
{
|
||||
struct ib_device *ib_dev = clt_path->s.dev->ib_dev;
|
||||
struct rtrs_clt_io_req *req;
|
||||
enum ib_mr_type mr_type;
|
||||
int i, err = -ENOMEM;
|
||||
|
||||
clt_path->reqs = kcalloc(clt_path->queue_depth,
|
||||
|
|
@ -1368,6 +1370,11 @@ static int alloc_path_reqs(struct rtrs_clt_path *clt_path)
|
|||
if (!clt_path->reqs)
|
||||
return -ENOMEM;
|
||||
|
||||
if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
|
||||
mr_type = IB_MR_TYPE_SG_GAPS;
|
||||
else
|
||||
mr_type = IB_MR_TYPE_MEM_REG;
|
||||
|
||||
for (i = 0; i < clt_path->queue_depth; ++i) {
|
||||
req = &clt_path->reqs[i];
|
||||
req->iu = rtrs_iu_alloc(1, clt_path->max_hdr_size, GFP_KERNEL,
|
||||
|
|
@ -1381,8 +1388,7 @@ static int alloc_path_reqs(struct rtrs_clt_path *clt_path)
|
|||
if (!req->sge)
|
||||
goto out;
|
||||
|
||||
req->mr = ib_alloc_mr(clt_path->s.dev->ib_pd,
|
||||
IB_MR_TYPE_MEM_REG,
|
||||
req->mr = ib_alloc_mr(clt_path->s.dev->ib_pd, mr_type,
|
||||
clt_path->max_pages_per_mr);
|
||||
if (IS_ERR(req->mr)) {
|
||||
err = PTR_ERR(req->mr);
|
||||
|
|
@ -1775,12 +1781,12 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
|
|||
err = create_con_cq_qp(con);
|
||||
mutex_unlock(&con->con_mutex);
|
||||
if (err) {
|
||||
rtrs_err(s, "create_con_cq_qp(), err: %d\n", err);
|
||||
rtrs_err(s, "create_con_cq_qp(), err: %pe\n", ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS);
|
||||
if (err)
|
||||
rtrs_err(s, "Resolving route failed, err: %d\n", err);
|
||||
rtrs_err(s, "Resolving route failed, err: %pe\n", ERR_PTR(err));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -1814,7 +1820,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con)
|
|||
|
||||
err = rdma_connect_locked(con->c.cm_id, ¶m);
|
||||
if (err)
|
||||
rtrs_err(clt, "rdma_connect_locked(): %d\n", err);
|
||||
rtrs_err(clt, "rdma_connect_locked(): %pe\n", ERR_PTR(err));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -1847,8 +1853,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con,
|
|||
}
|
||||
errno = le16_to_cpu(msg->errno);
|
||||
if (errno) {
|
||||
rtrs_err(clt, "Invalid RTRS message: errno %d\n",
|
||||
errno);
|
||||
rtrs_err(clt, "Invalid RTRS message: errno %pe\n",
|
||||
ERR_PTR(errno));
|
||||
return -ECONNRESET;
|
||||
}
|
||||
if (con->c.cid == 0) {
|
||||
|
|
@ -1923,7 +1929,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
|
|||
struct rtrs_path *s = con->c.path;
|
||||
const struct rtrs_msg_conn_rsp *msg;
|
||||
const char *rej_msg;
|
||||
int status, errno;
|
||||
int status, errno = -ECONNRESET;
|
||||
u8 data_len;
|
||||
|
||||
status = ev->status;
|
||||
|
|
@ -1937,15 +1943,15 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con,
|
|||
"Previous session is still exists on the server, please reconnect later\n");
|
||||
else
|
||||
rtrs_err(s,
|
||||
"Connect rejected: status %d (%s), rtrs errno %d\n",
|
||||
status, rej_msg, errno);
|
||||
"Connect rejected: status %d (%s), rtrs errno %pe\n",
|
||||
status, rej_msg, ERR_PTR(errno));
|
||||
} else {
|
||||
rtrs_err(s,
|
||||
"Connect rejected but with malformed message: status %d (%s)\n",
|
||||
status, rej_msg);
|
||||
}
|
||||
|
||||
return -ECONNRESET;
|
||||
return errno;
|
||||
}
|
||||
|
||||
void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait)
|
||||
|
|
@ -2009,27 +2015,53 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
|
|||
case RDMA_CM_EVENT_UNREACHABLE:
|
||||
case RDMA_CM_EVENT_ADDR_CHANGE:
|
||||
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %d)\n",
|
||||
rdma_event_msg(ev->event), ev->status);
|
||||
if (ev->status < 0) {
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %pe)\n",
|
||||
rdma_event_msg(ev->event), ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %s)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
cm_err = -ECONNRESET;
|
||||
break;
|
||||
case RDMA_CM_EVENT_ADDR_ERROR:
|
||||
case RDMA_CM_EVENT_ROUTE_ERROR:
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %d)\n",
|
||||
rdma_event_msg(ev->event), ev->status);
|
||||
if (ev->status < 0) {
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %pe)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
rtrs_wrn(s, "CM error (CM event: %s, err: %s)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
cm_err = -EHOSTUNREACH;
|
||||
break;
|
||||
case RDMA_CM_EVENT_DEVICE_REMOVAL:
|
||||
/*
|
||||
* Device removal is a special case. Queue close and return 0.
|
||||
*/
|
||||
rtrs_wrn_rl(s, "CM event: %s, status: %d\n", rdma_event_msg(ev->event),
|
||||
ev->status);
|
||||
if (ev->status < 0) {
|
||||
rtrs_wrn_rl(s, "CM event: %s, status: %pe\n",
|
||||
rdma_event_msg(ev->event),
|
||||
ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
rtrs_wrn_rl(s, "CM event: %s, status: %s\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
rtrs_clt_close_conns(clt_path, false);
|
||||
return 0;
|
||||
default:
|
||||
rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %d)\n",
|
||||
rdma_event_msg(ev->event), ev->status);
|
||||
if (ev->status < 0) {
|
||||
rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %pe)\n",
|
||||
rdma_event_msg(ev->event), ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %s)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
cm_err = -ECONNRESET;
|
||||
break;
|
||||
}
|
||||
|
|
@ -2066,14 +2098,14 @@ static int create_cm(struct rtrs_clt_con *con)
|
|||
/* allow the port to be reused */
|
||||
err = rdma_set_reuseaddr(cm_id, 1);
|
||||
if (err != 0) {
|
||||
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
|
||||
rtrs_err(s, "Set address reuse failed, err: %pe\n", ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
|
||||
(struct sockaddr *)&clt_path->s.dst_addr,
|
||||
RTRS_CONNECT_TIMEOUT_MS);
|
||||
if (err) {
|
||||
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
|
||||
rtrs_err(s, "Failed to resolve address, err: %pe\n", ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
/*
|
||||
|
|
@ -2548,7 +2580,7 @@ static int rtrs_send_path_info(struct rtrs_clt_path *clt_path)
|
|||
/* Prepare for getting info response */
|
||||
err = rtrs_iu_post_recv(&usr_con->c, rx_iu);
|
||||
if (err) {
|
||||
rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %d\n", err);
|
||||
rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %pe\n", ERR_PTR(err));
|
||||
goto out;
|
||||
}
|
||||
rx_iu = NULL;
|
||||
|
|
@ -2564,7 +2596,7 @@ static int rtrs_send_path_info(struct rtrs_clt_path *clt_path)
|
|||
/* Send info request */
|
||||
err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL);
|
||||
if (err) {
|
||||
rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %d\n", err);
|
||||
rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %pe\n", ERR_PTR(err));
|
||||
goto out;
|
||||
}
|
||||
tx_iu = NULL;
|
||||
|
|
@ -2615,15 +2647,15 @@ static int init_path(struct rtrs_clt_path *clt_path)
|
|||
err = init_conns(clt_path);
|
||||
if (err) {
|
||||
rtrs_err(clt_path->clt,
|
||||
"init_conns() failed: err=%d path=%s [%s:%u]\n", err,
|
||||
str, clt_path->hca_name, clt_path->hca_port);
|
||||
"init_conns() failed: err=%pe path=%s [%s:%u]\n",
|
||||
ERR_PTR(err), str, clt_path->hca_name, clt_path->hca_port);
|
||||
goto out;
|
||||
}
|
||||
err = rtrs_send_path_info(clt_path);
|
||||
if (err) {
|
||||
rtrs_err(clt_path->clt,
|
||||
"rtrs_send_path_info() failed: err=%d path=%s [%s:%u]\n",
|
||||
err, str, clt_path->hca_name, clt_path->hca_port);
|
||||
"rtrs_send_path_info() failed: err=%pe path=%s [%s:%u]\n",
|
||||
ERR_PTR(err), str, clt_path->hca_name, clt_path->hca_port);
|
||||
goto out;
|
||||
}
|
||||
rtrs_clt_path_up(clt_path);
|
||||
|
|
@ -3147,8 +3179,11 @@ close_path:
|
|||
void rtrs_clt_ib_event_handler(struct ib_event_handler *handler,
|
||||
struct ib_event *ibevent)
|
||||
{
|
||||
pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent->event),
|
||||
ibevent->event);
|
||||
struct ib_device *idev = ibevent->device;
|
||||
u32 port_num = ibevent->element.port_num;
|
||||
|
||||
pr_info("Handling event: %s (%d). HCA name: %s, port num: %u\n",
|
||||
ib_event_msg(ibevent->event), ibevent->event, idev->name, port_num);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,6 @@ struct rtrs_permit {
|
|||
* rtrs_clt_io_req - describes one inflight IO request
|
||||
*/
|
||||
struct rtrs_clt_io_req {
|
||||
struct list_head list;
|
||||
struct rtrs_iu *iu;
|
||||
struct scatterlist *sglist; /* list holding user data */
|
||||
unsigned int sg_cnt;
|
||||
|
|
@ -103,12 +102,10 @@ struct rtrs_clt_io_req {
|
|||
bool in_use;
|
||||
enum rtrs_mp_policy mp_policy;
|
||||
struct rtrs_clt_con *con;
|
||||
struct rtrs_sg_desc *desc;
|
||||
struct ib_sge *sge;
|
||||
struct rtrs_permit *permit;
|
||||
enum dma_data_direction dir;
|
||||
void (*conf)(void *priv, int errno);
|
||||
unsigned long start_jiffies;
|
||||
|
||||
struct ib_mr *mr;
|
||||
struct ib_cqe inv_cqe;
|
||||
|
|
|
|||
|
|
@ -176,14 +176,14 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_pat
|
|||
dev_set_uevent_suppress(&srv->dev, true);
|
||||
err = device_add(&srv->dev);
|
||||
if (err) {
|
||||
pr_err("device_add(): %d\n", err);
|
||||
pr_err("device_add(): %pe\n", ERR_PTR(err));
|
||||
put_device(&srv->dev);
|
||||
goto unlock;
|
||||
}
|
||||
srv->kobj_paths = kobject_create_and_add("paths", &srv->dev.kobj);
|
||||
if (!srv->kobj_paths) {
|
||||
err = -ENOMEM;
|
||||
pr_err("kobject_create_and_add(): %d\n", err);
|
||||
pr_err("kobject_create_and_add(): %pe\n", ERR_PTR(err));
|
||||
device_del(&srv->dev);
|
||||
put_device(&srv->dev);
|
||||
goto unlock;
|
||||
|
|
@ -237,14 +237,14 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_path *srv_path)
|
|||
err = kobject_init_and_add(&srv_path->stats->kobj_stats, &ktype_stats,
|
||||
&srv_path->kobj, "stats");
|
||||
if (err) {
|
||||
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
|
||||
rtrs_err(s, "kobject_init_and_add(): %pe\n", ERR_PTR(err));
|
||||
kobject_put(&srv_path->stats->kobj_stats);
|
||||
return err;
|
||||
}
|
||||
err = sysfs_create_group(&srv_path->stats->kobj_stats,
|
||||
&rtrs_srv_stats_attr_group);
|
||||
if (err) {
|
||||
rtrs_err(s, "sysfs_create_group(): %d\n", err);
|
||||
rtrs_err(s, "sysfs_create_group(): %pe\n", ERR_PTR(err));
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
|
@ -276,12 +276,12 @@ int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path)
|
|||
err = kobject_init_and_add(&srv_path->kobj, &ktype, srv->kobj_paths,
|
||||
"%s", str);
|
||||
if (err) {
|
||||
rtrs_err(s, "kobject_init_and_add(): %d\n", err);
|
||||
rtrs_err(s, "kobject_init_and_add(): %pe\n", ERR_PTR(err));
|
||||
goto destroy_root;
|
||||
}
|
||||
err = sysfs_create_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
|
||||
if (err) {
|
||||
rtrs_err(s, "sysfs_create_group(): %d\n", err);
|
||||
rtrs_err(s, "sysfs_create_group(): %pe\n", ERR_PTR(err));
|
||||
goto put_kobj;
|
||||
}
|
||||
err = rtrs_srv_create_stats_files(srv_path);
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc)
|
|||
struct rtrs_srv_path *srv_path = to_srv_path(s);
|
||||
|
||||
if (wc->status != IB_WC_SUCCESS) {
|
||||
rtrs_err(s, "REG MR failed: %s\n",
|
||||
rtrs_err_rl(s, "REG MR failed: %s\n",
|
||||
ib_wc_status_msg(wc->status));
|
||||
close_path(srv_path);
|
||||
return;
|
||||
|
|
@ -208,7 +208,6 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
|
|||
size_t sg_cnt;
|
||||
int err, offset;
|
||||
bool need_inval;
|
||||
u32 rkey = 0;
|
||||
struct ib_reg_wr rwr;
|
||||
struct ib_sge *plist;
|
||||
struct ib_sge list;
|
||||
|
|
@ -240,11 +239,6 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
|
|||
wr->wr.num_sge = 1;
|
||||
wr->remote_addr = le64_to_cpu(id->rd_msg->desc[0].addr);
|
||||
wr->rkey = le32_to_cpu(id->rd_msg->desc[0].key);
|
||||
if (rkey == 0)
|
||||
rkey = wr->rkey;
|
||||
else
|
||||
/* Only one key is actually used */
|
||||
WARN_ON_ONCE(rkey != wr->rkey);
|
||||
|
||||
wr->wr.opcode = IB_WR_RDMA_WRITE;
|
||||
wr->wr.wr_cqe = &io_comp_cqe;
|
||||
|
|
@ -277,7 +271,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
|
|||
inv_wr.opcode = IB_WR_SEND_WITH_INV;
|
||||
inv_wr.wr_cqe = &io_comp_cqe;
|
||||
inv_wr.send_flags = 0;
|
||||
inv_wr.ex.invalidate_rkey = rkey;
|
||||
inv_wr.ex.invalidate_rkey = wr->rkey;
|
||||
}
|
||||
|
||||
imm_wr.wr.next = NULL;
|
||||
|
|
@ -323,8 +317,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
|
|||
err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL);
|
||||
if (err)
|
||||
rtrs_err(s,
|
||||
"Posting RDMA-Write-Request to QP failed, err: %d\n",
|
||||
err);
|
||||
"Posting RDMA-Write-Request to QP failed, err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -440,8 +434,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
|
|||
|
||||
err = ib_post_send(id->con->c.qp, wr, NULL);
|
||||
if (err)
|
||||
rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %d\n",
|
||||
err);
|
||||
rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -525,8 +519,8 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status)
|
|||
err = rdma_write_sg(id);
|
||||
|
||||
if (err) {
|
||||
rtrs_err_rl(s, "IO response failed: %d: srv_path=%s\n", err,
|
||||
kobject_name(&srv_path->kobj));
|
||||
rtrs_err_rl(s, "IO response failed: %pe: srv_path=%s\n",
|
||||
ERR_PTR(err), kobject_name(&srv_path->kobj));
|
||||
close_path(srv_path);
|
||||
}
|
||||
out:
|
||||
|
|
@ -568,13 +562,15 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
|
|||
|
||||
static int map_cont_bufs(struct rtrs_srv_path *srv_path)
|
||||
{
|
||||
struct ib_device *ib_dev = srv_path->s.dev->ib_dev;
|
||||
struct rtrs_srv_sess *srv = srv_path->srv;
|
||||
struct rtrs_path *ss = &srv_path->s;
|
||||
int i, err, mrs_num;
|
||||
unsigned int chunk_bits;
|
||||
enum ib_mr_type mr_type;
|
||||
int chunks_per_mr = 1;
|
||||
struct ib_mr *mr;
|
||||
struct sg_table *sgt;
|
||||
struct ib_mr *mr;
|
||||
|
||||
/*
|
||||
* Here we map queue_depth chunks to MR. Firstly we have to
|
||||
|
|
@ -601,7 +597,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
|
|||
srv_path->mrs_num++) {
|
||||
struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num];
|
||||
struct scatterlist *s;
|
||||
int nr, nr_sgt, chunks;
|
||||
int nr, nr_sgt, chunks, ind;
|
||||
|
||||
sgt = &srv_mr->sgt;
|
||||
chunks = chunks_per_mr * srv_path->mrs_num;
|
||||
|
|
@ -623,15 +619,20 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
|
|||
err = -EINVAL;
|
||||
goto free_sg;
|
||||
}
|
||||
mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
|
||||
nr_sgt);
|
||||
|
||||
if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
|
||||
mr_type = IB_MR_TYPE_SG_GAPS;
|
||||
else
|
||||
mr_type = IB_MR_TYPE_MEM_REG;
|
||||
|
||||
mr = ib_alloc_mr(srv_path->s.dev->ib_pd, mr_type, nr_sgt);
|
||||
if (IS_ERR(mr)) {
|
||||
err = PTR_ERR(mr);
|
||||
goto unmap_sg;
|
||||
}
|
||||
nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
|
||||
NULL, max_chunk_size);
|
||||
if (nr != nr_sgt) {
|
||||
if (nr < nr_sgt) {
|
||||
err = nr < 0 ? nr : -EINVAL;
|
||||
goto dereg_mr;
|
||||
}
|
||||
|
|
@ -643,13 +644,28 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
|
|||
DMA_TO_DEVICE, rtrs_srv_rdma_done);
|
||||
if (!srv_mr->iu) {
|
||||
err = -ENOMEM;
|
||||
rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err);
|
||||
rtrs_err(ss, "rtrs_iu_alloc(), err: %pe\n", ERR_PTR(err));
|
||||
goto dereg_mr;
|
||||
}
|
||||
}
|
||||
/* Eventually dma addr for each chunk can be cached */
|
||||
for_each_sg(sgt->sgl, s, nr_sgt, i)
|
||||
srv_path->dma_addr[chunks + i] = sg_dma_address(s);
|
||||
|
||||
/*
|
||||
* Cache DMA addresses by traversing sg entries. If
|
||||
* regions were merged, an inner loop is required to
|
||||
* populate the DMA address array by traversing larger
|
||||
* regions.
|
||||
*/
|
||||
ind = chunks;
|
||||
for_each_sg(sgt->sgl, s, nr_sgt, i) {
|
||||
unsigned int dma_len = sg_dma_len(s);
|
||||
u64 dma_addr = sg_dma_address(s);
|
||||
u64 dma_addr_end = dma_addr + dma_len;
|
||||
|
||||
do {
|
||||
srv_path->dma_addr[ind++] = dma_addr;
|
||||
dma_addr += max_chunk_size;
|
||||
} while (dma_addr < dma_addr_end);
|
||||
}
|
||||
|
||||
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
|
||||
srv_mr->mr = mr;
|
||||
|
|
@ -804,7 +820,7 @@ static int process_info_req(struct rtrs_srv_con *con,
|
|||
|
||||
err = post_recv_path(srv_path);
|
||||
if (err) {
|
||||
rtrs_err(s, "post_recv_path(), err: %d\n", err);
|
||||
rtrs_err(s, "post_recv_path(), err: %pe\n", ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
@ -867,7 +883,7 @@ static int process_info_req(struct rtrs_srv_con *con,
|
|||
get_device(&srv_path->srv->dev);
|
||||
err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
|
||||
if (!err) {
|
||||
rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_srv_change_state() failed\n");
|
||||
goto iu_free;
|
||||
}
|
||||
|
||||
|
|
@ -881,7 +897,7 @@ static int process_info_req(struct rtrs_srv_con *con,
|
|||
*/
|
||||
err = rtrs_srv_path_up(srv_path);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_srv_path_up(), err: %pe\n", ERR_PTR(err));
|
||||
goto iu_free;
|
||||
}
|
||||
|
||||
|
|
@ -889,10 +905,16 @@ static int process_info_req(struct rtrs_srv_con *con,
|
|||
tx_iu->dma_addr,
|
||||
tx_iu->size, DMA_TO_DEVICE);
|
||||
|
||||
/*
|
||||
* Now disable zombie connection closing. Since from the logs and code,
|
||||
* we know that it can never be in CONNECTED state.
|
||||
*/
|
||||
srv_path->connection_timeout = 0;
|
||||
|
||||
/* Send info response */
|
||||
err = rtrs_iu_post_send(&con->c, tx_iu, tx_sz, reg_wr);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_iu_post_send(), err: %pe\n", ERR_PTR(err));
|
||||
iu_free:
|
||||
rtrs_iu_free(tx_iu, srv_path->s.dev->ib_dev, 1);
|
||||
}
|
||||
|
|
@ -960,7 +982,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con)
|
|||
/* Prepare for getting info response */
|
||||
err = rtrs_iu_post_recv(&con->c, rx_iu);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_iu_post_recv(), err: %pe\n", ERR_PTR(err));
|
||||
rtrs_iu_free(rx_iu, srv_path->s.dev->ib_dev, 1);
|
||||
return err;
|
||||
}
|
||||
|
|
@ -1006,7 +1028,7 @@ static int post_recv_path(struct rtrs_srv_path *srv_path)
|
|||
|
||||
err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size);
|
||||
if (err) {
|
||||
rtrs_err(s, "post_recv_io(), err: %d\n", err);
|
||||
rtrs_err(s, "post_recv_io(), err: %pe\n", ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
|
@ -1054,8 +1076,8 @@ static void process_read(struct rtrs_srv_con *con,
|
|||
|
||||
if (ret) {
|
||||
rtrs_err_rl(s,
|
||||
"Processing read request failed, user module cb reported for msg_id %d, err: %d\n",
|
||||
buf_id, ret);
|
||||
"Processing read request failed, user module cb reported for msg_id %d, err: %pe\n",
|
||||
buf_id, ERR_PTR(ret));
|
||||
goto send_err_msg;
|
||||
}
|
||||
|
||||
|
|
@ -1065,8 +1087,8 @@ send_err_msg:
|
|||
ret = send_io_resp_imm(con, id, ret);
|
||||
if (ret < 0) {
|
||||
rtrs_err_rl(s,
|
||||
"Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n",
|
||||
buf_id, ret);
|
||||
"Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %pe\n",
|
||||
buf_id, ERR_PTR(ret));
|
||||
close_path(srv_path);
|
||||
}
|
||||
rtrs_srv_put_ops_ids(srv_path);
|
||||
|
|
@ -1106,8 +1128,8 @@ static void process_write(struct rtrs_srv_con *con,
|
|||
data + data_len, usr_len);
|
||||
if (ret) {
|
||||
rtrs_err_rl(s,
|
||||
"Processing write request failed, user module callback reports err: %d\n",
|
||||
ret);
|
||||
"Processing write request failed, user module callback reports err: %pe\n",
|
||||
ERR_PTR(ret));
|
||||
goto send_err_msg;
|
||||
}
|
||||
|
||||
|
|
@ -1117,8 +1139,8 @@ send_err_msg:
|
|||
ret = send_io_resp_imm(con, id, ret);
|
||||
if (ret < 0) {
|
||||
rtrs_err_rl(s,
|
||||
"Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n",
|
||||
buf_id, ret);
|
||||
"Processing write request failed, sending I/O response failed, msg_id %d, err: %pe\n",
|
||||
buf_id, ERR_PTR(ret));
|
||||
close_path(srv_path);
|
||||
}
|
||||
rtrs_srv_put_ops_ids(srv_path);
|
||||
|
|
@ -1248,7 +1270,8 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
|
|||
srv_path->s.hb_missed_cnt = 0;
|
||||
err = rtrs_post_recv_empty(&con->c, &io_comp_cqe);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_post_recv(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_post_recv(), err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
close_path(srv_path);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1273,8 +1296,8 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
|
|||
mr->msg_id = msg_id;
|
||||
err = rtrs_srv_inv_rkey(con, mr);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_post_recv(), err: %d\n",
|
||||
err);
|
||||
rtrs_err(s, "rtrs_post_recv(), err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
close_path(srv_path);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1514,17 +1537,38 @@ static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b)
|
|||
}
|
||||
}
|
||||
|
||||
/* Let's close connections which have been waiting for more than 30 seconds */
|
||||
#define RTRS_MAX_CONN_TIMEOUT 30000
|
||||
|
||||
static void rtrs_srv_check_close_path(struct rtrs_srv_path *srv_path)
|
||||
{
|
||||
struct rtrs_path *s = &srv_path->s;
|
||||
|
||||
if (srv_path->state == RTRS_SRV_CONNECTING && srv_path->connection_timeout &&
|
||||
(jiffies_to_msecs(jiffies - srv_path->connection_timeout) > RTRS_MAX_CONN_TIMEOUT)) {
|
||||
rtrs_err(s, "Closing zombie path\n");
|
||||
close_path(srv_path);
|
||||
}
|
||||
}
|
||||
|
||||
static bool __is_path_w_addr_exists(struct rtrs_srv_sess *srv,
|
||||
struct rdma_addr *addr)
|
||||
{
|
||||
struct rtrs_srv_path *srv_path;
|
||||
|
||||
list_for_each_entry(srv_path, &srv->paths_list, s.entry)
|
||||
list_for_each_entry(srv_path, &srv->paths_list, s.entry) {
|
||||
if (!sockaddr_cmp((struct sockaddr *)&srv_path->s.dst_addr,
|
||||
(struct sockaddr *)&addr->dst_addr) &&
|
||||
!sockaddr_cmp((struct sockaddr *)&srv_path->s.src_addr,
|
||||
(struct sockaddr *)&addr->src_addr))
|
||||
(struct sockaddr *)&addr->src_addr)) {
|
||||
rtrs_err((&srv_path->s),
|
||||
"Path (%s) with same addr exists (lifetime %u)\n",
|
||||
rtrs_srv_state_str(srv_path->state),
|
||||
(jiffies_to_msecs(jiffies - srv_path->connection_timeout)));
|
||||
rtrs_srv_check_close_path(srv_path);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1623,7 +1667,7 @@ static int rtrs_rdma_do_accept(struct rtrs_srv_path *srv_path,
|
|||
|
||||
err = rdma_accept(cm_id, ¶m);
|
||||
if (err)
|
||||
pr_err("rdma_accept(), err: %d\n", err);
|
||||
pr_err("rdma_accept(), err: %pe\n", ERR_PTR(err));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -1641,7 +1685,7 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno)
|
|||
|
||||
err = rdma_reject(cm_id, &msg, sizeof(msg), IB_CM_REJ_CONSUMER_DEFINED);
|
||||
if (err)
|
||||
pr_err("rdma_reject(), err: %d\n", err);
|
||||
pr_err("rdma_reject(), err: %pe\n", ERR_PTR(err));
|
||||
|
||||
/* Bounce errno back */
|
||||
return errno;
|
||||
|
|
@ -1717,7 +1761,7 @@ static int create_con(struct rtrs_srv_path *srv_path,
|
|||
max_send_wr, max_recv_wr,
|
||||
IB_POLL_WORKQUEUE);
|
||||
if (err) {
|
||||
rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err);
|
||||
rtrs_err(s, "rtrs_cq_qp_create(), err: %pe\n", ERR_PTR(err));
|
||||
goto free_con;
|
||||
}
|
||||
if (con->c.cid == 0) {
|
||||
|
|
@ -1762,7 +1806,6 @@ static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv,
|
|||
}
|
||||
if (__is_path_w_addr_exists(srv, &cm_id->route.addr)) {
|
||||
err = -EEXIST;
|
||||
pr_err("Path with same addr exists\n");
|
||||
goto err;
|
||||
}
|
||||
srv_path = kzalloc(sizeof(*srv_path), GFP_KERNEL);
|
||||
|
|
@ -1809,6 +1852,7 @@ static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv,
|
|||
spin_lock_init(&srv_path->state_lock);
|
||||
INIT_WORK(&srv_path->close_work, rtrs_srv_close_work);
|
||||
rtrs_srv_init_hb(srv_path);
|
||||
srv_path->connection_timeout = 0;
|
||||
|
||||
srv_path->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd);
|
||||
if (!srv_path->s.dev) {
|
||||
|
|
@ -1914,8 +1958,10 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
|
|||
goto reject_w_err;
|
||||
}
|
||||
if (s->con[cid]) {
|
||||
rtrs_err(s, "Connection already exists: %d\n",
|
||||
cid);
|
||||
rtrs_err(s, "Connection (%s) already exists: %d (lifetime %u)\n",
|
||||
rtrs_srv_state_str(srv_path->state), cid,
|
||||
(jiffies_to_msecs(jiffies - srv_path->connection_timeout)));
|
||||
rtrs_srv_check_close_path(srv_path);
|
||||
mutex_unlock(&srv->paths_mutex);
|
||||
goto reject_w_err;
|
||||
}
|
||||
|
|
@ -1930,9 +1976,15 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
|
|||
goto reject_w_err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start of any connection creation resets the timeout for the path.
|
||||
*/
|
||||
srv_path->connection_timeout = jiffies;
|
||||
|
||||
err = create_con(srv_path, cm_id, cid);
|
||||
if (err) {
|
||||
rtrs_err((&srv_path->s), "create_con(), error %d\n", err);
|
||||
rtrs_err((&srv_path->s), "create_con(), error %pe\n", ERR_PTR(err));
|
||||
rtrs_rdma_do_reject(cm_id, err);
|
||||
/*
|
||||
* Since session has other connections we follow normal way
|
||||
|
|
@ -1943,7 +1995,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
|
|||
}
|
||||
err = rtrs_rdma_do_accept(srv_path, cm_id);
|
||||
if (err) {
|
||||
rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %d\n", err);
|
||||
rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %pe\n",
|
||||
ERR_PTR(err));
|
||||
rtrs_rdma_do_reject(cm_id, err);
|
||||
/*
|
||||
* Since current connection was successfully added to the
|
||||
|
|
@ -1994,8 +2047,15 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
|
|||
case RDMA_CM_EVENT_REJECTED:
|
||||
case RDMA_CM_EVENT_CONNECT_ERROR:
|
||||
case RDMA_CM_EVENT_UNREACHABLE:
|
||||
rtrs_err(s, "CM error (CM event: %s, err: %d)\n",
|
||||
rdma_event_msg(ev->event), ev->status);
|
||||
if (ev->status < 0) {
|
||||
rtrs_err(s, "CM error (CM event: %s, err: %pe)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
rtrs_err(s, "CM error (CM event: %s, err: %s)\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
fallthrough;
|
||||
case RDMA_CM_EVENT_DISCONNECTED:
|
||||
case RDMA_CM_EVENT_ADDR_CHANGE:
|
||||
|
|
@ -2004,8 +2064,15 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
|
|||
close_path(srv_path);
|
||||
break;
|
||||
default:
|
||||
pr_err("Ignoring unexpected CM event %s, err %d\n",
|
||||
rdma_event_msg(ev->event), ev->status);
|
||||
if (ev->status < 0) {
|
||||
pr_err("Ignoring unexpected CM event %s, err %pe\n",
|
||||
rdma_event_msg(ev->event),
|
||||
ERR_PTR(ev->status));
|
||||
} else if (ev->status > 0) {
|
||||
pr_err("Ignoring unexpected CM event %s, err %s\n",
|
||||
rdma_event_msg(ev->event),
|
||||
rdma_reject_msg(cm_id, ev->status));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -2029,13 +2096,13 @@ static struct rdma_cm_id *rtrs_srv_cm_init(struct rtrs_srv_ctx *ctx,
|
|||
}
|
||||
ret = rdma_bind_addr(cm_id, addr);
|
||||
if (ret) {
|
||||
pr_err("Binding RDMA address failed, err: %d\n", ret);
|
||||
pr_err("Binding RDMA address failed, err: %pe\n", ERR_PTR(ret));
|
||||
goto err_cm;
|
||||
}
|
||||
ret = rdma_listen(cm_id, 64);
|
||||
if (ret) {
|
||||
pr_err("Listening on RDMA connection failed, err: %d\n",
|
||||
ret);
|
||||
pr_err("Listening on RDMA connection failed, err: %pe\n",
|
||||
ERR_PTR(ret));
|
||||
goto err_cm;
|
||||
}
|
||||
|
||||
|
|
@ -2275,8 +2342,11 @@ static int check_module_params(void)
|
|||
void rtrs_srv_ib_event_handler(struct ib_event_handler *handler,
|
||||
struct ib_event *ibevent)
|
||||
{
|
||||
pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent->event),
|
||||
ibevent->event);
|
||||
struct ib_device *idev = ibevent->device;
|
||||
u32 port_num = ibevent->element.port_num;
|
||||
|
||||
pr_info("Handling event: %s (%d). HCA name: %s, port num: %u\n",
|
||||
ib_event_msg(ibevent->event), ibevent->event, idev->name, port_num);
|
||||
}
|
||||
|
||||
static int rtrs_srv_ib_dev_init(struct rtrs_ib_dev *dev)
|
||||
|
|
@ -2313,8 +2383,8 @@ static int __init rtrs_server_init(void)
|
|||
|
||||
err = check_module_params();
|
||||
if (err) {
|
||||
pr_err("Failed to load module, invalid module parameters, err: %d\n",
|
||||
err);
|
||||
pr_err("Failed to load module, invalid module parameters, err: %pe\n",
|
||||
ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
err = class_register(&rtrs_dev_class);
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ struct rtrs_srv_path {
|
|||
unsigned int mem_bits;
|
||||
struct kobject kobj;
|
||||
struct rtrs_srv_stats *stats;
|
||||
unsigned long connection_timeout;
|
||||
};
|
||||
|
||||
static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s)
|
||||
|
|
|
|||
|
|
@ -273,7 +273,8 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
|
|||
|
||||
ret = rdma_create_qp(cm_id, pd, &init_attr);
|
||||
if (ret) {
|
||||
rtrs_err(con->path, "Creating QP failed, err: %d\n", ret);
|
||||
rtrs_err(con->path, "Creating QP failed, err: %pe\n",
|
||||
ERR_PTR(ret));
|
||||
return ret;
|
||||
}
|
||||
con->qp = cm_id->qp;
|
||||
|
|
@ -341,7 +342,8 @@ void rtrs_send_hb_ack(struct rtrs_path *path)
|
|||
err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
|
||||
NULL);
|
||||
if (err) {
|
||||
rtrs_err(path, "send HB ACK failed, errno: %d\n", err);
|
||||
rtrs_err(path, "send HB ACK failed, errno: %pe\n",
|
||||
ERR_PTR(err));
|
||||
path->hb_err_handler(usr_con);
|
||||
return;
|
||||
}
|
||||
|
|
@ -375,7 +377,8 @@ static void hb_work(struct work_struct *work)
|
|||
err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
|
||||
NULL);
|
||||
if (err) {
|
||||
rtrs_err(path, "HB send failed, errno: %d\n", err);
|
||||
rtrs_err(path, "HB send failed, errno: %pe\n",
|
||||
ERR_PTR(err));
|
||||
path->hb_err_handler(usr_con);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ enum gdma_request_type {
|
|||
GDMA_CREATE_MR = 31,
|
||||
GDMA_DESTROY_MR = 32,
|
||||
GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */
|
||||
GDMA_ALLOC_DM = 96, /* 0x60 */
|
||||
GDMA_DESTROY_DM = 97, /* 0x61 */
|
||||
};
|
||||
|
||||
#define GDMA_RESOURCE_DOORBELL_PAGE 27
|
||||
|
|
@ -866,6 +868,8 @@ enum gdma_mr_type {
|
|||
GDMA_MR_TYPE_GVA = 2,
|
||||
/* Guest zero-based address MRs */
|
||||
GDMA_MR_TYPE_ZBVA = 4,
|
||||
/* Device address MRs */
|
||||
GDMA_MR_TYPE_DM = 5,
|
||||
};
|
||||
|
||||
struct gdma_create_mr_params {
|
||||
|
|
@ -881,6 +885,12 @@ struct gdma_create_mr_params {
|
|||
u64 dma_region_handle;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} zbva;
|
||||
struct {
|
||||
u64 dm_handle;
|
||||
u64 offset;
|
||||
u64 length;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} da;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@ -895,13 +905,23 @@ struct gdma_create_mr_request {
|
|||
u64 dma_region_handle;
|
||||
u64 virtual_address;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} gva;
|
||||
} __packed gva;
|
||||
struct {
|
||||
u64 dma_region_handle;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} zbva;
|
||||
};
|
||||
} __packed zbva;
|
||||
struct {
|
||||
u64 dm_handle;
|
||||
u64 offset;
|
||||
enum gdma_mr_access_flags access_flags;
|
||||
} __packed da;
|
||||
} __packed;
|
||||
u32 reserved_2;
|
||||
union {
|
||||
struct {
|
||||
u64 length;
|
||||
} da_ext;
|
||||
};
|
||||
};/* HW DATA */
|
||||
|
||||
struct gdma_create_mr_response {
|
||||
|
|
@ -920,6 +940,27 @@ struct gdma_destroy_mr_response {
|
|||
struct gdma_resp_hdr hdr;
|
||||
};/* HW DATA */
|
||||
|
||||
struct gdma_alloc_dm_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
u64 length;
|
||||
u32 alignment;
|
||||
u32 flags;
|
||||
}; /* HW Data */
|
||||
|
||||
struct gdma_alloc_dm_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
u64 dm_handle;
|
||||
}; /* HW Data */
|
||||
|
||||
struct gdma_destroy_dm_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
u64 dm_handle;
|
||||
}; /* HW Data */
|
||||
|
||||
struct gdma_destroy_dm_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
}; /* HW Data */
|
||||
|
||||
int mana_gd_verify_vf_version(struct pci_dev *pdev);
|
||||
|
||||
int mana_gd_register_device(struct gdma_dev *gd);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/ethtool.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/bvec.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/list.h>
|
||||
|
|
@ -43,6 +44,7 @@
|
|||
#include <uapi/rdma/rdma_user_ioctl.h>
|
||||
#include <uapi/rdma/ib_user_ioctl_verbs.h>
|
||||
#include <linux/pci-tph.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
|
||||
|
||||
|
|
@ -764,6 +766,7 @@ enum ib_event_type {
|
|||
IB_EVENT_CLIENT_REREGISTER,
|
||||
IB_EVENT_GID_CHANGE,
|
||||
IB_EVENT_WQ_FATAL,
|
||||
IB_EVENT_DEVICE_SPEED_CHANGE,
|
||||
};
|
||||
|
||||
const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
|
||||
|
|
@ -877,6 +880,20 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
|
|||
*/
|
||||
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
|
||||
|
||||
struct ib_port_speed_info {
|
||||
const char *str;
|
||||
int rate; /* in deci-Gb/sec (100 MBps units) */
|
||||
};
|
||||
|
||||
/**
|
||||
* ib_port_attr_to_speed_info - Convert port attributes to speed information
|
||||
* @attr: Port attributes containing active_speed and active_width
|
||||
* @speed_info: Speed information to return
|
||||
*
|
||||
* Returns 0 on success, -EINVAL on error.
|
||||
*/
|
||||
int ib_port_attr_to_speed_info(struct ib_port_attr *attr,
|
||||
struct ib_port_speed_info *speed_info);
|
||||
|
||||
/**
|
||||
* enum ib_mr_type - memory region type
|
||||
|
|
@ -2348,6 +2365,9 @@ struct rdma_user_mmap_entry {
|
|||
unsigned long start_pgoff;
|
||||
size_t npages;
|
||||
bool driver_removed;
|
||||
/* protects access to dmabufs */
|
||||
struct mutex dmabufs_lock;
|
||||
struct list_head dmabufs;
|
||||
};
|
||||
|
||||
/* Return the offset (in bytes) the user should pass to libc's mmap() */
|
||||
|
|
@ -2403,6 +2423,8 @@ struct ib_device_ops {
|
|||
int comp_vector);
|
||||
int (*query_port)(struct ib_device *device, u32 port_num,
|
||||
struct ib_port_attr *port_attr);
|
||||
int (*query_port_speed)(struct ib_device *device, u32 port_num,
|
||||
u64 *speed);
|
||||
int (*modify_port)(struct ib_device *device, u32 port_num,
|
||||
int port_modify_mask,
|
||||
struct ib_port_modify *port_modify);
|
||||
|
|
@ -2483,6 +2505,11 @@ struct ib_device_ops {
|
|||
* Therefore needs to be implemented by the driver in mmap_free.
|
||||
*/
|
||||
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
|
||||
int (*mmap_get_pfns)(struct rdma_user_mmap_entry *entry,
|
||||
struct phys_vec *phys_vec,
|
||||
struct p2pdma_provider **provider);
|
||||
struct rdma_user_mmap_entry *(*pgoff_to_mmap_entry)(struct ib_ucontext *ucontext,
|
||||
off_t pg_off);
|
||||
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
|
||||
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
|
||||
int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
|
||||
|
|
@ -4249,6 +4276,47 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
|
|||
dma_unmap_page(dev->dma_device, addr, size, direction);
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_dma_map_bvec - Map a bio_vec to DMA address
|
||||
* @dev: The device for which the dma_addr is to be created
|
||||
* @bvec: The bio_vec to map
|
||||
* @direction: The direction of the DMA
|
||||
*
|
||||
* Returns a DMA address for the bio_vec. The caller must check the
|
||||
* result with ib_dma_mapping_error() before use; a failed mapping
|
||||
* must not be passed to ib_dma_unmap_bvec().
|
||||
*
|
||||
* For software RDMA devices (rxe, siw), returns a virtual address
|
||||
* and no actual DMA mapping occurs.
|
||||
*/
|
||||
static inline u64 ib_dma_map_bvec(struct ib_device *dev,
|
||||
struct bio_vec *bvec,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
if (ib_uses_virt_dma(dev))
|
||||
return (uintptr_t)bvec_virt(bvec);
|
||||
return dma_map_phys(dev->dma_device, bvec_phys(bvec),
|
||||
bvec->bv_len, direction, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* ib_dma_unmap_bvec - Unmap a bio_vec DMA mapping
|
||||
* @dev: The device for which the DMA address was created
|
||||
* @addr: The DMA address returned by ib_dma_map_bvec()
|
||||
* @size: The size of the region in bytes
|
||||
* @direction: The direction of the DMA
|
||||
*
|
||||
* Releases a DMA mapping created by ib_dma_map_bvec(). For software
|
||||
* RDMA devices this is a no-op since no actual mapping occurred.
|
||||
*/
|
||||
static inline void ib_dma_unmap_bvec(struct ib_device *dev,
|
||||
u64 addr, size_t size,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
if (!ib_uses_virt_dma(dev))
|
||||
dma_unmap_phys(dev->dma_device, addr, size, direction, 0);
|
||||
}
|
||||
|
||||
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
|
||||
static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
|
||||
struct scatterlist *sg, int nents,
|
||||
|
|
@ -4545,8 +4613,6 @@ static inline bool ib_device_try_get(struct ib_device *dev)
|
|||
void ib_device_put(struct ib_device *device);
|
||||
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
|
||||
enum rdma_driver_id driver_id);
|
||||
struct ib_device *ib_device_get_by_name(const char *name,
|
||||
enum rdma_driver_id driver_id);
|
||||
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
|
||||
u16 pkey, const union ib_gid *gid,
|
||||
const struct sockaddr *addr);
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#ifndef _RDMA_RW_H
|
||||
#define _RDMA_RW_H
|
||||
|
||||
#include <linux/bvec.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
|
@ -31,6 +32,14 @@ struct rdma_rw_ctx {
|
|||
struct ib_rdma_wr *wrs;
|
||||
} map;
|
||||
|
||||
/* for IOVA-based mapping of bvecs into contiguous DMA range: */
|
||||
struct {
|
||||
struct dma_iova_state state;
|
||||
struct ib_sge sge;
|
||||
struct ib_rdma_wr wr;
|
||||
size_t mapped_len;
|
||||
} iova;
|
||||
|
||||
/* for registering multiple WRs: */
|
||||
struct rdma_rw_reg_ctx {
|
||||
struct ib_sge sge;
|
||||
|
|
@ -38,6 +47,7 @@ struct rdma_rw_ctx {
|
|||
struct ib_reg_wr reg_wr;
|
||||
struct ib_send_wr inv_wr;
|
||||
struct ib_mr *mr;
|
||||
struct sg_table sgt;
|
||||
} *reg;
|
||||
};
|
||||
};
|
||||
|
|
@ -49,6 +59,16 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
|||
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
|
||||
enum dma_data_direction dir);
|
||||
|
||||
struct bio_vec;
|
||||
|
||||
int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
|
||||
struct bvec_iter iter, u64 remote_addr, u32 rkey,
|
||||
enum dma_data_direction dir);
|
||||
void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
|
||||
enum dma_data_direction dir);
|
||||
|
||||
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
|
||||
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
|
||||
struct scatterlist *prot_sg, u32 prot_sg_cnt,
|
||||
|
|
@ -66,6 +86,8 @@ int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
|
|||
|
||||
unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
|
||||
unsigned int maxpages);
|
||||
unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
|
||||
unsigned int max_rdma_ctxs, u32 create_flags);
|
||||
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
|
||||
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
|
||||
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ struct ib_uverbs_file {
|
|||
extern const struct uverbs_obj_type_class uverbs_idr_class;
|
||||
extern const struct uverbs_obj_type_class uverbs_fd_class;
|
||||
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
|
||||
int uverbs_uobject_release(struct ib_uobject *uobj);
|
||||
|
||||
#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
|
||||
sizeof(char))
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ enum {
|
|||
BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
|
||||
BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL,
|
||||
BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40,
|
||||
BNXT_RE_UCNTX_CMASK_QP_RATE_LIMIT_ENABLED = 0x80ULL,
|
||||
};
|
||||
|
||||
enum bnxt_re_wqe_mode {
|
||||
|
|
@ -215,4 +216,19 @@ enum bnxt_re_toggle_mem_methods {
|
|||
BNXT_RE_METHOD_GET_TOGGLE_MEM = (1U << UVERBS_ID_NS_SHIFT),
|
||||
BNXT_RE_METHOD_RELEASE_TOGGLE_MEM,
|
||||
};
|
||||
|
||||
struct bnxt_re_packet_pacing_caps {
|
||||
__u32 qp_rate_limit_min;
|
||||
__u32 qp_rate_limit_max; /* In kbps */
|
||||
/* Corresponding bit will be set if qp type from
|
||||
* 'enum ib_qp_type' is supported, e.g.
|
||||
* supported_qpts |= 1 << IB_QPT_RC
|
||||
*/
|
||||
__u32 supported_qpts;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
struct bnxt_re_query_device_ex_resp {
|
||||
struct bnxt_re_packet_pacing_caps packet_pacing_caps;
|
||||
};
|
||||
#endif /* __BNXT_RE_UVERBS_ABI_H__*/
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ enum uverbs_default_objects {
|
|||
UVERBS_OBJECT_COUNTERS,
|
||||
UVERBS_OBJECT_ASYNC_EVENT,
|
||||
UVERBS_OBJECT_DMAH,
|
||||
UVERBS_OBJECT_DMABUF,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
|
@ -73,6 +74,7 @@ enum uverbs_methods_device {
|
|||
UVERBS_METHOD_QUERY_CONTEXT,
|
||||
UVERBS_METHOD_QUERY_GID_TABLE,
|
||||
UVERBS_METHOD_QUERY_GID_ENTRY,
|
||||
UVERBS_METHOD_QUERY_PORT_SPEED,
|
||||
};
|
||||
|
||||
enum uverbs_attrs_invoke_write_cmd_attr_ids {
|
||||
|
|
@ -86,6 +88,11 @@ enum uverbs_attrs_query_port_cmd_attr_ids {
|
|||
UVERBS_ATTR_QUERY_PORT_RESP,
|
||||
};
|
||||
|
||||
enum uverbs_attrs_query_port_speed_cmd_attr_ids {
|
||||
UVERBS_ATTR_QUERY_PORT_SPEED_PORT_NUM,
|
||||
UVERBS_ATTR_QUERY_PORT_SPEED_RESP,
|
||||
};
|
||||
|
||||
enum uverbs_attrs_get_context_attr_ids {
|
||||
UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
|
||||
UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
|
||||
|
|
@ -257,6 +264,15 @@ enum uverbs_methods_dmah {
|
|||
UVERBS_METHOD_DMAH_FREE,
|
||||
};
|
||||
|
||||
enum uverbs_attrs_alloc_dmabuf_cmd_attr_ids {
|
||||
UVERBS_ATTR_ALLOC_DMABUF_HANDLE,
|
||||
UVERBS_ATTR_ALLOC_DMABUF_PGOFF,
|
||||
};
|
||||
|
||||
enum uverbs_methods_dmabuf {
|
||||
UVERBS_METHOD_DMABUF_ALLOC,
|
||||
};
|
||||
|
||||
enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
|
||||
UVERBS_ATTR_REG_DM_MR_HANDLE,
|
||||
UVERBS_ATTR_REG_DM_MR_OFFSET,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@
|
|||
#define MANA_IB_UVERBS_ABI_VERSION 1
|
||||
|
||||
enum mana_ib_create_cq_flags {
|
||||
/* Reserved for backward compatibility. Legacy
|
||||
* kernel versions use it to create CQs in RNIC
|
||||
*/
|
||||
MANA_IB_CREATE_RNIC_CQ = 1 << 0,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
* Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
|
||||
*/
|
||||
|
||||
#include <linux/bvec.h>
|
||||
#include <linux/overflow.h>
|
||||
#include <rdma/rw.h>
|
||||
|
||||
#include <linux/sunrpc/xdr.h>
|
||||
|
|
@ -20,30 +22,33 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
|
|||
/* Each R/W context contains state for one chain of RDMA Read or
|
||||
* Write Work Requests.
|
||||
*
|
||||
* Each WR chain handles a single contiguous server-side buffer,
|
||||
* because scatterlist entries after the first have to start on
|
||||
* page alignment. xdr_buf iovecs cannot guarantee alignment.
|
||||
* Each WR chain handles a single contiguous server-side buffer.
|
||||
* - each xdr_buf iovec is a single contiguous buffer
|
||||
* - the xdr_buf pages array is a single contiguous buffer because the
|
||||
* second through the last element always start on a page boundary
|
||||
*
|
||||
* Each WR chain handles only one R_key. Each RPC-over-RDMA segment
|
||||
* from a client may contain a unique R_key, so each WR chain moves
|
||||
* up to one segment at a time.
|
||||
*
|
||||
* The scatterlist makes this data structure over 4KB in size. To
|
||||
* make it less likely to fail, and to handle the allocation for
|
||||
* smaller I/O requests without disabling bottom-halves, these
|
||||
* contexts are created on demand, but cached and reused until the
|
||||
* controlling svcxprt_rdma is destroyed.
|
||||
* The inline bvec array is sized to handle most I/O requests without
|
||||
* additional allocation. Larger requests fall back to dynamic allocation.
|
||||
* These contexts are created on demand, but cached and reused until
|
||||
* the controlling svcxprt_rdma is destroyed.
|
||||
*/
|
||||
struct svc_rdma_rw_ctxt {
|
||||
struct llist_node rw_node;
|
||||
struct list_head rw_list;
|
||||
struct rdma_rw_ctx rw_ctx;
|
||||
unsigned int rw_nents;
|
||||
unsigned int rw_first_sgl_nents;
|
||||
struct sg_table rw_sg_table;
|
||||
struct scatterlist rw_first_sgl[];
|
||||
unsigned int rw_first_bvec_nents;
|
||||
struct bio_vec *rw_bvec;
|
||||
struct bio_vec rw_first_bvec[];
|
||||
};
|
||||
|
||||
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
|
||||
struct svc_rdma_rw_ctxt *ctxt);
|
||||
|
||||
static inline struct svc_rdma_rw_ctxt *
|
||||
svc_rdma_next_ctxt(struct list_head *list)
|
||||
{
|
||||
|
|
@ -52,10 +57,10 @@ svc_rdma_next_ctxt(struct list_head *list)
|
|||
}
|
||||
|
||||
static struct svc_rdma_rw_ctxt *
|
||||
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
|
||||
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int nr_bvec)
|
||||
{
|
||||
struct ib_device *dev = rdma->sc_cm_id->device;
|
||||
unsigned int first_sgl_nents = dev->attrs.max_send_sge;
|
||||
unsigned int first_bvec_nents = dev->attrs.max_send_sge;
|
||||
struct svc_rdma_rw_ctxt *ctxt;
|
||||
struct llist_node *node;
|
||||
|
||||
|
|
@ -65,33 +70,44 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
|
|||
if (node) {
|
||||
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
|
||||
} else {
|
||||
ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, first_sgl_nents),
|
||||
ctxt = kmalloc_node(struct_size(ctxt, rw_first_bvec,
|
||||
first_bvec_nents),
|
||||
GFP_KERNEL, ibdev_to_node(dev));
|
||||
if (!ctxt)
|
||||
goto out_noctx;
|
||||
|
||||
INIT_LIST_HEAD(&ctxt->rw_list);
|
||||
ctxt->rw_first_sgl_nents = first_sgl_nents;
|
||||
ctxt->rw_first_bvec_nents = first_bvec_nents;
|
||||
}
|
||||
|
||||
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
|
||||
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
|
||||
ctxt->rw_sg_table.sgl,
|
||||
first_sgl_nents))
|
||||
goto out_free;
|
||||
if (nr_bvec <= ctxt->rw_first_bvec_nents) {
|
||||
ctxt->rw_bvec = ctxt->rw_first_bvec;
|
||||
} else {
|
||||
ctxt->rw_bvec = kmalloc_array_node(nr_bvec,
|
||||
sizeof(*ctxt->rw_bvec),
|
||||
GFP_KERNEL,
|
||||
ibdev_to_node(dev));
|
||||
if (!ctxt->rw_bvec)
|
||||
goto out_free;
|
||||
}
|
||||
return ctxt;
|
||||
|
||||
out_free:
|
||||
kfree(ctxt);
|
||||
/* Return cached contexts to cache; free freshly allocated ones */
|
||||
if (node)
|
||||
svc_rdma_put_rw_ctxt(rdma, ctxt);
|
||||
else
|
||||
kfree(ctxt);
|
||||
out_noctx:
|
||||
trace_svcrdma_rwctx_empty(rdma, sges);
|
||||
trace_svcrdma_rwctx_empty(rdma, nr_bvec);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
|
||||
struct llist_head *list)
|
||||
{
|
||||
sg_free_table_chained(&ctxt->rw_sg_table, ctxt->rw_first_sgl_nents);
|
||||
if (ctxt->rw_bvec != ctxt->rw_first_bvec)
|
||||
kfree(ctxt->rw_bvec);
|
||||
llist_add(&ctxt->rw_node, list);
|
||||
}
|
||||
|
||||
|
|
@ -123,6 +139,7 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
|
|||
* @ctxt: R/W context to prepare
|
||||
* @offset: RDMA offset
|
||||
* @handle: RDMA tag/handle
|
||||
* @length: total number of bytes in the bvec array
|
||||
* @direction: I/O direction
|
||||
*
|
||||
* Returns on success, the number of WQEs that will be needed
|
||||
|
|
@ -130,14 +147,18 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
|
|||
*/
|
||||
static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
|
||||
struct svc_rdma_rw_ctxt *ctxt,
|
||||
u64 offset, u32 handle,
|
||||
u64 offset, u32 handle, unsigned int length,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
struct bvec_iter iter = {
|
||||
.bi_size = length,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num,
|
||||
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
|
||||
0, offset, handle, direction);
|
||||
ret = rdma_rw_ctx_init_bvec(&ctxt->rw_ctx, rdma->sc_qp,
|
||||
rdma->sc_port_num,
|
||||
ctxt->rw_bvec, ctxt->rw_nents,
|
||||
iter, offset, handle, direction);
|
||||
if (unlikely(ret < 0)) {
|
||||
trace_svcrdma_dma_map_rw_err(rdma, offset, handle,
|
||||
ctxt->rw_nents, ret);
|
||||
|
|
@ -175,7 +196,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
|
|||
{
|
||||
struct llist_node *first, *last;
|
||||
struct svc_rdma_rw_ctxt *ctxt;
|
||||
LLIST_HEAD(free);
|
||||
|
||||
trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
|
||||
|
||||
|
|
@ -183,10 +203,11 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
|
|||
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
|
||||
list_del(&ctxt->rw_list);
|
||||
|
||||
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
|
||||
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
|
||||
ctxt->rw_nents, dir);
|
||||
__svc_rdma_put_rw_ctxt(ctxt, &free);
|
||||
rdma_rw_ctx_destroy_bvec(&ctxt->rw_ctx, rdma->sc_qp,
|
||||
rdma->sc_port_num,
|
||||
ctxt->rw_bvec, ctxt->rw_nents, dir);
|
||||
if (ctxt->rw_bvec != ctxt->rw_first_bvec)
|
||||
kfree(ctxt->rw_bvec);
|
||||
|
||||
ctxt->rw_node.next = first;
|
||||
first = &ctxt->rw_node;
|
||||
|
|
@ -414,29 +435,26 @@ static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
|
|||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
/* Build and DMA-map an SGL that covers one kvec in an xdr_buf
|
||||
/* Build a bvec that covers one kvec in an xdr_buf.
|
||||
*/
|
||||
static void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info,
|
||||
unsigned int len,
|
||||
struct svc_rdma_rw_ctxt *ctxt)
|
||||
static void svc_rdma_vec_to_bvec(struct svc_rdma_write_info *info,
|
||||
unsigned int len,
|
||||
struct svc_rdma_rw_ctxt *ctxt)
|
||||
{
|
||||
struct scatterlist *sg = ctxt->rw_sg_table.sgl;
|
||||
|
||||
sg_set_buf(&sg[0], info->wi_base, len);
|
||||
bvec_set_virt(&ctxt->rw_bvec[0], info->wi_base, len);
|
||||
info->wi_base += len;
|
||||
|
||||
ctxt->rw_nents = 1;
|
||||
}
|
||||
|
||||
/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist.
|
||||
/* Build a bvec array that covers part of an xdr_buf's pagelist.
|
||||
*/
|
||||
static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
|
||||
unsigned int remaining,
|
||||
struct svc_rdma_rw_ctxt *ctxt)
|
||||
static void svc_rdma_pagelist_to_bvec(struct svc_rdma_write_info *info,
|
||||
unsigned int remaining,
|
||||
struct svc_rdma_rw_ctxt *ctxt)
|
||||
{
|
||||
unsigned int sge_no, sge_bytes, page_off, page_no;
|
||||
unsigned int bvec_idx, bvec_len, page_off, page_no;
|
||||
const struct xdr_buf *xdr = info->wi_xdr;
|
||||
struct scatterlist *sg;
|
||||
struct page **page;
|
||||
|
||||
page_off = info->wi_next_off + xdr->page_base;
|
||||
|
|
@ -444,21 +462,19 @@ static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
|
|||
page_off = offset_in_page(page_off);
|
||||
page = xdr->pages + page_no;
|
||||
info->wi_next_off += remaining;
|
||||
sg = ctxt->rw_sg_table.sgl;
|
||||
sge_no = 0;
|
||||
bvec_idx = 0;
|
||||
do {
|
||||
sge_bytes = min_t(unsigned int, remaining,
|
||||
PAGE_SIZE - page_off);
|
||||
sg_set_page(sg, *page, sge_bytes, page_off);
|
||||
|
||||
remaining -= sge_bytes;
|
||||
sg = sg_next(sg);
|
||||
bvec_len = min_t(unsigned int, remaining,
|
||||
PAGE_SIZE - page_off);
|
||||
bvec_set_page(&ctxt->rw_bvec[bvec_idx], *page, bvec_len,
|
||||
page_off);
|
||||
remaining -= bvec_len;
|
||||
page_off = 0;
|
||||
sge_no++;
|
||||
bvec_idx++;
|
||||
page++;
|
||||
} while (remaining);
|
||||
|
||||
ctxt->rw_nents = sge_no;
|
||||
ctxt->rw_nents = bvec_idx;
|
||||
}
|
||||
|
||||
/* Construct RDMA Write WRs to send a portion of an xdr_buf containing
|
||||
|
|
@ -496,7 +512,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
|
|||
constructor(info, write_len, ctxt);
|
||||
offset = seg->rs_offset + info->wi_seg_off;
|
||||
ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, seg->rs_handle,
|
||||
DMA_TO_DEVICE);
|
||||
write_len, DMA_TO_DEVICE);
|
||||
if (ret < 0)
|
||||
return -EIO;
|
||||
percpu_counter_inc(&svcrdma_stat_write);
|
||||
|
|
@ -535,7 +551,7 @@ static int svc_rdma_iov_write(struct svc_rdma_write_info *info,
|
|||
const struct kvec *iov)
|
||||
{
|
||||
info->wi_base = iov->iov_base;
|
||||
return svc_rdma_build_writes(info, svc_rdma_vec_to_sg,
|
||||
return svc_rdma_build_writes(info, svc_rdma_vec_to_bvec,
|
||||
iov->iov_len);
|
||||
}
|
||||
|
||||
|
|
@ -559,7 +575,7 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info,
|
|||
{
|
||||
info->wi_xdr = xdr;
|
||||
info->wi_next_off = offset - xdr->head[0].iov_len;
|
||||
return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
|
||||
return svc_rdma_build_writes(info, svc_rdma_pagelist_to_bvec,
|
||||
length);
|
||||
}
|
||||
|
||||
|
|
@ -734,29 +750,29 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
|
|||
{
|
||||
struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
|
||||
struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
|
||||
unsigned int sge_no, seg_len, len;
|
||||
unsigned int bvec_idx, nr_bvec, seg_len, len, total;
|
||||
struct svc_rdma_rw_ctxt *ctxt;
|
||||
struct scatterlist *sg;
|
||||
int ret;
|
||||
|
||||
len = segment->rs_length;
|
||||
sge_no = PAGE_ALIGN(head->rc_pageoff + len) >> PAGE_SHIFT;
|
||||
ctxt = svc_rdma_get_rw_ctxt(rdma, sge_no);
|
||||
if (check_add_overflow(head->rc_pageoff, len, &total))
|
||||
return -EINVAL;
|
||||
nr_bvec = PAGE_ALIGN(total) >> PAGE_SHIFT;
|
||||
ctxt = svc_rdma_get_rw_ctxt(rdma, nr_bvec);
|
||||
if (!ctxt)
|
||||
return -ENOMEM;
|
||||
ctxt->rw_nents = sge_no;
|
||||
ctxt->rw_nents = nr_bvec;
|
||||
|
||||
sg = ctxt->rw_sg_table.sgl;
|
||||
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
|
||||
for (bvec_idx = 0; bvec_idx < ctxt->rw_nents; bvec_idx++) {
|
||||
seg_len = min_t(unsigned int, len,
|
||||
PAGE_SIZE - head->rc_pageoff);
|
||||
|
||||
if (!head->rc_pageoff)
|
||||
head->rc_page_count++;
|
||||
|
||||
sg_set_page(sg, rqstp->rq_pages[head->rc_curpage],
|
||||
seg_len, head->rc_pageoff);
|
||||
sg = sg_next(sg);
|
||||
bvec_set_page(&ctxt->rw_bvec[bvec_idx],
|
||||
rqstp->rq_pages[head->rc_curpage],
|
||||
seg_len, head->rc_pageoff);
|
||||
|
||||
head->rc_pageoff += seg_len;
|
||||
if (head->rc_pageoff == PAGE_SIZE) {
|
||||
|
|
@ -770,7 +786,8 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
|
|||
}
|
||||
|
||||
ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
|
||||
segment->rs_handle, DMA_FROM_DEVICE);
|
||||
segment->rs_handle, segment->rs_length,
|
||||
DMA_FROM_DEVICE);
|
||||
if (ret < 0)
|
||||
return -EIO;
|
||||
percpu_counter_inc(&svcrdma_stat_read);
|
||||
|
|
|
|||
|
|
@ -462,7 +462,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
|||
newxprt->sc_max_bc_requests = 2;
|
||||
}
|
||||
|
||||
/* Arbitrary estimate of the needed number of rdma_rw contexts.
|
||||
/* Estimate the needed number of rdma_rw contexts. The maximum
|
||||
* Read and Write chunks have one segment each. Each request
|
||||
* can involve one Read chunk and either a Write chunk or Reply
|
||||
* chunk; thus a factor of three.
|
||||
*/
|
||||
maxpayload = min(xprt->xpt_server->sv_max_payload,
|
||||
RPCSVC_MAXPAYLOAD_RDMA);
|
||||
|
|
@ -470,7 +473,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
|||
rdma_rw_mr_factor(dev, newxprt->sc_port_num,
|
||||
maxpayload >> PAGE_SHIFT);
|
||||
|
||||
newxprt->sc_sq_depth = rq_depth + ctxts;
|
||||
newxprt->sc_sq_depth = rq_depth +
|
||||
rdma_rw_max_send_wr(dev, newxprt->sc_port_num, ctxts, 0);
|
||||
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
|
||||
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
|
||||
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue