mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
net/rds: new extension header: rdma bytes
Introduce a new extension header type, RDS_EXTHDR_RDMA_BYTES, that lets an RDMA initiator report RDMA byte counts to its target. Currently, RDMA operations cannot precisely account for how many bytes a peer just transferred via RDMA, which limits per-connection statistics and future policy (e.g., monitoring or rate/cgroup accounting of RDMA traffic). This patch extends rds_message_add_extension() to accept multiple extensions, and adds a new RDS header flag, RDS_FLAG_EXTHDR_EXTENSION, along with a new RDS header extension, struct rds_ext_header_rdma_bytes. Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com> Signed-off-by: Guangyu Sun <guangyu.sun@oracle.com> Signed-off-by: Allison Henderson <allison.henderson@oracle.com> Link: https://patch.msgid.link/20260203055723.1085751-2-achender@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
acd21dd2da
commit
46f257ee69
4 changed files with 107 additions and 29 deletions
|
|
@ -577,16 +577,42 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
|
|||
/* If it has a RDMA op, tell the peer we did it. This is
|
||||
* used by the peer to release use-once RDMA MRs. */
|
||||
if (rm->rdma.op_active) {
|
||||
struct rds_ext_header_rdma ext_hdr;
|
||||
struct rds_ext_header_rdma ext_hdr = {};
|
||||
struct rds_ext_header_rdma_bytes
|
||||
rdma_bytes_ext_hdr = {};
|
||||
|
||||
ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
|
||||
rds_message_add_extension(&rm->m_inc.i_hdr,
|
||||
RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
|
||||
if (rds_message_add_extension(&rm->m_inc.i_hdr,
|
||||
RDS_EXTHDR_RDMA,
|
||||
&ext_hdr)) {
|
||||
/* prepare the rdma bytes ext header */
|
||||
rdma_bytes_ext_hdr.h_rflags =
|
||||
rm->rdma.op_write ?
|
||||
RDS_FLAG_RDMA_WR_BYTES :
|
||||
RDS_FLAG_RDMA_RD_BYTES;
|
||||
rdma_bytes_ext_hdr.h_rdma_bytes =
|
||||
cpu_to_be32(rm->rdma.op_bytes);
|
||||
} else {
|
||||
rdsdebug("RDS_EXTHDR_RDMA dropped");
|
||||
}
|
||||
|
||||
if (rds_message_add_extension(&rm->m_inc.i_hdr,
|
||||
RDS_EXTHDR_RDMA_BYTES,
|
||||
&rdma_bytes_ext_hdr)) {
|
||||
/* rdma bytes ext header was added successfully,
|
||||
* notify the remote side via flag in header
|
||||
*/
|
||||
rm->m_inc.i_hdr.h_flags |=
|
||||
RDS_FLAG_EXTHDR_EXTENSION;
|
||||
} else {
|
||||
rdsdebug("RDS_EXTHDR_RDMA_BYTES dropped");
|
||||
}
|
||||
}
|
||||
if (rm->m_rdma_cookie) {
|
||||
rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
|
||||
rds_rdma_cookie_key(rm->m_rdma_cookie),
|
||||
rds_rdma_cookie_offset(rm->m_rdma_cookie));
|
||||
if (rm->m_rdma_cookie &&
|
||||
!rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
|
||||
rds_rdma_cookie_key(rm->m_rdma_cookie),
|
||||
rds_rdma_cookie_offset(rm->m_rdma_cookie))) {
|
||||
rdsdebug("RDS_EXTHDR_RDMA_DEST dropped\n");
|
||||
}
|
||||
|
||||
/* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
|
|||
[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
|
||||
[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
|
||||
[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
|
||||
[RDS_EXTHDR_RDMA_BYTES] = sizeof(struct rds_ext_header_rdma_bytes),
|
||||
[RDS_EXTHDR_NPATHS] = sizeof(__be16),
|
||||
[RDS_EXTHDR_GEN_NUM] = sizeof(__be32),
|
||||
};
|
||||
|
|
@ -191,31 +192,69 @@ void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
|
|||
hdr->h_sport = sport;
|
||||
hdr->h_dport = dport;
|
||||
hdr->h_sequence = cpu_to_be64(seq);
|
||||
hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
|
||||
/* see rds_find_next_ext_space for reason why we memset the
|
||||
* ext header
|
||||
*/
|
||||
memset(hdr->h_exthdr, RDS_EXTHDR_NONE, RDS_HEADER_EXT_SPACE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rds_message_populate_header);
|
||||
|
||||
int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
|
||||
const void *data, unsigned int len)
|
||||
/*
|
||||
* Find the next place we can add an RDS header extension with
|
||||
* specific length. Extension headers are pushed one after the
|
||||
* other. In the following, the number after the colon is the number
|
||||
* of bytes:
|
||||
*
|
||||
* [ type1:1 dta1:len1 [ type2:1 dta2:len2 ] ... ] RDS_EXTHDR_NONE
|
||||
*
|
||||
* If the extension headers fill the complete extension header space
|
||||
* (16 bytes), the trailing RDS_EXTHDR_NONE is omitted.
|
||||
*/
|
||||
/*
 * Walk the extension area of @hdr and locate the first free slot that can
 * hold one type byte plus @len bytes of payload.
 *
 * @hdr:       RDS header whose h_exthdr[] area is scanned.
 * @len:       payload length of the extension to be added (type byte excluded).
 * @ext_start: on success, set to the address of the free slot's type byte.
 *
 * Returns 0 on success, -EINVAL if an already-present extension has a type
 * that is out of range or has no size recorded in rds_exthdr_size[], and
 * -ENOSPC if the remaining space cannot hold 1 + @len bytes.
 */
static int rds_find_next_ext_space(struct rds_header *hdr, unsigned int len,
|
||||
u8 **ext_start)
|
||||
{
|
||||
unsigned int ext_len;
|
||||
unsigned int type;
|
||||
int ind = 0;
|
||||
|
||||
/* Room is checked before the slot is inspected, so a free slot that
 * is too small for 1 + len bytes ends the walk with -ENOSPC.
 */
while ((ind + 1 + len) <= RDS_HEADER_EXT_SPACE) {
|
||||
/* A RDS_EXTHDR_NONE type byte marks the end of the used area */
if (hdr->h_exthdr[ind] == RDS_EXTHDR_NONE) {
|
||||
*ext_start = hdr->h_exthdr + ind;
|
||||
return 0;
|
||||
}
|
||||
|
||||
type = hdr->h_exthdr[ind];
|
||||
|
||||
/* Extensions carry no length field; the payload size is looked up
 * by type. A zero size means the walk cannot step over this entry.
 */
ext_len = (type < __RDS_EXTHDR_MAX) ? rds_exthdr_size[type] : 0;
|
||||
/* NOTE(review): type is unsigned int but is printed with %d */
WARN_ONCE(!ext_len, "Unknown ext hdr type %d\n", type);
|
||||
if (!ext_len)
|
||||
return -EINVAL;
|
||||
|
||||
/* ind points to a valid ext hdr with known length */
|
||||
/* skip the type byte and its payload */
ind += 1 + ext_len;
|
||||
}
|
||||
|
||||
/* no room for extension */
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/* The ext hdr space is prefilled with zero from the kzalloc() */
|
||||
int rds_message_add_extension(struct rds_header *hdr,
|
||||
unsigned int type, const void *data)
|
||||
{
|
||||
unsigned int ext_len = sizeof(u8) + len;
|
||||
unsigned char *dst;
|
||||
unsigned int len;
|
||||
|
||||
/* For now, refuse to add more than one extension header */
|
||||
if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
|
||||
len = (type < __RDS_EXTHDR_MAX) ? rds_exthdr_size[type] : 0;
|
||||
if (!len)
|
||||
return 0;
|
||||
|
||||
if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type])
|
||||
if (rds_find_next_ext_space(hdr, len, &dst))
|
||||
return 0;
|
||||
|
||||
if (ext_len >= RDS_HEADER_EXT_SPACE)
|
||||
return 0;
|
||||
dst = hdr->h_exthdr;
|
||||
|
||||
*dst++ = type;
|
||||
memcpy(dst, data, len);
|
||||
|
||||
dst[len] = RDS_EXTHDR_NONE;
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rds_message_add_extension);
|
||||
|
|
@ -272,7 +311,7 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
|
|||
|
||||
ext_hdr.h_rdma_rkey = cpu_to_be32(r_key);
|
||||
ext_hdr.h_rdma_offset = cpu_to_be32(offset);
|
||||
return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr));
|
||||
return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
|
||||
|
||||
|
|
|
|||
|
|
@ -183,10 +183,11 @@ void rds_conn_net_set(struct rds_connection *conn, struct net *net)
|
|||
write_pnet(&conn->c_net, net);
|
||||
}
|
||||
|
||||
#define RDS_FLAG_CONG_BITMAP 0x01
|
||||
#define RDS_FLAG_ACK_REQUIRED 0x02
|
||||
#define RDS_FLAG_RETRANSMITTED 0x04
|
||||
#define RDS_MAX_ADV_CREDIT 255
|
||||
#define RDS_FLAG_CONG_BITMAP 0x01
|
||||
#define RDS_FLAG_ACK_REQUIRED 0x02
|
||||
#define RDS_FLAG_RETRANSMITTED 0x04
|
||||
#define RDS_FLAG_EXTHDR_EXTENSION 0x20
|
||||
#define RDS_MAX_ADV_CREDIT 255
|
||||
|
||||
/* RDS_FLAG_PROBE_PORT is the reserved sport used for sending a ping
|
||||
* probe to exchange control information before establishing a connection.
|
||||
|
|
@ -258,6 +259,20 @@ struct rds_ext_header_rdma_dest {
|
|||
__be32 h_rdma_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* This extension header tells the peer how many bytes were delivered via RDMA.
|
||||
*/
|
||||
#define RDS_EXTHDR_RDMA_BYTES 4
|
||||
|
||||
/* Payload of the RDS_EXTHDR_RDMA_BYTES extension header. */
struct rds_ext_header_rdma_bytes {
|
||||
__be32 h_rdma_bytes; /* RDMA byte count, big-endian */
|
||||
u8 h_rflags; /* direction of RDMA: RDS_FLAG_RDMA_WR_BYTES or RDS_FLAG_RDMA_RD_BYTES */
|
||||
u8 h_pad[3]; /* padding; presumably rounds the payload up to 8 bytes - confirm */
|
||||
};
|
||||
|
||||
#define RDS_FLAG_RDMA_WR_BYTES 0x01
|
||||
#define RDS_FLAG_RDMA_RD_BYTES 0x02
|
||||
|
||||
/* Extension header announcing number of paths.
|
||||
* Implicit length = 2 bytes.
|
||||
*/
|
||||
|
|
@ -871,7 +886,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
|
|||
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
|
||||
__be16 dport, u64 seq);
|
||||
int rds_message_add_extension(struct rds_header *hdr,
|
||||
unsigned int type, const void *data, unsigned int len);
|
||||
unsigned int type, const void *data);
|
||||
int rds_message_next_extension(struct rds_header *hdr,
|
||||
unsigned int *pos, void *buf, unsigned int *buflen);
|
||||
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
|
||||
|
|
|
|||
|
|
@ -1459,12 +1459,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
|
|||
__be32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
|
||||
|
||||
rds_message_add_extension(&rm->m_inc.i_hdr,
|
||||
RDS_EXTHDR_NPATHS, &npaths,
|
||||
sizeof(npaths));
|
||||
RDS_EXTHDR_NPATHS, &npaths);
|
||||
rds_message_add_extension(&rm->m_inc.i_hdr,
|
||||
RDS_EXTHDR_GEN_NUM,
|
||||
&my_gen_num,
|
||||
sizeof(u32));
|
||||
&my_gen_num);
|
||||
}
|
||||
spin_unlock_irqrestore(&cp->cp_lock, flags);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue