mirror of
https://github.com/torvalds/linux.git
synced 2026-03-14 02:06:15 +01:00
for-netdev
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmS4IUIACgkQ6rmadz2v bTrVCw/9GG5A5ebqwoh/DrsFXEzpKDmZFIAWd5wB+Fx2i8y+6Jl/Fw6SjkkAtUnc 215T3YX2u3Xg1WFC5zxY9lYm2OeMq2lPHVwjlqgt/pHE8D6b8cZ44eyN+f0ZSiLy wyx0wHLd3oP4KvMyiqm7/ZmhDjAtBpuqMjY5FNsbUxrIGUUI2ZLC4VFVWhnWmzRA eEOQuUge4e1YD62kfkWlT/GEv710ysqFZD2zs4yhevDfmr/6DAIaA7dhfKMYsM/S hCPoCuuXWVoHiqksm0U1BwpEiAQrqR91Sx8RCAakw5Pyp5hkj9dJc9sLwkgMH/k7 2352IIPXddH8cGKQM+hIBrc/io+6MxMbVk7Pe+1OUIBrvP//zQrHWk0zbssF3D8C z6TbxBLdSzbDELPph3gZu5bNaLSkpuODhNjLcIVGSOeSJ5nsgATCQtXFAAPV0E/Q v2O7Te5aTjTOpFMcIrIK1eWXUS56yRA+YwDa1VuWXAiLrr+Rq0tm4tBqxhof3KlH bfCoqFNa12MfpCJURHICcV7DJo53rWbCtDSJPaYwZXb/jJPd3gPb8EVixoLN2A1M dV/ou9rKEEkJXxsZ4Bctuh7t5YwpqxTq74YSdvnkOJ8P1lBDYST2SfHgQVOayQPv XH9MlMO3Qtb9Sl0ZiI7gHbpK7h6v9RvRuHJcnN2e3wwMEx256xE= =VRCb -----END PGP SIGNATURE----- Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next Alexei Starovoitov says: ==================== pull-request: bpf-next 2023-07-19 We've added 45 non-merge commits during the last 3 day(s) which contain a total of 71 files changed, 7808 insertions(+), 592 deletions(-). The main changes are: 1) multi-buffer support in AF_XDP, from Maciej Fijalkowski, Magnus Karlsson, Tirthendu Sarkar. 2) BPF link support for tc BPF programs, from Daniel Borkmann. 3) Enable bpf_map_sum_elem_count kfunc for all program types, from Anton Protopopov. 4) Add 'owner' field to bpf_rb_node to fix races in shared ownership, Dave Marchevsky. 5) Prevent potential skb_header_pointer() misuse, from Alexei Starovoitov. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (45 commits) bpf, net: Introduce skb_pointer_if_linear(). 
bpf: sync tools/ uapi header with selftests/bpf: Add mprog API tests for BPF tcx links selftests/bpf: Add mprog API tests for BPF tcx opts bpftool: Extend net dump with tcx progs libbpf: Add helper macro to clear opts structs libbpf: Add link-based API for tcx libbpf: Add opts-based attach/detach/query API for tcx bpf: Add fd-based tcx multi-prog infra with link support bpf: Add generic attach/detach/query API for multi-progs selftests/xsk: reset NIC settings to default after running test suite selftests/xsk: add test for too many frags selftests/xsk: add metadata copy test for multi-buff selftests/xsk: add invalid descriptor test for multi-buffer selftests/xsk: add unaligned mode test for multi-buffer selftests/xsk: add basic multi-buffer test selftests/xsk: transmit and receive multi-buffer packets xsk: add multi-buffer documentation i40e: xsk: add TX multi-buffer support ice: xsk: Tx multi-buffer support ... ==================== Link: https://lore.kernel.org/r/20230719175424.75717-1-alexei.starovoitov@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
e93165d5e7
71 changed files with 7830 additions and 614 deletions
|
|
@ -62,6 +62,12 @@ attribute-sets:
|
|||
type: u64
|
||||
enum: xdp-act
|
||||
enum-as-flags: true
|
||||
-
|
||||
name: xdp_zc_max_segs
|
||||
doc: max fragment count supported by ZC driver
|
||||
type: u32
|
||||
checks:
|
||||
min: 1
|
||||
|
||||
operations:
|
||||
list:
|
||||
|
|
|
|||
|
|
@ -462,8 +462,92 @@ XDP_OPTIONS getsockopt
|
|||
Gets options from an XDP socket. The only one supported so far is
|
||||
XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not.
|
||||
|
||||
Multi-Buffer Support
|
||||
====================
|
||||
|
||||
With multi-buffer support, programs using AF_XDP sockets can receive
|
||||
and transmit packets consisting of multiple buffers both in copy and
|
||||
zero-copy mode. For example, a packet can consist of two
|
||||
frames/buffers, one with the header and the other one with the data,
|
||||
or a 9K Ethernet jumbo frame can be constructed by chaining together
|
||||
three 4K frames.
|
||||
|
||||
Some definitions:
|
||||
|
||||
* A packet consists of one or more frames
|
||||
|
||||
* A descriptor in one of the AF_XDP rings always refers to a single
|
||||
frame. In the case the packet consists of a single frame, the
|
||||
descriptor refers to the whole packet.
|
||||
|
||||
To enable multi-buffer support for an AF_XDP socket, use the new bind
|
||||
flag XDP_USE_SG. If this is not provided, all multi-buffer packets
|
||||
will be dropped just as before. Note that the XDP program loaded also
|
||||
needs to be in multi-buffer mode. This can be accomplished by using
|
||||
"xdp.frags" as the section name of the XDP program used.
|
||||
|
||||
To represent a packet consisting of multiple frames, a new flag called
|
||||
XDP_PKT_CONTD is introduced in the options field of the Rx and Tx
|
||||
descriptors. If it is true (1) the packet continues with the next
|
||||
descriptor and if it is false (0) it means this is the last descriptor
|
||||
of the packet. Why the reverse logic of the end-of-packet (eop) flag found
|
||||
in many NICs? Just to preserve compatibility with non-multi-buffer
|
||||
applications that have this bit set to false for all packets on Rx,
|
||||
and the apps set the options field to zero for Tx, as anything else
|
||||
will be treated as an invalid descriptor.
|
||||
|
||||
These are the semantics for producing packets onto the AF_XDP Tx ring
|
||||
consisting of multiple frames:
|
||||
|
||||
* When an invalid descriptor is found, all the other
|
||||
descriptors/frames of this packet are marked as invalid and not
|
||||
completed. The next descriptor is treated as the start of a new
|
||||
packet, even if this was not the intent (because we cannot guess
|
||||
the intent). As before, if your program is producing invalid
|
||||
descriptors you have a bug that must be fixed.
|
||||
|
||||
* Zero length descriptors are treated as invalid descriptors.
|
||||
|
||||
* For copy mode, the maximum supported number of frames in a packet is
|
||||
equal to CONFIG_MAX_SKB_FRAGS + 1. If it is exceeded, all
|
||||
descriptors accumulated so far are dropped and treated as
|
||||
invalid. To produce an application that will work on any system
|
||||
regardless of this config setting, limit the number of frags to 18,
|
||||
as the minimum value of the config is 17.
|
||||
|
||||
* For zero-copy mode, the limit is up to what the NIC HW
|
||||
supports, which is usually at least five on the NICs we have checked. We
|
||||
consciously chose to not enforce a rigid limit (such as
|
||||
CONFIG_MAX_SKB_FRAGS + 1) for zero-copy mode, as it would have
|
||||
resulted in copy actions under the hood to fit into what limit the
|
||||
NIC supports, which would defeat the purpose of zero-copy mode. How to
|
||||
probe for this limit is explained in the "probe for multi-buffer
|
||||
support" section.
|
||||
|
||||
On the Rx path in copy-mode, the xsk core copies the XDP data into
|
||||
multiple descriptors, if needed, and sets the XDP_PKT_CONTD flag as
|
||||
detailed before. Zero-copy mode works the same, though the data is not
|
||||
copied. When the application gets a descriptor with the XDP_PKT_CONTD
|
||||
flag set to one, it means that the packet consists of multiple buffers
|
||||
and it continues with the next buffer in the following
|
||||
descriptor. When a descriptor with XDP_PKT_CONTD == 0 is received, it
|
||||
means that this is the last buffer of the packet. AF_XDP guarantees
|
||||
that only a complete packet (all frames in the packet) is sent to the
|
||||
application. If there is not enough space in the AF_XDP Rx ring, all
|
||||
frames of the packet will be dropped.
|
||||
|
||||
If an application reads a batch of descriptors, using for example the libxdp
|
||||
interfaces, it is not guaranteed that the batch will end with a full
|
||||
packet. It might end in the middle of a packet and the rest of the
|
||||
buffers of that packet will arrive at the beginning of the next batch,
|
||||
since the libxdp interface does not read the whole ring (unless you
|
||||
have an enormous batch size or a very small ring size).
|
||||
|
||||
An example program each for Rx and Tx multi-buffer support can be found
|
||||
later in this document.
|
||||
|
||||
Usage
|
||||
=====
|
||||
-----
|
||||
|
||||
In order to use AF_XDP sockets, two parts are needed: the
|
||||
user-space application and the XDP program. For a complete setup and
|
||||
|
|
@ -541,6 +625,131 @@ like this:
|
|||
But please use the libbpf functions as they are optimized and ready to
|
||||
use. This will make your life easier.
|
||||
|
||||
Usage Multi-Buffer Rx
|
||||
---------------------
|
||||
|
||||
Here is a simple Rx path pseudo-code example (using libxdp interfaces
|
||||
for simplicity). Error paths have been excluded to keep it short:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void rx_packets(struct xsk_socket_info *xsk)
|
||||
{
|
||||
static bool new_packet = true;
|
||||
u32 idx_rx = 0, idx_fq = 0;
|
||||
static char *pkt;
|
||||
|
||||
int rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
|
||||
|
||||
xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
|
||||
|
||||
for (int i = 0; i < rcvd; i++) {
|
||||
struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
|
||||
char *frag = xsk_umem__get_data(xsk->umem->buffer, desc->addr);
|
||||
bool eop = !(desc->options & XDP_PKT_CONTD);
|
||||
|
||||
if (new_packet)
|
||||
pkt = frag;
|
||||
else
|
||||
add_frag_to_pkt(pkt, frag);
|
||||
|
||||
if (eop)
|
||||
process_pkt(pkt);
|
||||
|
||||
new_packet = eop;
|
||||
|
||||
*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = desc->addr;
|
||||
}
|
||||
|
||||
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
|
||||
xsk_ring_cons__release(&xsk->rx, rcvd);
|
||||
}
|
||||
|
||||
Usage Multi-Buffer Tx
|
||||
---------------------
|
||||
|
||||
Here is an example Tx path pseudo-code (using libxdp interfaces for
|
||||
simplicity) ignoring that the umem is finite in size, and that we
|
||||
eventually will run out of packets to send. It also assumes pkts.addr
|
||||
points to a valid location in the umem.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void tx_packets(struct xsk_socket_info *xsk, struct pkt *pkts,
|
||||
int batch_size)
|
||||
{
|
||||
u32 idx, i, pkt_nb = 0;
|
||||
|
||||
xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx);
|
||||
|
||||
for (i = 0; i < batch_size;) {
|
||||
u64 addr = pkts[pkt_nb].addr;
|
||||
u32 len = pkts[pkt_nb].size;
|
||||
|
||||
do {
|
||||
struct xdp_desc *tx_desc;
|
||||
|
||||
tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i++);
|
||||
tx_desc->addr = addr;
|
||||
|
||||
if (len > xsk_frame_size) {
|
||||
tx_desc->len = xsk_frame_size;
|
||||
tx_desc->options = XDP_PKT_CONTD;
|
||||
} else {
|
||||
tx_desc->len = len;
|
||||
tx_desc->options = 0;
|
||||
pkt_nb++;
|
||||
}
|
||||
len -= tx_desc->len;
|
||||
addr += xsk_frame_size;
|
||||
|
||||
if (i == batch_size) {
|
||||
/* Remember len, addr, pkt_nb for next iteration.
|
||||
* Skipped for simplicity.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
} while (len);
|
||||
}
|
||||
|
||||
xsk_ring_prod__submit(&xsk->tx, i);
|
||||
}
|
||||
|
||||
Probing for Multi-Buffer Support
|
||||
--------------------------------
|
||||
|
||||
To discover if a driver supports multi-buffer AF_XDP in SKB or DRV
|
||||
mode, use the XDP_FEATURES feature of netlink in linux/netdev.h to
|
||||
query for NETDEV_XDP_ACT_RX_SG support. This is the same flag as for
|
||||
querying for XDP multi-buffer support. If XDP supports multi-buffer in
|
||||
a driver, then AF_XDP will also support that in SKB and DRV mode.
|
||||
|
||||
To discover if a driver supports multi-buffer AF_XDP in zero-copy
|
||||
mode, use XDP_FEATURES and first check the NETDEV_XDP_ACT_XSK_ZEROCOPY
|
||||
flag. If it is set, it means that at least zero-copy is supported and
|
||||
you should go and check the netlink attribute
|
||||
NETDEV_A_DEV_XDP_ZC_MAX_SEGS in linux/netdev.h. An unsigned integer
|
||||
value will be returned stating the max number of frags that are
|
||||
supported by this device in zero-copy mode. These are the possible
|
||||
return values:
|
||||
|
||||
1: Multi-buffer for zero-copy is not supported by this device, as max
|
||||
one fragment being supported means that multi-buffer is not possible.
|
||||
|
||||
>=2: Multi-buffer is supported in zero-copy mode for this device. The
|
||||
returned number signifies the max number of frags supported.
|
||||
|
||||
For an example on how these are used through libbpf, please take a
|
||||
look at tools/testing/selftests/bpf/xskxceiver.c.
|
||||
|
||||
Multi-Buffer Support for Zero-Copy Drivers
|
||||
------------------------------------------
|
||||
|
||||
Zero-copy drivers usually use the batched APIs for Rx and Tx
|
||||
processing. Note that the Tx batch API guarantees that it will provide
|
||||
a batch of Tx descriptors that ends with a full packet. This is
|
||||
to facilitate extending a zero-copy driver with multi-buffer support.
|
||||
|
||||
Sample application
|
||||
==================
|
||||
|
||||
|
|
|
|||
|
|
@ -3684,6 +3684,7 @@ F: include/linux/filter.h
|
|||
F: include/linux/tnum.h
|
||||
F: kernel/bpf/core.c
|
||||
F: kernel/bpf/dispatcher.c
|
||||
F: kernel/bpf/mprog.c
|
||||
F: kernel/bpf/syscall.c
|
||||
F: kernel/bpf/tnum.c
|
||||
F: kernel/bpf/trampoline.c
|
||||
|
|
@ -3777,13 +3778,15 @@ L: netdev@vger.kernel.org
|
|||
S: Maintained
|
||||
F: kernel/bpf/bpf_struct*
|
||||
|
||||
BPF [NETWORKING] (tc BPF, sock_addr)
|
||||
BPF [NETWORKING] (tcx & tc BPF, sock_addr)
|
||||
M: Martin KaFai Lau <martin.lau@linux.dev>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
R: John Fastabend <john.fastabend@gmail.com>
|
||||
L: bpf@vger.kernel.org
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: include/net/tcx.h
|
||||
F: kernel/bpf/tcx.c
|
||||
F: net/core/filter.c
|
||||
F: net/sched/act_bpf.c
|
||||
F: net/sched/cls_bpf.c
|
||||
|
|
|
|||
|
|
@ -1925,7 +1925,7 @@ static int get_nr_used_regs(const struct btf_func_model *m)
|
|||
static void save_args(const struct btf_func_model *m, u8 **prog,
|
||||
int stack_size, bool for_call_origin)
|
||||
{
|
||||
int arg_regs, first_off, nr_regs = 0, nr_stack_slots = 0;
|
||||
int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
|
||||
int i, j;
|
||||
|
||||
/* Store function arguments to stack.
|
||||
|
|
|
|||
|
|
@ -3585,11 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
|
|||
if (ring->xsk_pool) {
|
||||
ring->rx_buf_len =
|
||||
xsk_pool_get_rx_frame_size(ring->xsk_pool);
|
||||
/* For AF_XDP ZC, we disallow packets to span on
|
||||
* multiple buffers, thus letting us skip that
|
||||
* handling in the fast-path.
|
||||
*/
|
||||
chain_len = 1;
|
||||
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
|
||||
MEM_TYPE_XSK_BUFF_POOL,
|
||||
NULL);
|
||||
|
|
@ -13822,6 +13817,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
|
|||
NETDEV_XDP_ACT_REDIRECT |
|
||||
NETDEV_XDP_ACT_XSK_ZEROCOPY |
|
||||
NETDEV_XDP_ACT_RX_SG;
|
||||
netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD;
|
||||
} else {
|
||||
/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
|
||||
* are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
|
||||
|
|
|
|||
|
|
@ -2284,8 +2284,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
|
|||
* If the buffer is an EOP buffer, this function exits returning false,
|
||||
* otherwise return true indicating that this is in fact a non-EOP buffer.
|
||||
*/
|
||||
static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
|
||||
union i40e_rx_desc *rx_desc)
|
||||
bool i40e_is_non_eop(struct i40e_ring *rx_ring,
|
||||
union i40e_rx_desc *rx_desc)
|
||||
{
|
||||
/* if we are the last buffer then there is nothing else to do */
|
||||
#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
|
||||
|
|
|
|||
|
|
@ -473,6 +473,8 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
|
|||
bool __i40e_chk_linearize(struct sk_buff *skb);
|
||||
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
|
||||
u32 flags);
|
||||
bool i40e_is_non_eop(struct i40e_ring *rx_ring,
|
||||
union i40e_rx_desc *rx_desc);
|
||||
|
||||
/**
|
||||
* i40e_get_head - Retrieve head from head writeback
|
||||
|
|
|
|||
|
|
@ -294,8 +294,14 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
|
|||
{
|
||||
unsigned int totalsize = xdp->data_end - xdp->data_meta;
|
||||
unsigned int metasize = xdp->data - xdp->data_meta;
|
||||
struct skb_shared_info *sinfo = NULL;
|
||||
struct sk_buff *skb;
|
||||
u32 nr_frags = 0;
|
||||
|
||||
if (unlikely(xdp_buff_has_frags(xdp))) {
|
||||
sinfo = xdp_get_shared_info_from_buff(xdp);
|
||||
nr_frags = sinfo->nr_frags;
|
||||
}
|
||||
net_prefetch(xdp->data_meta);
|
||||
|
||||
/* allocate a skb to store the frags */
|
||||
|
|
@ -312,6 +318,28 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
|
|||
__skb_pull(skb, metasize);
|
||||
}
|
||||
|
||||
if (likely(!xdp_buff_has_frags(xdp)))
|
||||
goto out;
|
||||
|
||||
for (int i = 0; i < nr_frags; i++) {
|
||||
struct skb_shared_info *skinfo = skb_shinfo(skb);
|
||||
skb_frag_t *frag = &sinfo->frags[i];
|
||||
struct page *page;
|
||||
void *addr;
|
||||
|
||||
page = dev_alloc_page();
|
||||
if (!page) {
|
||||
dev_kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
addr = page_to_virt(page);
|
||||
|
||||
memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
|
||||
|
||||
__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
|
||||
addr, 0, skb_frag_size(frag));
|
||||
}
|
||||
|
||||
out:
|
||||
xsk_buff_free(xdp);
|
||||
return skb;
|
||||
|
|
@ -322,14 +350,13 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
|
|||
union i40e_rx_desc *rx_desc,
|
||||
unsigned int *rx_packets,
|
||||
unsigned int *rx_bytes,
|
||||
unsigned int size,
|
||||
unsigned int xdp_res,
|
||||
bool *failure)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
|
||||
*rx_packets = 1;
|
||||
*rx_bytes = size;
|
||||
*rx_bytes = xdp_get_buff_len(xdp_buff);
|
||||
|
||||
if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
|
||||
return;
|
||||
|
|
@ -363,7 +390,6 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
|
|||
return;
|
||||
}
|
||||
|
||||
*rx_bytes = skb->len;
|
||||
i40e_process_skb_fields(rx_ring, rx_desc, skb);
|
||||
napi_gro_receive(&rx_ring->q_vector->napi, skb);
|
||||
return;
|
||||
|
|
@ -374,6 +400,31 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
|
|||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
static int
|
||||
i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
|
||||
struct xdp_buff *xdp, const unsigned int size)
|
||||
{
|
||||
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
|
||||
|
||||
if (!xdp_buff_has_frags(first)) {
|
||||
sinfo->nr_frags = 0;
|
||||
sinfo->xdp_frags_size = 0;
|
||||
xdp_buff_set_frags_flag(first);
|
||||
}
|
||||
|
||||
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
|
||||
xsk_buff_free(first);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
|
||||
virt_to_page(xdp->data_hard_start), 0, size);
|
||||
sinfo->xdp_frags_size += size;
|
||||
xsk_buff_add_frag(xdp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
|
||||
* @rx_ring: Rx ring
|
||||
|
|
@ -384,13 +435,18 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
|
|||
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
||||
{
|
||||
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
|
||||
u16 next_to_process = rx_ring->next_to_process;
|
||||
u16 next_to_clean = rx_ring->next_to_clean;
|
||||
u16 count_mask = rx_ring->count - 1;
|
||||
unsigned int xdp_res, xdp_xmit = 0;
|
||||
struct xdp_buff *first = NULL;
|
||||
struct bpf_prog *xdp_prog;
|
||||
bool failure = false;
|
||||
u16 cleaned_count;
|
||||
|
||||
if (next_to_process != next_to_clean)
|
||||
first = *i40e_rx_bi(rx_ring, next_to_clean);
|
||||
|
||||
/* NB! xdp_prog will always be !NULL, due to the fact that
|
||||
* this path is enabled by setting an XDP program.
|
||||
*/
|
||||
|
|
@ -404,7 +460,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
|||
unsigned int size;
|
||||
u64 qword;
|
||||
|
||||
rx_desc = I40E_RX_DESC(rx_ring, next_to_clean);
|
||||
rx_desc = I40E_RX_DESC(rx_ring, next_to_process);
|
||||
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
|
||||
|
||||
/* This memory barrier is needed to keep us from reading
|
||||
|
|
@ -417,9 +473,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
|||
i40e_clean_programming_status(rx_ring,
|
||||
rx_desc->raw.qword[0],
|
||||
qword);
|
||||
bi = *i40e_rx_bi(rx_ring, next_to_clean);
|
||||
bi = *i40e_rx_bi(rx_ring, next_to_process);
|
||||
xsk_buff_free(bi);
|
||||
next_to_clean = (next_to_clean + 1) & count_mask;
|
||||
next_to_process = (next_to_process + 1) & count_mask;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -428,22 +484,35 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
|||
if (!size)
|
||||
break;
|
||||
|
||||
bi = *i40e_rx_bi(rx_ring, next_to_clean);
|
||||
bi = *i40e_rx_bi(rx_ring, next_to_process);
|
||||
xsk_buff_set_size(bi, size);
|
||||
xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
|
||||
|
||||
xdp_res = i40e_run_xdp_zc(rx_ring, bi, xdp_prog);
|
||||
i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets,
|
||||
&rx_bytes, size, xdp_res, &failure);
|
||||
if (!first)
|
||||
first = bi;
|
||||
else if (i40e_add_xsk_frag(rx_ring, first, bi, size))
|
||||
break;
|
||||
|
||||
next_to_process = (next_to_process + 1) & count_mask;
|
||||
|
||||
if (i40e_is_non_eop(rx_ring, rx_desc))
|
||||
continue;
|
||||
|
||||
xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
|
||||
i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
|
||||
&rx_bytes, xdp_res, &failure);
|
||||
first->flags = 0;
|
||||
next_to_clean = next_to_process;
|
||||
if (failure)
|
||||
break;
|
||||
total_rx_packets += rx_packets;
|
||||
total_rx_bytes += rx_bytes;
|
||||
xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
|
||||
next_to_clean = (next_to_clean + 1) & count_mask;
|
||||
first = NULL;
|
||||
}
|
||||
|
||||
rx_ring->next_to_clean = next_to_clean;
|
||||
rx_ring->next_to_process = next_to_process;
|
||||
cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask;
|
||||
|
||||
if (cleaned_count >= I40E_RX_BUFFER_WRITE)
|
||||
|
|
@ -466,6 +535,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
|||
static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
|
||||
unsigned int *total_bytes)
|
||||
{
|
||||
u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc);
|
||||
struct i40e_tx_desc *tx_desc;
|
||||
dma_addr_t dma;
|
||||
|
||||
|
|
@ -474,8 +544,7 @@ static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
|
|||
|
||||
tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
|
||||
tx_desc->buffer_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP,
|
||||
0, desc->len, 0);
|
||||
tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc->len, 0);
|
||||
|
||||
*total_bytes += desc->len;
|
||||
}
|
||||
|
|
@ -489,14 +558,14 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des
|
|||
u32 i;
|
||||
|
||||
loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
|
||||
u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);
|
||||
|
||||
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
|
||||
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc[i].len);
|
||||
|
||||
tx_desc = I40E_TX_DESC(xdp_ring, ntu++);
|
||||
tx_desc->buffer_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC |
|
||||
I40E_TX_DESC_CMD_EOP,
|
||||
0, desc[i].len, 0);
|
||||
tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc[i].len, 0);
|
||||
|
||||
*total_bytes += desc[i].len;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -408,7 +408,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
|
|||
*/
|
||||
static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
|
||||
{
|
||||
int chain_len = ICE_MAX_CHAINED_RX_BUFS;
|
||||
struct ice_vsi *vsi = ring->vsi;
|
||||
u32 rxdid = ICE_RXDID_FLEX_NIC;
|
||||
struct ice_rlan_ctx rlan_ctx;
|
||||
|
|
@ -472,17 +471,11 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
|
|||
*/
|
||||
rlan_ctx.showiv = 0;
|
||||
|
||||
/* For AF_XDP ZC, we disallow packets to span on
|
||||
* multiple buffers, thus letting us skip that
|
||||
* handling in the fast-path.
|
||||
*/
|
||||
if (ring->xsk_pool)
|
||||
chain_len = 1;
|
||||
/* Max packet size for this queue - must not be set to a larger value
|
||||
* than 5 x DBUF
|
||||
*/
|
||||
rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
|
||||
chain_len * ring->rx_buf_len);
|
||||
ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
|
||||
|
||||
/* Rx queue threshold in units of 64 */
|
||||
rlan_ctx.lrxqthresh = 1;
|
||||
|
|
|
|||
|
|
@ -3392,6 +3392,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
|
|||
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
|
||||
NETDEV_XDP_ACT_XSK_ZEROCOPY |
|
||||
NETDEV_XDP_ACT_RX_SG;
|
||||
netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -545,19 +545,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
|
|||
return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
|
||||
* @rx_ring: Rx ring
|
||||
*/
|
||||
static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
|
||||
{
|
||||
int ntc = rx_ring->next_to_clean + 1;
|
||||
|
||||
ntc = (ntc < rx_ring->count) ? ntc : 0;
|
||||
rx_ring->next_to_clean = ntc;
|
||||
prefetch(ICE_RX_DESC(rx_ring, ntc));
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
|
||||
* @rx_ring: Rx ring
|
||||
|
|
@ -572,8 +559,14 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
|
|||
{
|
||||
unsigned int totalsize = xdp->data_end - xdp->data_meta;
|
||||
unsigned int metasize = xdp->data - xdp->data_meta;
|
||||
struct skb_shared_info *sinfo = NULL;
|
||||
struct sk_buff *skb;
|
||||
u32 nr_frags = 0;
|
||||
|
||||
if (unlikely(xdp_buff_has_frags(xdp))) {
|
||||
sinfo = xdp_get_shared_info_from_buff(xdp);
|
||||
nr_frags = sinfo->nr_frags;
|
||||
}
|
||||
net_prefetch(xdp->data_meta);
|
||||
|
||||
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
|
||||
|
|
@ -589,6 +582,29 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
|
|||
__skb_pull(skb, metasize);
|
||||
}
|
||||
|
||||
if (likely(!xdp_buff_has_frags(xdp)))
|
||||
goto out;
|
||||
|
||||
for (int i = 0; i < nr_frags; i++) {
|
||||
struct skb_shared_info *skinfo = skb_shinfo(skb);
|
||||
skb_frag_t *frag = &sinfo->frags[i];
|
||||
struct page *page;
|
||||
void *addr;
|
||||
|
||||
page = dev_alloc_page();
|
||||
if (!page) {
|
||||
dev_kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
addr = page_to_virt(page);
|
||||
|
||||
memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
|
||||
|
||||
__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
|
||||
addr, 0, skb_frag_size(frag));
|
||||
}
|
||||
|
||||
out:
|
||||
xsk_buff_free(xdp);
|
||||
return skb;
|
||||
}
|
||||
|
|
@ -597,7 +613,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
|
|||
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
|
||||
* @xdp_ring: XDP Tx ring
|
||||
*/
|
||||
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
|
||||
static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
|
||||
{
|
||||
u16 ntc = xdp_ring->next_to_clean;
|
||||
struct ice_tx_desc *tx_desc;
|
||||
|
|
@ -619,7 +635,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
|
|||
}
|
||||
|
||||
if (!completed_frames)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (likely(!xdp_ring->xdp_tx_active)) {
|
||||
xsk_frames = completed_frames;
|
||||
|
|
@ -649,6 +665,8 @@ skip:
|
|||
xdp_ring->next_to_clean -= cnt;
|
||||
if (xsk_frames)
|
||||
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
|
||||
|
||||
return completed_frames;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -666,37 +684,72 @@ skip:
|
|||
static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
|
||||
struct ice_tx_ring *xdp_ring)
|
||||
{
|
||||
struct skb_shared_info *sinfo = NULL;
|
||||
u32 size = xdp->data_end - xdp->data;
|
||||
u32 ntu = xdp_ring->next_to_use;
|
||||
struct ice_tx_desc *tx_desc;
|
||||
struct ice_tx_buf *tx_buf;
|
||||
dma_addr_t dma;
|
||||
struct xdp_buff *head;
|
||||
u32 nr_frags = 0;
|
||||
u32 free_space;
|
||||
u32 frag = 0;
|
||||
|
||||
if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) {
|
||||
ice_clean_xdp_irq_zc(xdp_ring);
|
||||
if (!ICE_DESC_UNUSED(xdp_ring)) {
|
||||
xdp_ring->ring_stats->tx_stats.tx_busy++;
|
||||
return ICE_XDP_CONSUMED;
|
||||
}
|
||||
free_space = ICE_DESC_UNUSED(xdp_ring);
|
||||
if (free_space < ICE_RING_QUARTER(xdp_ring))
|
||||
free_space += ice_clean_xdp_irq_zc(xdp_ring);
|
||||
|
||||
if (unlikely(!free_space))
|
||||
goto busy;
|
||||
|
||||
if (unlikely(xdp_buff_has_frags(xdp))) {
|
||||
sinfo = xdp_get_shared_info_from_buff(xdp);
|
||||
nr_frags = sinfo->nr_frags;
|
||||
if (free_space < nr_frags + 1)
|
||||
goto busy;
|
||||
}
|
||||
|
||||
dma = xsk_buff_xdp_get_dma(xdp);
|
||||
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
|
||||
|
||||
tx_buf = &xdp_ring->tx_buf[ntu];
|
||||
tx_buf->xdp = xdp;
|
||||
tx_buf->type = ICE_TX_BUF_XSK_TX;
|
||||
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
|
||||
tx_desc->buf_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
|
||||
0, size, 0);
|
||||
xdp_ring->xdp_tx_active++;
|
||||
tx_buf = &xdp_ring->tx_buf[ntu];
|
||||
head = xdp;
|
||||
|
||||
for (;;) {
|
||||
dma_addr_t dma;
|
||||
|
||||
dma = xsk_buff_xdp_get_dma(xdp);
|
||||
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
|
||||
|
||||
tx_buf->xdp = xdp;
|
||||
tx_buf->type = ICE_TX_BUF_XSK_TX;
|
||||
tx_desc->buf_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
|
||||
/* account for each xdp_buff from xsk_buff_pool */
|
||||
xdp_ring->xdp_tx_active++;
|
||||
|
||||
if (++ntu == xdp_ring->count)
|
||||
ntu = 0;
|
||||
|
||||
if (frag == nr_frags)
|
||||
break;
|
||||
|
||||
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
|
||||
tx_buf = &xdp_ring->tx_buf[ntu];
|
||||
|
||||
xdp = xsk_buff_get_frag(head);
|
||||
size = skb_frag_size(&sinfo->frags[frag]);
|
||||
frag++;
|
||||
}
|
||||
|
||||
if (++ntu == xdp_ring->count)
|
||||
ntu = 0;
|
||||
xdp_ring->next_to_use = ntu;
|
||||
/* update last descriptor from a frame with EOP */
|
||||
tx_desc->cmd_type_offset_bsz |=
|
||||
cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
|
||||
|
||||
return ICE_XDP_TX;
|
||||
|
||||
busy:
|
||||
xdp_ring->ring_stats->tx_stats.tx_busy++;
|
||||
|
||||
return ICE_XDP_CONSUMED;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -752,6 +805,34 @@ out_failure:
|
|||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
|
||||
struct xdp_buff *xdp, const unsigned int size)
|
||||
{
|
||||
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
|
||||
|
||||
if (!size)
|
||||
return 0;
|
||||
|
||||
if (!xdp_buff_has_frags(first)) {
|
||||
sinfo->nr_frags = 0;
|
||||
sinfo->xdp_frags_size = 0;
|
||||
xdp_buff_set_frags_flag(first);
|
||||
}
|
||||
|
||||
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
|
||||
xsk_buff_free(first);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
|
||||
virt_to_page(xdp->data_hard_start), 0, size);
|
||||
sinfo->xdp_frags_size += size;
|
||||
xsk_buff_add_frag(xdp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_clean_rx_irq_zc - consumes packets from the hardware ring
|
||||
* @rx_ring: AF_XDP Rx ring
|
||||
|
|
@ -762,9 +843,14 @@ out_failure:
|
|||
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
|
||||
{
|
||||
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
|
||||
struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool;
|
||||
u32 ntc = rx_ring->next_to_clean;
|
||||
u32 ntu = rx_ring->next_to_use;
|
||||
struct xdp_buff *first = NULL;
|
||||
struct ice_tx_ring *xdp_ring;
|
||||
unsigned int xdp_xmit = 0;
|
||||
struct bpf_prog *xdp_prog;
|
||||
u32 cnt = rx_ring->count;
|
||||
bool failure = false;
|
||||
int entries_to_alloc;
|
||||
|
||||
|
|
@ -774,6 +860,9 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
|
|||
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
|
||||
xdp_ring = rx_ring->xdp_ring;
|
||||
|
||||
if (ntc != rx_ring->first_desc)
|
||||
first = *ice_xdp_buf(rx_ring, rx_ring->first_desc);
|
||||
|
||||
while (likely(total_rx_packets < (unsigned int)budget)) {
|
||||
union ice_32b_rx_flex_desc *rx_desc;
|
||||
unsigned int size, xdp_res = 0;
|
||||
|
|
@ -783,7 +872,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
|
|||
u16 vlan_tag = 0;
|
||||
u16 rx_ptype;
|
||||
|
||||
rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
|
||||
rx_desc = ICE_RX_DESC(rx_ring, ntc);
|
||||
|
||||
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
|
||||
if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
|
||||
|
|
@ -795,51 +884,61 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
|
|||
*/
|
||||
dma_rmb();
|
||||
|
||||
if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
|
||||
if (unlikely(ntc == ntu))
|
||||
break;
|
||||
|
||||
xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
|
||||
xdp = *ice_xdp_buf(rx_ring, ntc);
|
||||
|
||||
size = le16_to_cpu(rx_desc->wb.pkt_len) &
|
||||
ICE_RX_FLX_DESC_PKT_LEN_M;
|
||||
if (!size) {
|
||||
xdp->data = NULL;
|
||||
xdp->data_end = NULL;
|
||||
xdp->data_hard_start = NULL;
|
||||
xdp->data_meta = NULL;
|
||||
goto construct_skb;
|
||||
}
|
||||
|
||||
xsk_buff_set_size(xdp, size);
|
||||
xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(xdp, xsk_pool);
|
||||
|
||||
xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
|
||||
if (!first) {
|
||||
first = xdp;
|
||||
xdp_buff_clear_frags_flag(first);
|
||||
} else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (++ntc == cnt)
|
||||
ntc = 0;
|
||||
|
||||
if (ice_is_non_eop(rx_ring, rx_desc))
|
||||
continue;
|
||||
|
||||
xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
|
||||
if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
|
||||
xdp_xmit |= xdp_res;
|
||||
} else if (xdp_res == ICE_XDP_EXIT) {
|
||||
failure = true;
|
||||
first = NULL;
|
||||
rx_ring->first_desc = ntc;
|
||||
break;
|
||||
} else if (xdp_res == ICE_XDP_CONSUMED) {
|
||||
xsk_buff_free(xdp);
|
||||
xsk_buff_free(first);
|
||||
} else if (xdp_res == ICE_XDP_PASS) {
|
||||
goto construct_skb;
|
||||
}
|
||||
|
||||
total_rx_bytes += size;
|
||||
total_rx_bytes += xdp_get_buff_len(first);
|
||||
total_rx_packets++;
|
||||
|
||||
ice_bump_ntc(rx_ring);
|
||||
first = NULL;
|
||||
rx_ring->first_desc = ntc;
|
||||
continue;
|
||||
|
||||
construct_skb:
|
||||
/* XDP_PASS path */
|
||||
skb = ice_construct_skb_zc(rx_ring, xdp);
|
||||
skb = ice_construct_skb_zc(rx_ring, first);
|
||||
if (!skb) {
|
||||
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
|
||||
break;
|
||||
}
|
||||
|
||||
ice_bump_ntc(rx_ring);
|
||||
first = NULL;
|
||||
rx_ring->first_desc = ntc;
|
||||
|
||||
if (eth_skb_pad(skb)) {
|
||||
skb = NULL;
|
||||
|
|
@ -858,18 +957,22 @@ construct_skb:
|
|||
ice_receive_skb(rx_ring, skb, vlan_tag);
|
||||
}
|
||||
|
||||
entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
|
||||
rx_ring->next_to_clean = ntc;
|
||||
entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring);
|
||||
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
|
||||
failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
|
||||
|
||||
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
|
||||
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
|
||||
|
||||
if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
|
||||
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
|
||||
xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
|
||||
if (xsk_uses_need_wakeup(xsk_pool)) {
|
||||
/* ntu could have changed when allocating entries above, so
|
||||
* use rx_ring value instead of stack based one
|
||||
*/
|
||||
if (failure || ntc == rx_ring->next_to_use)
|
||||
xsk_set_rx_need_wakeup(xsk_pool);
|
||||
else
|
||||
xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
|
||||
xsk_clear_rx_need_wakeup(xsk_pool);
|
||||
|
||||
return (int)total_rx_packets;
|
||||
}
|
||||
|
|
@ -894,7 +997,7 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
|
|||
|
||||
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
|
||||
tx_desc->buf_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(desc),
|
||||
0, desc->len, 0);
|
||||
|
||||
*total_bytes += desc->len;
|
||||
|
|
@ -921,7 +1024,7 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
|
|||
|
||||
tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
|
||||
tx_desc->buf_addr = cpu_to_le64(dma);
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
|
||||
tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(&descs[i]),
|
||||
0, descs[i].len, 0);
|
||||
|
||||
*total_bytes += descs[i].len;
|
||||
|
|
|
|||
|
|
@ -228,6 +228,18 @@ struct btf_record {
|
|||
struct btf_field fields[];
|
||||
};
|
||||
|
||||
/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
|
||||
struct bpf_rb_node_kern {
|
||||
struct rb_node rb_node;
|
||||
void *owner;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
|
||||
struct bpf_list_node_kern {
|
||||
struct list_head list_head;
|
||||
void *owner;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_map {
|
||||
/* The first two cachelines with read-mostly members of which some
|
||||
* are also accessed in fast-path (e.g. ops, max_entries).
|
||||
|
|
|
|||
327
include/linux/bpf_mprog.h
Normal file
327
include/linux/bpf_mprog.h
Normal file
|
|
@ -0,0 +1,327 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
#ifndef __BPF_MPROG_H
|
||||
#define __BPF_MPROG_H
|
||||
|
||||
#include <linux/bpf.h>
|
||||
|
||||
/* bpf_mprog framework:
|
||||
*
|
||||
* bpf_mprog is a generic layer for multi-program attachment. In-kernel users
|
||||
* of the bpf_mprog don't need to care about the dependency resolution
|
||||
* internals, they can just consume it with a few API calls. Currently available
|
||||
* dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of
|
||||
* a BPF program or BPF link relative to an existing BPF program or BPF link
|
||||
* inside the multi-program array as well as prepend and append behavior if
|
||||
* no relative object was specified, see corresponding selftests for concrete
|
||||
* examples (e.g. tc_links and tc_opts test cases of test_progs).
|
||||
*
|
||||
* Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
|
||||
*
|
||||
* Attach case:
|
||||
*
|
||||
* struct bpf_mprog_entry *entry, *entry_new;
|
||||
* int ret;
|
||||
*
|
||||
* // bpf_mprog user-side lock
|
||||
* // fetch active @entry from attach location
|
||||
* [...]
|
||||
* ret = bpf_mprog_attach(entry, &entry_new, [...]);
|
||||
* if (!ret) {
|
||||
* if (entry != entry_new) {
|
||||
* // swap @entry to @entry_new at attach location
|
||||
* // ensure there are no inflight users of @entry:
|
||||
* synchronize_rcu();
|
||||
* }
|
||||
* bpf_mprog_commit(entry);
|
||||
* } else {
|
||||
* // error path, bail out, propagate @ret
|
||||
* }
|
||||
* // bpf_mprog user-side unlock
|
||||
*
|
||||
* Detach case:
|
||||
*
|
||||
* struct bpf_mprog_entry *entry, *entry_new;
|
||||
* int ret;
|
||||
*
|
||||
* // bpf_mprog user-side lock
|
||||
* // fetch active @entry from attach location
|
||||
* [...]
|
||||
* ret = bpf_mprog_detach(entry, &entry_new, [...]);
|
||||
* if (!ret) {
|
||||
* // all (*) marked is optional and depends on the use-case
|
||||
* // whether bpf_mprog_bundle should be freed or not
|
||||
* if (!bpf_mprog_total(entry_new)) (*)
|
||||
* entry_new = NULL (*)
|
||||
* // swap @entry to @entry_new at attach location
|
||||
* // ensure there are no inflight users of @entry:
|
||||
* synchronize_rcu();
|
||||
* bpf_mprog_commit(entry);
|
||||
* if (!entry_new) (*)
|
||||
* // free bpf_mprog_bundle (*)
|
||||
* } else {
|
||||
* // error path, bail out, propagate @ret
|
||||
* }
|
||||
* // bpf_mprog user-side unlock
|
||||
*
|
||||
* Query case:
|
||||
*
|
||||
* struct bpf_mprog_entry *entry;
|
||||
* int ret;
|
||||
*
|
||||
* // bpf_mprog user-side lock
|
||||
* // fetch active @entry from attach location
|
||||
* [...]
|
||||
* ret = bpf_mprog_query(attr, uattr, entry);
|
||||
* // bpf_mprog user-side unlock
|
||||
*
|
||||
* Data/fast path:
|
||||
*
|
||||
* struct bpf_mprog_entry *entry;
|
||||
* struct bpf_mprog_fp *fp;
|
||||
* struct bpf_prog *prog;
|
||||
* int ret = [...];
|
||||
*
|
||||
* rcu_read_lock();
|
||||
* // fetch active @entry from attach location
|
||||
* [...]
|
||||
* bpf_mprog_foreach_prog(entry, fp, prog) {
|
||||
* ret = bpf_prog_run(prog, [...]);
|
||||
* // process @ret from program
|
||||
* }
|
||||
* [...]
|
||||
* rcu_read_unlock();
|
||||
*
|
||||
* bpf_mprog locking considerations:
|
||||
*
|
||||
* bpf_mprog_{attach,detach,query}() must be protected by an external lock
|
||||
* (like RTNL in case of tcx).
|
||||
*
|
||||
* bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx
|
||||
* the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets
|
||||
* updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of
|
||||
* the bpf_mprog_bundle.
|
||||
*
|
||||
* Fast path accesses the active bpf_mprog_entry within RCU critical section
|
||||
* (in case of tcx it runs in NAPI which provides RCU protection there,
|
||||
* other users might need explicit rcu_read_lock()). The bpf_mprog_commit()
|
||||
* assumes that for the old bpf_mprog_entry there are no inflight users
|
||||
* anymore.
|
||||
*
|
||||
* The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
|
||||
* the replacement case where we don't swap the bpf_mprog_entry.
|
||||
*/
|
||||
|
||||
/* Walk the (prog, link) tuples of a bpf_mprog_entry.
 *
 * @entry: pointer to the entry to walk
 * @fp:    cursor over @entry's fp_items
 * @cp:    cursor over the cp_items of @entry's parent bundle
 * @t:     tuple lvalue (prog and link members) filled in for each position
 *
 * The walk ends at the first slot whose prog is NULL. All arguments are
 * parenthesized in the expansion so that expressions such as &bundle->a can
 * be passed as @entry without changing how the macro parses.
 */
#define bpf_mprog_foreach_tuple(entry, fp, cp, t)			\
	for ((fp) = &(entry)->fp_items[0],				\
	     (cp) = &(entry)->parent->cp_items[0];			\
	     ({								\
		(t).prog = READ_ONCE((fp)->prog);			\
		(t).link = (cp)->link;					\
		(t).prog;						\
	     });							\
	     (fp)++, (cp)++)
|
||||
|
||||
/* Walk the programs of a bpf_mprog_entry.
 *
 * @entry: pointer to the entry to walk
 * @fp:    cursor over @entry's fp_items
 * @p:     program pointer lvalue, loaded with READ_ONCE() for each slot
 *
 * The walk ends at the first slot whose prog is NULL. All arguments are
 * parenthesized in the expansion so that expressions such as &bundle->a can
 * be passed as @entry without changing how the macro parses.
 */
#define bpf_mprog_foreach_prog(entry, fp, p)				\
	for ((fp) = &(entry)->fp_items[0];				\
	     ((p) = READ_ONCE((fp)->prog));				\
	     (fp)++)
|
||||
|
||||
#define BPF_MPROG_MAX 64
|
||||
|
||||
struct bpf_mprog_fp {
|
||||
struct bpf_prog *prog;
|
||||
};
|
||||
|
||||
struct bpf_mprog_cp {
|
||||
struct bpf_link *link;
|
||||
};
|
||||
|
||||
struct bpf_mprog_entry {
|
||||
struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
|
||||
struct bpf_mprog_bundle *parent;
|
||||
};
|
||||
|
||||
struct bpf_mprog_bundle {
|
||||
struct bpf_mprog_entry a;
|
||||
struct bpf_mprog_entry b;
|
||||
struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
|
||||
struct bpf_prog *ref;
|
||||
atomic64_t revision;
|
||||
u32 count;
|
||||
};
|
||||
|
||||
struct bpf_tuple {
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_link *link;
|
||||
};
|
||||
|
||||
static inline struct bpf_mprog_entry *
|
||||
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
|
||||
{
|
||||
if (entry == &entry->parent->a)
|
||||
return &entry->parent->b;
|
||||
else
|
||||
return &entry->parent->a;
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
|
||||
BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
|
||||
ARRAY_SIZE(bundle->cp_items));
|
||||
|
||||
memset(bundle, 0, sizeof(*bundle));
|
||||
atomic64_set(&bundle->revision, 1);
|
||||
bundle->a.parent = bundle;
|
||||
bundle->b.parent = bundle;
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
entry->parent->count++;
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
entry->parent->count--;
|
||||
}
|
||||
|
||||
static inline int bpf_mprog_max(void)
|
||||
{
|
||||
return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
|
||||
}
|
||||
|
||||
static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
int total = entry->parent->count;
|
||||
|
||||
WARN_ON_ONCE(total > bpf_mprog_max());
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
const struct bpf_mprog_fp *fp;
|
||||
const struct bpf_prog *tmp;
|
||||
|
||||
bpf_mprog_foreach_prog(entry, fp, tmp) {
|
||||
if (tmp == prog)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
|
||||
struct bpf_tuple *tuple)
|
||||
{
|
||||
WARN_ON_ONCE(entry->parent->ref);
|
||||
if (!tuple->link)
|
||||
entry->parent->ref = tuple->prog;
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
/* In the non-link case prog deletions can only drop the reference
|
||||
* to the prog after the bpf_mprog_entry got swapped and the
|
||||
* bpf_mprog ensured that there are no inflight users anymore.
|
||||
*
|
||||
* Paired with bpf_mprog_mark_for_release().
|
||||
*/
|
||||
if (entry->parent->ref) {
|
||||
bpf_prog_put(entry->parent->ref);
|
||||
entry->parent->ref = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
atomic64_inc(&entry->parent->revision);
|
||||
}
|
||||
|
||||
/* Finish an update of @entry's bundle: first release a prog that was marked
 * for release (if any), then bump the bundle revision.
 */
static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	bpf_mprog_complete_release(entry);
	bpf_mprog_revision_new(entry);
}
|
||||
|
||||
static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
return atomic64_read(&entry->parent->revision);
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
|
||||
struct bpf_mprog_entry *src)
|
||||
{
|
||||
memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
|
||||
{
|
||||
int total = bpf_mprog_total(entry);
|
||||
|
||||
memmove(entry->fp_items + idx + 1,
|
||||
entry->fp_items + idx,
|
||||
(total - idx) * sizeof(struct bpf_mprog_fp));
|
||||
|
||||
memmove(entry->parent->cp_items + idx + 1,
|
||||
entry->parent->cp_items + idx,
|
||||
(total - idx) * sizeof(struct bpf_mprog_cp));
|
||||
}
|
||||
|
||||
static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
|
||||
{
|
||||
/* Total array size is needed in this case to enure the NULL
|
||||
* entry is copied at the end.
|
||||
*/
|
||||
int total = ARRAY_SIZE(entry->fp_items);
|
||||
|
||||
memmove(entry->fp_items + idx,
|
||||
entry->fp_items + idx + 1,
|
||||
(total - idx - 1) * sizeof(struct bpf_mprog_fp));
|
||||
|
||||
memmove(entry->parent->cp_items + idx,
|
||||
entry->parent->cp_items + idx + 1,
|
||||
(total - idx - 1) * sizeof(struct bpf_mprog_cp));
|
||||
}
|
||||
|
||||
/* Hand out pointers to the fp item and the cp item at slot @idx.
 *
 * @entry: entry whose fp_items (and parent's cp_items) are addressed
 * @idx:   slot index; not range-checked here
 * @fp:    receives the address of @entry's fp item
 * @cp:    receives the address of the parent bundle's cp item
 */
static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
				  struct bpf_mprog_fp **fp,
				  struct bpf_mprog_cp **cp)
{
	struct bpf_mprog_bundle *bundle = entry->parent;

	*fp = entry->fp_items + idx;
	*cp = bundle->cp_items + idx;
}
|
||||
|
||||
static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
|
||||
struct bpf_mprog_cp *cp,
|
||||
struct bpf_tuple *tuple)
|
||||
{
|
||||
WRITE_ONCE(fp->prog, tuple->prog);
|
||||
cp->link = tuple->link;
|
||||
}
|
||||
|
||||
int bpf_mprog_attach(struct bpf_mprog_entry *entry,
|
||||
struct bpf_mprog_entry **entry_new,
|
||||
struct bpf_prog *prog_new, struct bpf_link *link,
|
||||
struct bpf_prog *prog_old,
|
||||
u32 flags, u32 id_or_fd, u64 revision);
|
||||
|
||||
int bpf_mprog_detach(struct bpf_mprog_entry *entry,
|
||||
struct bpf_mprog_entry **entry_new,
|
||||
struct bpf_prog *prog, struct bpf_link *link,
|
||||
u32 flags, u32 id_or_fd, u64 revision);
|
||||
|
||||
int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
|
||||
struct bpf_mprog_entry *entry);
|
||||
|
||||
static inline bool bpf_mprog_supported(enum bpf_prog_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif /* __BPF_MPROG_H */
|
||||
|
|
@ -267,5 +267,6 @@ MAX_BTF_TRACING_TYPE,
|
|||
extern u32 btf_tracing_ids[];
|
||||
extern u32 bpf_cgroup_btf_id[];
|
||||
extern u32 bpf_local_storage_map_btf_id[];
|
||||
extern u32 btf_bpf_map_id[];
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1930,8 +1930,7 @@ enum netdev_ml_priv_type {
|
|||
*
|
||||
* @rx_handler: handler for received packets
|
||||
* @rx_handler_data: XXX: need comments on this one
|
||||
* @miniq_ingress: ingress/clsact qdisc specific data for
|
||||
* ingress processing
|
||||
* @tcx_ingress: BPF & clsact qdisc specific data for ingress processing
|
||||
* @ingress_queue: XXX: need comments on this one
|
||||
* @nf_hooks_ingress: netfilter hooks executed for ingress packets
|
||||
* @broadcast: hw bcast address
|
||||
|
|
@ -1952,8 +1951,7 @@ enum netdev_ml_priv_type {
|
|||
* @xps_maps: all CPUs/RXQs maps for XPS device
|
||||
*
|
||||
* @xps_maps: XXX: need comments on this one
|
||||
* @miniq_egress: clsact qdisc specific data for
|
||||
* egress processing
|
||||
* @tcx_egress: BPF & clsact qdisc specific data for egress processing
|
||||
* @nf_hooks_egress: netfilter hooks executed for egress packets
|
||||
* @qdisc_hash: qdisc hash table
|
||||
* @watchdog_timeo: Represents the timeout that is used by
|
||||
|
|
@ -2250,11 +2248,11 @@ struct net_device {
|
|||
#define GRO_MAX_SIZE (8 * 65535u)
|
||||
unsigned int gro_max_size;
|
||||
unsigned int gro_ipv4_max_size;
|
||||
unsigned int xdp_zc_max_segs;
|
||||
rx_handler_func_t __rcu *rx_handler;
|
||||
void __rcu *rx_handler_data;
|
||||
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
struct mini_Qdisc __rcu *miniq_ingress;
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_ingress;
|
||||
#endif
|
||||
struct netdev_queue __rcu *ingress_queue;
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
|
|
@ -2282,8 +2280,8 @@ struct net_device {
|
|||
#ifdef CONFIG_XPS
|
||||
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
|
||||
#endif
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
struct mini_Qdisc __rcu *miniq_egress;
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
struct bpf_mprog_entry __rcu *tcx_egress;
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER_EGRESS
|
||||
struct nf_hook_entries __rcu *nf_hooks_egress;
|
||||
|
|
|
|||
|
|
@ -944,7 +944,7 @@ struct sk_buff {
|
|||
__u8 __mono_tc_offset[0];
|
||||
/* public: */
|
||||
__u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
|
||||
__u8 tc_skip_classify:1;
|
||||
#endif
|
||||
|
|
@ -993,7 +993,7 @@ struct sk_buff {
|
|||
__u8 csum_not_inet:1;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NET_SCHED
|
||||
#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
|
||||
__u16 tc_index; /* traffic control index */
|
||||
#endif
|
||||
|
||||
|
|
@ -4023,7 +4023,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
|
|||
if (likely(hlen - offset >= len))
|
||||
return (void *)data + offset;
|
||||
|
||||
if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
|
||||
if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
|
||||
return NULL;
|
||||
|
||||
return buffer;
|
||||
|
|
@ -4036,6 +4036,14 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
|
|||
skb_headlen(skb), buffer);
|
||||
}
|
||||
|
||||
static inline void * __must_check
|
||||
skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len)
|
||||
{
|
||||
if (likely(skb_headlen(skb) - offset >= len))
|
||||
return skb->data + offset;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_needs_linearize - check if we need to linearize a given skb
|
||||
* depending on the given device features.
|
||||
|
|
|
|||
|
|
@ -703,7 +703,7 @@ int skb_do_redirect(struct sk_buff *);
|
|||
|
||||
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
|
||||
{
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
return skb->tc_at_ingress;
|
||||
#else
|
||||
return false;
|
||||
|
|
|
|||
206
include/net/tcx.h
Normal file
206
include/net/tcx.h
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
#ifndef __NET_TCX_H
|
||||
#define __NET_TCX_H
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_mprog.h>
|
||||
|
||||
#include <net/sch_generic.h>
|
||||
|
||||
struct mini_Qdisc;
|
||||
|
||||
struct tcx_entry {
|
||||
struct mini_Qdisc __rcu *miniq;
|
||||
struct bpf_mprog_bundle bundle;
|
||||
bool miniq_active;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
struct tcx_link {
|
||||
struct bpf_link link;
|
||||
struct net_device *dev;
|
||||
u32 location;
|
||||
};
|
||||
|
||||
/* Record in @skb whether it is being processed at ingress. Compiles to
 * nothing when CONFIG_NET_XGRESS is not set.
 */
static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
{
#ifdef CONFIG_NET_XGRESS
	skb->tc_at_ingress = ingress;
#endif
}
|
||||
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
struct bpf_mprog_bundle *bundle = entry->parent;
|
||||
|
||||
return container_of(bundle, struct tcx_entry, bundle);
|
||||
}
|
||||
|
||||
static inline struct tcx_link *tcx_link(struct bpf_link *link)
|
||||
{
|
||||
return container_of(link, struct tcx_link, link);
|
||||
}
|
||||
|
||||
/* Const variant of tcx_link(): the qualifier is cast away only to reuse
 * tcx_link() and is re-applied by the return type.
 */
static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
{
	struct bpf_link *plain = (struct bpf_link *)link;

	return tcx_link(plain);
}
|
||||
|
||||
void tcx_inc(void);
|
||||
void tcx_dec(void);
|
||||
|
||||
/* Wait until nobody can be using the previous bpf_mprog_entry anymore. */
static inline void tcx_entry_sync(void)
{
	/* The a/b entries were swapped; synchronize_rcu() makes sure that
	 * no inflight user of the old one is left.
	 */
	synchronize_rcu();
}
|
||||
|
||||
static inline void
|
||||
tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry,
|
||||
bool ingress)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
if (ingress)
|
||||
rcu_assign_pointer(dev->tcx_ingress, entry);
|
||||
else
|
||||
rcu_assign_pointer(dev->tcx_egress, entry);
|
||||
}
|
||||
|
||||
static inline struct bpf_mprog_entry *
|
||||
tcx_entry_fetch(struct net_device *dev, bool ingress)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
if (ingress)
|
||||
return rcu_dereference_rtnl(dev->tcx_ingress);
|
||||
else
|
||||
return rcu_dereference_rtnl(dev->tcx_egress);
|
||||
}
|
||||
|
||||
static inline struct bpf_mprog_entry *tcx_entry_create(void)
|
||||
{
|
||||
struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
|
||||
|
||||
if (tcx) {
|
||||
bpf_mprog_bundle_init(&tcx->bundle);
|
||||
return &tcx->bundle.a;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
kfree_rcu(tcx_entry(entry), rcu);
|
||||
}
|
||||
|
||||
static inline struct bpf_mprog_entry *
|
||||
tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created)
|
||||
{
|
||||
struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
|
||||
|
||||
*created = false;
|
||||
if (!entry) {
|
||||
entry = tcx_entry_create();
|
||||
if (!entry)
|
||||
return NULL;
|
||||
*created = true;
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
/* Call tcx_inc(), then the ingress or egress queue increment matching
 * @ingress.
 */
static inline void tcx_skeys_inc(bool ingress)
{
	tcx_inc();

	if (!ingress) {
		net_inc_egress_queue();
		return;
	}
	net_inc_ingress_queue();
}
|
||||
|
||||
/* Counterpart of tcx_skeys_inc(): call the ingress or egress queue decrement
 * matching @ingress, then tcx_dec().
 */
static inline void tcx_skeys_dec(bool ingress)
{
	if (!ingress)
		net_dec_egress_queue();
	else
		net_dec_ingress_queue();

	tcx_dec();
}
|
||||
|
||||
static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
|
||||
const bool active)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
tcx_entry(entry)->miniq_active = active;
|
||||
}
|
||||
|
||||
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active;
|
||||
}
|
||||
|
||||
static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb,
|
||||
int code)
|
||||
{
|
||||
switch (code) {
|
||||
case TCX_PASS:
|
||||
skb->tc_index = qdisc_skb_cb(skb)->tc_classid;
|
||||
fallthrough;
|
||||
case TCX_DROP:
|
||||
case TCX_REDIRECT:
|
||||
return code;
|
||||
case TCX_NEXT:
|
||||
default:
|
||||
return TCX_NEXT;
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_NET_XGRESS */
|
||||
|
||||
#if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL)
|
||||
int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
|
||||
int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
|
||||
int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
|
||||
void tcx_uninstall(struct net_device *dev, bool ingress);
|
||||
|
||||
int tcx_prog_query(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr);
|
||||
|
||||
static inline void dev_tcx_uninstall(struct net_device *dev)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
tcx_uninstall(dev, true);
|
||||
tcx_uninstall(dev, false);
|
||||
}
|
||||
#else
|
||||
static inline int tcx_prog_attach(const union bpf_attr *attr,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline int tcx_link_attach(const union bpf_attr *attr,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline int tcx_prog_detach(const union bpf_attr *attr,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline int tcx_prog_query(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline void dev_tcx_uninstall(struct net_device *dev)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */
|
||||
#endif /* __NET_TCX_H */
|
||||
|
|
@ -52,6 +52,7 @@ struct xdp_sock {
|
|||
struct xsk_buff_pool *pool;
|
||||
u16 queue_id;
|
||||
bool zc;
|
||||
bool sg;
|
||||
enum {
|
||||
XSK_READY = 0,
|
||||
XSK_BOUND,
|
||||
|
|
@ -67,6 +68,12 @@ struct xdp_sock {
|
|||
u64 rx_dropped;
|
||||
u64 rx_queue_full;
|
||||
|
||||
/* When __xsk_generic_xmit() must return before it sees the EOP descriptor for the current
|
||||
* packet, the partially built skb is saved here so that packet building can resume in next
|
||||
* call of __xsk_generic_xmit().
|
||||
*/
|
||||
struct sk_buff *skb;
|
||||
|
||||
struct list_head map_list;
|
||||
/* Protects map_list */
|
||||
spinlock_t map_list_lock;
|
||||
|
|
|
|||
|
|
@ -89,6 +89,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
|
|||
return xp_alloc(pool);
|
||||
}
|
||||
|
||||
/* A descriptor ends a packet when xp_mb_desc() reports that no further
 * descriptor continues it.
 */
static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
{
	const bool continued = xp_mb_desc(desc);

	return !continued;
}
|
||||
|
||||
/* Returns as many entries as possible up to max. 0 <= N <= max. */
|
||||
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
|
||||
{
|
||||
|
|
@ -103,10 +108,45 @@ static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
|
|||
static inline void xsk_buff_free(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
||||
struct list_head *xskb_list = &xskb->pool->xskb_list;
|
||||
struct xdp_buff_xsk *pos, *tmp;
|
||||
|
||||
if (likely(!xdp_buff_has_frags(xdp)))
|
||||
goto out;
|
||||
|
||||
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
|
||||
list_del(&pos->xskb_list_node);
|
||||
xp_free(pos);
|
||||
}
|
||||
|
||||
xdp_get_shared_info_from_buff(xdp)->nr_frags = 0;
|
||||
out:
|
||||
xp_free(xskb);
|
||||
}
|
||||
|
||||
static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
|
||||
|
||||
list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
|
||||
}
|
||||
|
||||
static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
|
||||
{
|
||||
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
|
||||
struct xdp_buff *ret = NULL;
|
||||
struct xdp_buff_xsk *frag;
|
||||
|
||||
frag = list_first_entry_or_null(&xskb->pool->xskb_list,
|
||||
struct xdp_buff_xsk, xskb_list_node);
|
||||
if (frag) {
|
||||
list_del(&frag->xskb_list_node);
|
||||
ret = &frag->xdp;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
|
||||
{
|
||||
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
|
||||
|
|
@ -241,6 +281,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
|
||||
{
|
||||
return 0;
|
||||
|
|
@ -255,6 +300,15 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
|
|||
{
|
||||
}
|
||||
|
||||
static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
|
||||
{
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ struct xdp_buff_xsk {
|
|||
struct xsk_buff_pool *pool;
|
||||
u64 orig_addr;
|
||||
struct list_head free_list_node;
|
||||
struct list_head xskb_list_node;
|
||||
};
|
||||
|
||||
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
|
||||
|
|
@ -54,6 +55,7 @@ struct xsk_buff_pool {
|
|||
struct xdp_umem *umem;
|
||||
struct work_struct work;
|
||||
struct list_head free_list;
|
||||
struct list_head xskb_list;
|
||||
u32 heads_cnt;
|
||||
u16 queue_id;
|
||||
|
||||
|
|
@ -184,6 +186,11 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
|
|||
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
|
||||
}
|
||||
|
||||
static inline bool xp_mb_desc(struct xdp_desc *desc)
|
||||
{
|
||||
return desc->options & XDP_PKT_CONTD;
|
||||
}
|
||||
|
||||
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
|
||||
{
|
||||
return addr & pool->chunk_mask;
|
||||
|
|
|
|||
|
|
@ -1036,6 +1036,8 @@ enum bpf_attach_type {
|
|||
BPF_LSM_CGROUP,
|
||||
BPF_STRUCT_OPS,
|
||||
BPF_NETFILTER,
|
||||
BPF_TCX_INGRESS,
|
||||
BPF_TCX_EGRESS,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
|
|
@ -1053,7 +1055,7 @@ enum bpf_link_type {
|
|||
BPF_LINK_TYPE_KPROBE_MULTI = 8,
|
||||
BPF_LINK_TYPE_STRUCT_OPS = 9,
|
||||
BPF_LINK_TYPE_NETFILTER = 10,
|
||||
|
||||
BPF_LINK_TYPE_TCX = 11,
|
||||
MAX_BPF_LINK_TYPE,
|
||||
};
|
||||
|
||||
|
|
@ -1113,7 +1115,12 @@ enum bpf_perf_event_type {
|
|||
*/
|
||||
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||
#define BPF_F_ALLOW_MULTI (1U << 1)
|
||||
/* Generic attachment flags. */
|
||||
#define BPF_F_REPLACE (1U << 2)
|
||||
#define BPF_F_BEFORE (1U << 3)
|
||||
#define BPF_F_AFTER (1U << 4)
|
||||
#define BPF_F_ID (1U << 5)
|
||||
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
|
||||
|
||||
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||
* verifier will perform strict alignment checking as if the kernel
|
||||
|
|
@ -1444,14 +1451,19 @@ union bpf_attr {
|
|||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
|
||||
__u32 target_fd; /* container object to attach to */
|
||||
__u32 attach_bpf_fd; /* eBPF program to attach */
|
||||
union {
|
||||
__u32 target_fd; /* target object to attach to or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_bpf_fd;
|
||||
__u32 attach_type;
|
||||
__u32 attach_flags;
|
||||
__u32 replace_bpf_fd; /* previously attached eBPF
|
||||
* program to replace if
|
||||
* BPF_F_REPLACE is used
|
||||
*/
|
||||
__u32 replace_bpf_fd;
|
||||
union {
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
};
|
||||
__u64 expected_revision;
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||
|
|
@ -1497,16 +1509,26 @@ union bpf_attr {
|
|||
} info;
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_QUERY command */
|
||||
__u32 target_fd; /* container object to query */
|
||||
union {
|
||||
__u32 target_fd; /* target object to query or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_type;
|
||||
__u32 query_flags;
|
||||
__u32 attach_flags;
|
||||
__aligned_u64 prog_ids;
|
||||
__u32 prog_cnt;
|
||||
union {
|
||||
__u32 prog_cnt;
|
||||
__u32 count;
|
||||
};
|
||||
__u32 :32;
|
||||
/* output: per-program attach_flags.
|
||||
* not allowed to be set during effective query.
|
||||
*/
|
||||
__aligned_u64 prog_attach_flags;
|
||||
__aligned_u64 link_ids;
|
||||
__aligned_u64 link_attach_flags;
|
||||
__u64 revision;
|
||||
} query;
|
||||
|
||||
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
|
||||
|
|
@ -1549,13 +1571,13 @@ union bpf_attr {
|
|||
__u32 map_fd; /* struct_ops to attach */
|
||||
};
|
||||
union {
|
||||
__u32 target_fd; /* object to attach to */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
__u32 target_fd; /* target object to attach to or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_type; /* attach type */
|
||||
__u32 flags; /* extra flags */
|
||||
union {
|
||||
__u32 target_btf_id; /* btf_id of target to attach to */
|
||||
__u32 target_btf_id; /* btf_id of target to attach to */
|
||||
struct {
|
||||
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
|
||||
__u32 iter_info_len; /* iter_info length */
|
||||
|
|
@ -1589,6 +1611,13 @@ union bpf_attr {
|
|||
__s32 priority;
|
||||
__u32 flags;
|
||||
} netfilter;
|
||||
struct {
|
||||
union {
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
};
|
||||
__u64 expected_revision;
|
||||
} tcx;
|
||||
};
|
||||
} link_create;
|
||||
|
||||
|
|
@ -6197,6 +6226,19 @@ struct bpf_sock_tuple {
|
|||
};
|
||||
};
|
||||
|
||||
/* (Simplified) user return codes for tcx prog type.
|
||||
* A valid tcx program must return one of these defined values. All other
|
||||
* return codes are reserved for future use. Must remain compatible with
|
||||
* their TC_ACT_* counter-parts. For compatibility in behavior, unknown
|
||||
* return codes are mapped to TCX_NEXT.
|
||||
*/
|
||||
enum tcx_action_base {
|
||||
TCX_NEXT = -1,
|
||||
TCX_PASS = 0,
|
||||
TCX_DROP = 2,
|
||||
TCX_REDIRECT = 7,
|
||||
};
|
||||
|
||||
struct bpf_xdp_sock {
|
||||
__u32 queue_id;
|
||||
};
|
||||
|
|
@ -6479,6 +6521,10 @@ struct bpf_link_info {
|
|||
} event; /* BPF_PERF_EVENT_EVENT */
|
||||
};
|
||||
} perf_event;
|
||||
struct {
|
||||
__u32 ifindex;
|
||||
__u32 attach_type;
|
||||
} tcx;
|
||||
};
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
|
|
@ -7052,6 +7098,7 @@ struct bpf_list_head {
|
|||
struct bpf_list_node {
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_rb_root {
|
||||
|
|
@ -7063,6 +7110,7 @@ struct bpf_rb_node {
|
|||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_refcount {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,12 @@
|
|||
* application.
|
||||
*/
|
||||
#define XDP_USE_NEED_WAKEUP (1 << 3)
|
||||
/* By setting this option, userspace application indicates that it can
|
||||
* handle multiple descriptors per packet thus enabling AF_XDP to split
|
||||
* multi-buffer XDP frames into multiple Rx descriptors. Without this set
|
||||
* such frames will be dropped.
|
||||
*/
|
||||
#define XDP_USE_SG (1 << 4)
|
||||
|
||||
/* Flags for xsk_umem_config flags */
|
||||
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
|
||||
|
|
@ -108,4 +114,11 @@ struct xdp_desc {
|
|||
|
||||
/* UMEM descriptor is __u64 */
|
||||
|
||||
/* Flag indicating that the packet continues with the buffer pointed out by the
|
||||
* next frame in the ring. The end of the packet is signalled by setting this
|
||||
* bit to zero. For single buffer packets, every descriptor has 'options' set
|
||||
* to 0 and this maintains backward compatibility.
|
||||
*/
|
||||
#define XDP_PKT_CONTD (1 << 0)
|
||||
|
||||
#endif /* _LINUX_IF_XDP_H */
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ enum {
|
|||
NETDEV_A_DEV_IFINDEX = 1,
|
||||
NETDEV_A_DEV_PAD,
|
||||
NETDEV_A_DEV_XDP_FEATURES,
|
||||
NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
|
||||
|
||||
__NETDEV_A_DEV_MAX,
|
||||
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ config BPF_SYSCALL
|
|||
select TASKS_TRACE_RCU
|
||||
select BINARY_PRINTF
|
||||
select NET_SOCK_MSG if NET
|
||||
select NET_XGRESS if NET
|
||||
select PAGE_POOL if NET
|
||||
default n
|
||||
help
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
|
|||
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
|
||||
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
|
||||
obj-$(CONFIG_BPF_JIT) += trampoline.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
|
||||
obj-$(CONFIG_BPF_JIT) += dispatcher.o
|
||||
|
|
@ -21,6 +21,7 @@ obj-$(CONFIG_BPF_SYSCALL) += devmap.o
|
|||
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += offload.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += tcx.o
|
||||
endif
|
||||
ifeq ($(CONFIG_PERF_EVENTS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
|
||||
|
|
|
|||
|
|
@ -1942,23 +1942,29 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
|
|||
return (void *)p__refcounted_kptr;
|
||||
}
|
||||
|
||||
static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head,
|
||||
static int __bpf_list_add(struct bpf_list_node_kern *node,
|
||||
struct bpf_list_head *head,
|
||||
bool tail, struct btf_record *rec, u64 off)
|
||||
{
|
||||
struct list_head *n = (void *)node, *h = (void *)head;
|
||||
struct list_head *n = &node->list_head, *h = (void *)head;
|
||||
|
||||
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
|
||||
* called on its fields, so init here
|
||||
*/
|
||||
if (unlikely(!h->next))
|
||||
INIT_LIST_HEAD(h);
|
||||
if (!list_empty(n)) {
|
||||
|
||||
/* node->owner != NULL implies !list_empty(n), no need to separately
|
||||
* check the latter
|
||||
*/
|
||||
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
|
||||
/* Only called from BPF prog, no need to migrate_disable */
|
||||
__bpf_obj_drop_impl((void *)n - off, rec);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tail ? list_add_tail(n, h) : list_add(n, h);
|
||||
WRITE_ONCE(node->owner, head);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1967,25 +1973,26 @@ __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
|
|||
struct bpf_list_node *node,
|
||||
void *meta__ign, u64 off)
|
||||
{
|
||||
struct bpf_list_node_kern *n = (void *)node;
|
||||
struct btf_struct_meta *meta = meta__ign;
|
||||
|
||||
return __bpf_list_add(node, head, false,
|
||||
meta ? meta->record : NULL, off);
|
||||
return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
|
||||
struct bpf_list_node *node,
|
||||
void *meta__ign, u64 off)
|
||||
{
|
||||
struct bpf_list_node_kern *n = (void *)node;
|
||||
struct btf_struct_meta *meta = meta__ign;
|
||||
|
||||
return __bpf_list_add(node, head, true,
|
||||
meta ? meta->record : NULL, off);
|
||||
return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
|
||||
}
|
||||
|
||||
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
|
||||
{
|
||||
struct list_head *n, *h = (void *)head;
|
||||
struct bpf_list_node_kern *node;
|
||||
|
||||
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
|
||||
* called on its fields, so init here
|
||||
|
|
@ -1994,8 +2001,14 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
|
|||
INIT_LIST_HEAD(h);
|
||||
if (list_empty(h))
|
||||
return NULL;
|
||||
|
||||
n = tail ? h->prev : h->next;
|
||||
node = container_of(n, struct bpf_list_node_kern, list_head);
|
||||
if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
|
||||
return NULL;
|
||||
|
||||
list_del_init(n);
|
||||
WRITE_ONCE(node->owner, NULL);
|
||||
return (struct bpf_list_node *)n;
|
||||
}
|
||||
|
||||
|
|
@ -2012,29 +2025,38 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
|
|||
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
|
||||
struct bpf_rb_node *node)
|
||||
{
|
||||
struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
|
||||
struct rb_root_cached *r = (struct rb_root_cached *)root;
|
||||
struct rb_node *n = (struct rb_node *)node;
|
||||
struct rb_node *n = &node_internal->rb_node;
|
||||
|
||||
if (RB_EMPTY_NODE(n))
|
||||
/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
|
||||
* n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
|
||||
*/
|
||||
if (READ_ONCE(node_internal->owner) != root)
|
||||
return NULL;
|
||||
|
||||
rb_erase_cached(n, r);
|
||||
RB_CLEAR_NODE(n);
|
||||
WRITE_ONCE(node_internal->owner, NULL);
|
||||
return (struct bpf_rb_node *)n;
|
||||
}
|
||||
|
||||
/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
|
||||
* program
|
||||
*/
|
||||
static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
|
||||
static int __bpf_rbtree_add(struct bpf_rb_root *root,
|
||||
struct bpf_rb_node_kern *node,
|
||||
void *less, struct btf_record *rec, u64 off)
|
||||
{
|
||||
struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
|
||||
struct rb_node *parent = NULL, *n = (struct rb_node *)node;
|
||||
struct rb_node *parent = NULL, *n = &node->rb_node;
|
||||
bpf_callback_t cb = (bpf_callback_t)less;
|
||||
bool leftmost = true;
|
||||
|
||||
if (!RB_EMPTY_NODE(n)) {
|
||||
/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
|
||||
* check the latter
|
||||
*/
|
||||
if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
|
||||
/* Only called from BPF prog, no need to migrate_disable */
|
||||
__bpf_obj_drop_impl((void *)n - off, rec);
|
||||
return -EINVAL;
|
||||
|
|
@ -2052,6 +2074,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
|
|||
|
||||
rb_link_node(n, parent, link);
|
||||
rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
|
||||
WRITE_ONCE(node->owner, root);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -2060,8 +2083,9 @@ __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node
|
|||
void *meta__ign, u64 off)
|
||||
{
|
||||
struct btf_struct_meta *meta = meta__ign;
|
||||
struct bpf_rb_node_kern *n = (void *)node;
|
||||
|
||||
return __bpf_rbtree_add(root, node, (void *)less, meta ? meta->record : NULL, off);
|
||||
return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
|
||||
}
|
||||
|
||||
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
|
||||
|
|
@ -2239,7 +2263,10 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
|
|||
case BPF_DYNPTR_TYPE_RINGBUF:
|
||||
return ptr->data + ptr->offset + offset;
|
||||
case BPF_DYNPTR_TYPE_SKB:
|
||||
return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
|
||||
if (buffer__opt)
|
||||
return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
|
||||
else
|
||||
return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
|
||||
case BPF_DYNPTR_TYPE_XDP:
|
||||
{
|
||||
void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ static const struct seq_operations bpf_map_seq_ops = {
|
|||
.show = bpf_map_seq_show,
|
||||
};
|
||||
|
||||
BTF_ID_LIST(btf_bpf_map_id)
|
||||
BTF_ID(struct, bpf_map)
|
||||
BTF_ID_LIST_GLOBAL_SINGLE(btf_bpf_map_id, struct, bpf_map)
|
||||
|
||||
static const struct bpf_iter_seq_info bpf_map_seq_info = {
|
||||
.seq_ops = &bpf_map_seq_ops,
|
||||
|
|
@ -198,7 +197,7 @@ __diag_push();
|
|||
__diag_ignore_all("-Wmissing-prototypes",
|
||||
"Global functions as their definitions will be in vmlinux BTF");
|
||||
|
||||
__bpf_kfunc s64 bpf_map_sum_elem_count(struct bpf_map *map)
|
||||
__bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map)
|
||||
{
|
||||
s64 *pcount;
|
||||
s64 ret = 0;
|
||||
|
|
@ -227,6 +226,6 @@ static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
|
|||
|
||||
static int init_subsystem(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_map_iter_kfunc_set);
|
||||
}
|
||||
late_initcall(init_subsystem);
|
||||
|
|
|
|||
445
kernel/bpf/mprog.c
Normal file
445
kernel/bpf/mprog.c
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_mprog.h>
|
||||
|
||||
/* Look up the relative BPF link and record it, along with its program,
 * in @tuple.
 *
 * The link is taken by id when BPF_F_ID is set in @flags, otherwise by fd
 * when @id_or_fd is non-zero. A non-zero @type must match the type of the
 * link's program.
 *
 * Returns 0 on success (the looked up link stays referenced through
 * @tuple->link), -EINVAL when there is nothing to look up or the program
 * type mismatches, or the error reported by the lookup.
 */
static int bpf_mprog_link(struct bpf_tuple *tuple,
			  u32 id_or_fd, u32 flags,
			  enum bpf_prog_type type)
{
	struct bpf_link *link;

	if (flags & BPF_F_ID)
		link = bpf_link_by_id(id_or_fd);
	else if (id_or_fd)
		link = bpf_link_get_from_fd(id_or_fd);
	else
		return -EINVAL;
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (type && link->prog->type != type) {
		/* Wrong program type, hand the link back. */
		bpf_link_put(link);
		return -EINVAL;
	}

	tuple->prog = link->prog;
	tuple->link = link;
	return 0;
}
|
||||
|
||||
/* Look up the relative BPF program and record it in @tuple, with the
 * tuple's link left NULL.
 *
 * The program is taken by id when BPF_F_ID is set in @flags, otherwise by
 * fd when @id_or_fd is non-zero. A non-zero @type must match the program's
 * type.
 *
 * Returns 0 on success (the looked up program stays referenced through
 * @tuple->prog), -EINVAL when there is nothing to look up or the program
 * type mismatches, or the error reported by the lookup.
 */
static int bpf_mprog_prog(struct bpf_tuple *tuple,
			  u32 id_or_fd, u32 flags,
			  enum bpf_prog_type type)
{
	struct bpf_prog *prog;

	if (flags & BPF_F_ID)
		prog = bpf_prog_by_id(id_or_fd);
	else if (id_or_fd)
		prog = bpf_prog_get(id_or_fd);
	else
		return -EINVAL;
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (type && prog->type != type) {
		/* Wrong program type, hand the program back. */
		bpf_prog_put(prog);
		return -EINVAL;
	}

	tuple->prog = prog;
	tuple->link = NULL;
	return 0;
}
|
||||
|
||||
/* Fill @tuple with the element that a before/after request is relative to.
 *
 * @tuple is zeroed first. With BPF_F_LINK in @flags the relative element
 * is resolved as a link, otherwise as a program. Returns 0 on success or
 * the error from the respective lookup.
 */
static int bpf_mprog_tuple_relative(struct bpf_tuple *tuple,
				    u32 id_or_fd, u32 flags,
				    enum bpf_prog_type type)
{
	memset(tuple, 0, sizeof(*tuple));

	if (flags & BPF_F_LINK)
		return bpf_mprog_link(tuple, id_or_fd, flags, type);

	/* Neither BPF_F_ID nor an id_or_fd given: the tuple's link/prog
	 * simply stay NULL. This is what before/after use to select the
	 * first/last position without passing an fd.
	 */
	if (!(flags & BPF_F_ID) && !id_or_fd)
		return 0;

	return bpf_mprog_prog(tuple, id_or_fd, flags, type);
}
|
||||
|
||||
static void bpf_mprog_tuple_put(struct bpf_tuple *tuple)
|
||||
{
|
||||
if (tuple->link)
|
||||
bpf_link_put(tuple->link);
|
||||
else if (tuple->prog)
|
||||
bpf_prog_put(tuple->prog);
|
||||
}
|
||||
|
||||
/* The bpf_mprog_{replace,delete}() operate on exact idx position with the
|
||||
* one exception that for deletion we support delete from front/back. In
|
||||
* case of front idx is -1, in case of back idx is bpf_mprog_total(entry).
|
||||
* Adjustment to first and last entry is trivial. The bpf_mprog_insert()
|
||||
* we have to deal with the following cases:
|
||||
*
|
||||
* idx + before:
|
||||
*
|
||||
* Insert P4 before P3: idx for old array is 1, idx for new array is 2,
|
||||
* hence we adjust target idx for the new array, so that memmove copies
|
||||
* P1 and P2 to the new entry, and we insert P4 into idx 2. Inserting
|
||||
* before P1 would have old idx -1 and new idx 0.
|
||||
*
|
||||
* +--+--+--+ +--+--+--+--+ +--+--+--+--+
|
||||
* |P1|P2|P3| ==> |P1|P2| |P3| ==> |P1|P2|P4|P3|
|
||||
* +--+--+--+ +--+--+--+--+ +--+--+--+--+
|
||||
*
|
||||
* idx + after:
|
||||
*
|
||||
* Insert P4 after P2: idx for old array is 2, idx for new array is 2.
|
||||
* Again, memmove copies P1 and P2 to the new entry, and we insert P4
|
||||
* into idx 2. Inserting after P3 would have both old/new idx at 3 aka
|
||||
* bpf_mprog_total(entry).
|
||||
*
|
||||
* +--+--+--+ +--+--+--+--+ +--+--+--+--+
|
||||
* |P1|P2|P3| ==> |P1|P2| |P3| ==> |P1|P2|P4|P3|
|
||||
* +--+--+--+ +--+--+--+--+ +--+--+--+--+
|
||||
*/
|
||||
static int bpf_mprog_replace(struct bpf_mprog_entry *entry,
|
||||
struct bpf_mprog_entry **entry_new,
|
||||
struct bpf_tuple *ntuple, int idx)
|
||||
{
|
||||
struct bpf_mprog_fp *fp;
|
||||
struct bpf_mprog_cp *cp;
|
||||
struct bpf_prog *oprog;
|
||||
|
||||
bpf_mprog_read(entry, idx, &fp, &cp);
|
||||
oprog = READ_ONCE(fp->prog);
|
||||
bpf_mprog_write(fp, cp, ntuple);
|
||||
if (!ntuple->link) {
|
||||
WARN_ON_ONCE(cp->link);
|
||||
bpf_prog_put(oprog);
|
||||
}
|
||||
*entry_new = entry;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Insert @ntuple into a copy of @entry and hand that copy back through
 * @entry_new.
 *
 * @idx is the position computed against the old array: with BPF_F_BEFORE
 * in @flags it is shifted by one to get the slot in the new array, and
 * @idx == bpf_mprog_total(entry) appends without moving anything.
 *
 * Returns 0 on success, or -ERANGE when @entry already holds
 * bpf_mprog_max() elements. The caller only bounds @idx, so an insert
 * relative to an existing element would otherwise slip through on a full
 * entry and grow it beyond the limit.
 */
static int bpf_mprog_insert(struct bpf_mprog_entry *entry,
			    struct bpf_mprog_entry **entry_new,
			    struct bpf_tuple *ntuple, int idx, u32 flags)
{
	int total = bpf_mprog_total(entry);
	struct bpf_mprog_entry *peer;
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;

	/* Refuse to grow a full entry, before the peer is touched. */
	if (total >= bpf_mprog_max())
		return -ERANGE;

	peer = bpf_mprog_peer(entry);
	bpf_mprog_entry_copy(peer, entry);
	if (idx == total)
		goto insert;
	else if (flags & BPF_F_BEFORE)
		idx += 1;
	bpf_mprog_entry_grow(peer, idx);
insert:
	bpf_mprog_read(peer, idx, &fp, &cp);
	bpf_mprog_write(fp, cp, ntuple);
	bpf_mprog_inc(peer);
	*entry_new = peer;
	return 0;
}
|
||||
|
||||
/* Remove the element at @idx from a copy of @entry and hand that copy
 * back through @entry_new.
 *
 * @idx == -1 deletes from the front and @idx == bpf_mprog_total(entry)
 * deletes from the back. @dtuple is marked for release on the new entry.
 * Always returns 0.
 */
static int bpf_mprog_delete(struct bpf_mprog_entry *entry,
			    struct bpf_mprog_entry **entry_new,
			    struct bpf_tuple *dtuple, int idx)
{
	int total = bpf_mprog_total(entry);
	struct bpf_mprog_entry *peer = bpf_mprog_peer(entry);

	bpf_mprog_entry_copy(peer, entry);

	/* Map the front/back markers onto the first/last real slot. */
	if (idx == -1)
		idx = 0;
	else if (idx == total)
		idx = total - 1;

	bpf_mprog_entry_shrink(peer, idx);
	bpf_mprog_dec(peer);
	bpf_mprog_mark_for_release(peer, dtuple);

	*entry_new = peer;
	return 0;
}
|
||||
|
||||
/* In bpf_mprog_pos_*() we evaluate the target position for the BPF
|
||||
* program/link that needs to be replaced, inserted or deleted for
|
||||
* each "rule" independently. If all rules agree on that position
|
||||
* or existing element, then enact replacement, addition or deletion.
|
||||
* If this is not the case, then the request cannot be satisfied and
|
||||
* we bail out with an error.
|
||||
*/
|
||||
static int bpf_mprog_pos_exact(struct bpf_mprog_entry *entry,
|
||||
struct bpf_tuple *tuple)
|
||||
{
|
||||
struct bpf_mprog_fp *fp;
|
||||
struct bpf_mprog_cp *cp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bpf_mprog_total(entry); i++) {
|
||||
bpf_mprog_read(entry, i, &fp, &cp);
|
||||
if (tuple->prog == READ_ONCE(fp->prog))
|
||||
return tuple->link == cp->link ? i : -EBUSY;
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int bpf_mprog_pos_before(struct bpf_mprog_entry *entry,
|
||||
struct bpf_tuple *tuple)
|
||||
{
|
||||
struct bpf_mprog_fp *fp;
|
||||
struct bpf_mprog_cp *cp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bpf_mprog_total(entry); i++) {
|
||||
bpf_mprog_read(entry, i, &fp, &cp);
|
||||
if (tuple->prog == READ_ONCE(fp->prog) &&
|
||||
(!tuple->link || tuple->link == cp->link))
|
||||
return i - 1;
|
||||
}
|
||||
return tuple->prog ? -ENOENT : -1;
|
||||
}
|
||||
|
||||
static int bpf_mprog_pos_after(struct bpf_mprog_entry *entry,
|
||||
struct bpf_tuple *tuple)
|
||||
{
|
||||
struct bpf_mprog_fp *fp;
|
||||
struct bpf_mprog_cp *cp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < bpf_mprog_total(entry); i++) {
|
||||
bpf_mprog_read(entry, i, &fp, &cp);
|
||||
if (tuple->prog == READ_ONCE(fp->prog) &&
|
||||
(!tuple->link || tuple->link == cp->link))
|
||||
return i + 1;
|
||||
}
|
||||
return tuple->prog ? -ENOENT : bpf_mprog_total(entry);
|
||||
}
|
||||
|
||||
/* Attach @prog_new (optionally backed by @link) to @entry.
 *
 * @flags select the operation: BPF_F_REPLACE swaps out @prog_old in place,
 * BPF_F_BEFORE / BPF_F_AFTER position the new element relative to the
 * program or link given by @id_or_fd (see bpf_mprog_tuple_relative()).
 * Without any flag and without relative element, the program is appended.
 * A non-zero @revision must equal the entry's current revision.
 *
 * Every rule computes its target position independently; they all have to
 * agree, otherwise the request fails. On success the resulting entry is
 * returned through @entry_new.
 *
 * Returns 0 on success, -ESTALE on revision mismatch, -EEXIST if @prog_new
 * is already present, -EINVAL if flags/relative element do not yield a
 * position, -ERANGE on conflicting positions or a position at or beyond
 * bpf_mprog_max(), or the error of a failed lookup.
 */
int bpf_mprog_attach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog_new, struct bpf_link *link,
		     struct bpf_prog *prog_old,
		     u32 flags, u32 id_or_fd, u64 revision)
{
	struct bpf_tuple ntuple = { .prog = prog_new, .link = link };
	struct bpf_tuple otuple = { .prog = prog_old, .link = link };
	struct bpf_tuple rtuple;
	int idx = -ERANGE;
	int pos, ret;

	if (revision && revision != bpf_mprog_revision(entry))
		return -ESTALE;
	if (bpf_mprog_exists(entry, prog_new))
		return -EEXIST;

	/* BPF_F_REPLACE has no meaning for the relative lookup. */
	ret = bpf_mprog_tuple_relative(&rtuple, id_or_fd,
				       flags & ~BPF_F_REPLACE,
				       prog_new->type);
	if (ret)
		return ret;

	if (flags & BPF_F_REPLACE) {
		pos = bpf_mprog_pos_exact(entry, &otuple);
		if (pos < 0) {
			ret = pos;
			goto out;
		}
		idx = pos;
	}

	if (flags & BPF_F_BEFORE) {
		pos = bpf_mprog_pos_before(entry, &rtuple);
		if (pos < -1) {
			ret = pos;
			goto out;
		}
		/* Disagrees with a position established earlier. */
		if (idx >= -1 && pos != idx) {
			ret = -ERANGE;
			goto out;
		}
		idx = pos;
	}

	if (flags & BPF_F_AFTER) {
		pos = bpf_mprog_pos_after(entry, &rtuple);
		if (pos < -1) {
			ret = pos;
			goto out;
		}
		/* Disagrees with a position established earlier. */
		if (idx >= -1 && pos != idx) {
			ret = pos < 0 ? pos : -ERANGE;
			goto out;
		}
		idx = pos;
	}

	if (idx < -1) {
		/* No rule produced a position. That is only fine for the
		 * plain request, which then appends.
		 */
		if (rtuple.prog || flags) {
			ret = -EINVAL;
			goto out;
		}
		idx = bpf_mprog_total(entry);
		flags = BPF_F_AFTER;
	}

	if (idx >= bpf_mprog_max()) {
		ret = -ERANGE;
		goto out;
	}

	ret = (flags & BPF_F_REPLACE) ?
	      bpf_mprog_replace(entry, entry_new, &ntuple, idx) :
	      bpf_mprog_insert(entry, entry_new, &ntuple, idx, flags);
out:
	bpf_mprog_tuple_put(&rtuple);
	return ret;
}
|
||||
|
||||
static int bpf_mprog_fetch(struct bpf_mprog_entry *entry,
|
||||
struct bpf_tuple *tuple, int idx)
|
||||
{
|
||||
int total = bpf_mprog_total(entry);
|
||||
struct bpf_mprog_cp *cp;
|
||||
struct bpf_mprog_fp *fp;
|
||||
struct bpf_prog *prog;
|
||||
struct bpf_link *link;
|
||||
|
||||
if (idx == -1)
|
||||
idx = 0;
|
||||
else if (idx == total)
|
||||
idx = total - 1;
|
||||
bpf_mprog_read(entry, idx, &fp, &cp);
|
||||
prog = READ_ONCE(fp->prog);
|
||||
link = cp->link;
|
||||
/* The deletion request can either be without filled tuple in which
|
||||
* case it gets populated here based on idx, or with filled tuple
|
||||
* where the only thing we end up doing is the WARN_ON_ONCE() assert.
|
||||
* If we hit a BPF link at the given index, it must not be removed
|
||||
* from opts path.
|
||||
*/
|
||||
if (link && !tuple->link)
|
||||
return -EBUSY;
|
||||
WARN_ON_ONCE(tuple->prog && tuple->prog != prog);
|
||||
WARN_ON_ONCE(tuple->link && tuple->link != link);
|
||||
tuple->prog = prog;
|
||||
tuple->link = link;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Detach a program or link from @entry.
 *
 * The element to remove is given by @prog / @link and/or by position:
 * BPF_F_BEFORE / BPF_F_AFTER are evaluated relative to the program or link
 * named by @id_or_fd (see bpf_mprog_tuple_relative()). Without @prog,
 * flags and relative element, the last element is removed. A non-zero
 * @revision must equal the entry's current revision. All rules have to
 * agree on one position. On success the resulting entry is returned
 * through @entry_new.
 *
 * Returns 0 on success, -EINVAL for BPF_F_REPLACE or when no position can
 * be derived, -ESTALE on revision mismatch, -ENOENT when @entry is empty
 * or the element is not found, -EBUSY when a link would be removed without
 * passing that link, -ERANGE on conflicting positions, or the error of a
 * failed lookup.
 */
int bpf_mprog_detach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog, struct bpf_link *link,
		     u32 flags, u32 id_or_fd, u64 revision)
{
	struct bpf_tuple rtuple, dtuple = {
		.prog = prog,
		.link = link,
	};
	int ret, idx = -ERANGE, tidx;

	if (flags & BPF_F_REPLACE)
		return -EINVAL;
	if (revision && revision != bpf_mprog_revision(entry))
		return -ESTALE;
	/* Nothing can be detached from an empty entry. Bail out before the
	 * front/back handling below turns the position into index
	 * total - 1 == -1 or removes an element that does not exist.
	 */
	if (!bpf_mprog_total(entry))
		return -ENOENT;
	ret = bpf_mprog_tuple_relative(&rtuple, id_or_fd, flags,
				       prog ? prog->type :
				       BPF_PROG_TYPE_UNSPEC);
	if (ret)
		return ret;
	if (dtuple.prog) {
		tidx = bpf_mprog_pos_exact(entry, &dtuple);
		if (tidx < 0) {
			ret = tidx;
			goto out;
		}
		idx = tidx;
	}
	if (flags & BPF_F_BEFORE) {
		tidx = bpf_mprog_pos_before(entry, &rtuple);
		if (tidx < -1 || (idx >= -1 && tidx != idx)) {
			ret = tidx < -1 ? tidx : -ERANGE;
			goto out;
		}
		idx = tidx;
	}
	if (flags & BPF_F_AFTER) {
		tidx = bpf_mprog_pos_after(entry, &rtuple);
		if (tidx < -1 || (idx >= -1 && tidx != idx)) {
			ret = tidx < 0 ? tidx : -ERANGE;
			goto out;
		}
		idx = tidx;
	}
	if (idx < -1) {
		/* No rule produced a position. That is only fine for the
		 * plain request, which then removes from the back.
		 */
		if (rtuple.prog || flags) {
			ret = -EINVAL;
			goto out;
		}
		idx = bpf_mprog_total(entry);
		flags = BPF_F_AFTER;
	}
	if (idx >= bpf_mprog_max()) {
		ret = -ERANGE;
		goto out;
	}
	ret = bpf_mprog_fetch(entry, &dtuple, idx);
	if (ret)
		goto out;
	ret = bpf_mprog_delete(entry, entry_new, &dtuple, idx);
out:
	bpf_mprog_tuple_put(&rtuple);
	return ret;
}
|
||||
|
||||
/* Report the programs and links attached to @entry to user space.
 *
 * query.attach_flags (always 0), query.revision and query.count (the
 * number of attached elements) are written to @uattr unconditionally.
 * If the caller passed a non-zero query.count and a prog_ids buffer, up
 * to that many program ids are copied out, together with link ids and
 * per-program/per-link attach flags (always 0) for whichever of the
 * optional buffers were supplied.
 *
 * Returns 0 on success, -EINVAL if query_flags or attach_flags are set on
 * input, -EFAULT if a copy to user space fails, and -ENOSPC if the
 * caller's buffers hold fewer elements than are attached (the elements
 * that fit are still copied).
 */
int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry)
{
	u32 __user *uprog_flags, *ulink_flags;
	u32 __user *uprog_id, *ulink_id;
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;
	struct bpf_prog *prog;
	const u32 flags = 0;
	u32 prog_id, link_id, count;
	u64 revision;
	int i, ret = 0;

	if (attr->query.query_flags || attr->query.attach_flags)
		return -EINVAL;

	revision = bpf_mprog_revision(entry);
	count = bpf_mprog_total(entry);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
	    copy_to_user(&uattr->query.revision, &revision, sizeof(revision)) ||
	    copy_to_user(&uattr->query.count, &count, sizeof(count)))
		return -EFAULT;

	uprog_id = u64_to_user_ptr(attr->query.prog_ids);
	uprog_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
	ulink_id = u64_to_user_ptr(attr->query.link_ids);
	ulink_flags = u64_to_user_ptr(attr->query.link_attach_flags);

	/* The caller only asked for the count, or nothing is attached. */
	if (attr->query.count == 0 || !uprog_id || !count)
		return 0;

	if (attr->query.count < count) {
		/* Copy what fits, but let the caller know it is partial. */
		count = attr->query.count;
		ret = -ENOSPC;
	}

	for (i = 0; i < bpf_mprog_max(); i++) {
		bpf_mprog_read(entry, i, &fp, &cp);
		prog = READ_ONCE(fp->prog);
		if (!prog)
			break;

		prog_id = prog->aux->id;
		if (copy_to_user(uprog_id + i, &prog_id, sizeof(prog_id)))
			return -EFAULT;
		if (uprog_flags &&
		    copy_to_user(uprog_flags + i, &flags, sizeof(flags)))
			return -EFAULT;

		/* Elements attached without a link report link id 0. */
		link_id = cp->link ? cp->link->id : 0;
		if (ulink_id &&
		    copy_to_user(ulink_id + i, &link_id, sizeof(link_id)))
			return -EFAULT;
		if (ulink_flags &&
		    copy_to_user(ulink_flags + i, &flags, sizeof(flags)))
			return -EFAULT;

		if (i + 1 == count)
			break;
	}

	return ret;
}
|
||||
|
|
@ -37,6 +37,8 @@
|
|||
#include <linux/trace_events.h>
|
||||
#include <net/netfilter/nf_bpf_link.h>
|
||||
|
||||
#include <net/tcx.h>
|
||||
|
||||
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
|
||||
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
|
||||
(map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
|
||||
|
|
@ -3740,31 +3742,45 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
|
|||
return BPF_PROG_TYPE_XDP;
|
||||
case BPF_LSM_CGROUP:
|
||||
return BPF_PROG_TYPE_LSM;
|
||||
case BPF_TCX_INGRESS:
|
||||
case BPF_TCX_EGRESS:
|
||||
return BPF_PROG_TYPE_SCHED_CLS;
|
||||
default:
|
||||
return BPF_PROG_TYPE_UNSPEC;
|
||||
}
|
||||
}
|
||||
|
||||
#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
|
||||
#define BPF_PROG_ATTACH_LAST_FIELD expected_revision
|
||||
|
||||
#define BPF_F_ATTACH_MASK \
|
||||
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
|
||||
#define BPF_F_ATTACH_MASK_BASE \
|
||||
(BPF_F_ALLOW_OVERRIDE | \
|
||||
BPF_F_ALLOW_MULTI | \
|
||||
BPF_F_REPLACE)
|
||||
|
||||
#define BPF_F_ATTACH_MASK_MPROG \
|
||||
(BPF_F_REPLACE | \
|
||||
BPF_F_BEFORE | \
|
||||
BPF_F_AFTER | \
|
||||
BPF_F_ID | \
|
||||
BPF_F_LINK)
|
||||
|
||||
static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
{
|
||||
enum bpf_prog_type ptype;
|
||||
struct bpf_prog *prog;
|
||||
u32 mask;
|
||||
int ret;
|
||||
|
||||
if (CHECK_ATTR(BPF_PROG_ATTACH))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
ptype = attach_type_to_prog_type(attr->attach_type);
|
||||
if (ptype == BPF_PROG_TYPE_UNSPEC)
|
||||
return -EINVAL;
|
||||
mask = bpf_mprog_supported(ptype) ?
|
||||
BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE;
|
||||
if (attr->attach_flags & ~mask)
|
||||
return -EINVAL;
|
||||
|
||||
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
|
||||
if (IS_ERR(prog))
|
||||
|
|
@ -3800,6 +3816,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
|||
else
|
||||
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
|
||||
break;
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
ret = tcx_prog_attach(attr, prog);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
|
@ -3809,25 +3828,41 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
#define BPF_PROG_DETACH_LAST_FIELD attach_type
|
||||
#define BPF_PROG_DETACH_LAST_FIELD expected_revision
|
||||
|
||||
static int bpf_prog_detach(const union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_prog *prog = NULL;
|
||||
enum bpf_prog_type ptype;
|
||||
int ret;
|
||||
|
||||
if (CHECK_ATTR(BPF_PROG_DETACH))
|
||||
return -EINVAL;
|
||||
|
||||
ptype = attach_type_to_prog_type(attr->attach_type);
|
||||
if (bpf_mprog_supported(ptype)) {
|
||||
if (ptype == BPF_PROG_TYPE_UNSPEC)
|
||||
return -EINVAL;
|
||||
if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
|
||||
return -EINVAL;
|
||||
if (attr->attach_bpf_fd) {
|
||||
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
}
|
||||
}
|
||||
|
||||
switch (ptype) {
|
||||
case BPF_PROG_TYPE_SK_MSG:
|
||||
case BPF_PROG_TYPE_SK_SKB:
|
||||
return sock_map_prog_detach(attr, ptype);
|
||||
ret = sock_map_prog_detach(attr, ptype);
|
||||
break;
|
||||
case BPF_PROG_TYPE_LIRC_MODE2:
|
||||
return lirc_prog_detach(attr);
|
||||
ret = lirc_prog_detach(attr);
|
||||
break;
|
||||
case BPF_PROG_TYPE_FLOW_DISSECTOR:
|
||||
return netns_bpf_prog_detach(attr, ptype);
|
||||
ret = netns_bpf_prog_detach(attr, ptype);
|
||||
break;
|
||||
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
||||
case BPF_PROG_TYPE_CGROUP_SKB:
|
||||
case BPF_PROG_TYPE_CGROUP_SOCK:
|
||||
|
|
@ -3836,13 +3871,21 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
|||
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
||||
case BPF_PROG_TYPE_SOCK_OPS:
|
||||
case BPF_PROG_TYPE_LSM:
|
||||
return cgroup_bpf_prog_detach(attr, ptype);
|
||||
ret = cgroup_bpf_prog_detach(attr, ptype);
|
||||
break;
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
ret = tcx_prog_detach(attr, prog);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (prog)
|
||||
bpf_prog_put(prog);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
|
||||
#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags
|
||||
|
||||
static int bpf_prog_query(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
|
|
@ -3890,6 +3933,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
|
|||
case BPF_SK_MSG_VERDICT:
|
||||
case BPF_SK_SKB_VERDICT:
|
||||
return sock_map_bpf_prog_query(attr, uattr);
|
||||
case BPF_TCX_INGRESS:
|
||||
case BPF_TCX_EGRESS:
|
||||
return tcx_prog_query(attr, uattr);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
@ -4852,6 +4898,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
|
|||
goto out;
|
||||
}
|
||||
break;
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
if (attr->link_create.attach_type != BPF_TCX_INGRESS &&
|
||||
attr->link_create.attach_type != BPF_TCX_EGRESS) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
|
||||
if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
|
||||
|
|
@ -4903,6 +4956,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
|
|||
case BPF_PROG_TYPE_XDP:
|
||||
ret = bpf_xdp_link_attach(attr, prog);
|
||||
break;
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
ret = tcx_link_attach(attr, prog);
|
||||
break;
|
||||
case BPF_PROG_TYPE_NETFILTER:
|
||||
ret = bpf_nf_link_attach(attr, prog);
|
||||
break;
|
||||
|
|
|
|||
348
kernel/bpf/tcx.c
Normal file
348
kernel/bpf/tcx.c
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_mprog.h>
|
||||
#include <linux/netdevice.h>
|
||||
|
||||
#include <net/tcx.h>
|
||||
|
||||
int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||
{
|
||||
bool created, ingress = attr->attach_type == BPF_TCX_INGRESS;
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
struct bpf_mprog_entry *entry, *entry_new;
|
||||
struct bpf_prog *replace_prog = NULL;
|
||||
struct net_device *dev;
|
||||
int ret;
|
||||
|
||||
rtnl_lock();
|
||||
dev = __dev_get_by_index(net, attr->target_ifindex);
|
||||
if (!dev) {
|
||||
ret = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
if (attr->attach_flags & BPF_F_REPLACE) {
|
||||
replace_prog = bpf_prog_get_type(attr->replace_bpf_fd,
|
||||
prog->type);
|
||||
if (IS_ERR(replace_prog)) {
|
||||
ret = PTR_ERR(replace_prog);
|
||||
replace_prog = NULL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
entry = tcx_entry_fetch_or_create(dev, ingress, &created);
|
||||
if (!entry) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
|
||||
attr->attach_flags, attr->relative_fd,
|
||||
attr->expected_revision);
|
||||
if (!ret) {
|
||||
if (entry != entry_new) {
|
||||
tcx_entry_update(dev, entry_new, ingress);
|
||||
tcx_entry_sync();
|
||||
tcx_skeys_inc(ingress);
|
||||
}
|
||||
bpf_mprog_commit(entry);
|
||||
} else if (created) {
|
||||
tcx_entry_free(entry);
|
||||
}
|
||||
out:
|
||||
if (replace_prog)
|
||||
bpf_prog_put(replace_prog);
|
||||
rtnl_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||
{
|
||||
bool ingress = attr->attach_type == BPF_TCX_INGRESS;
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
struct bpf_mprog_entry *entry, *entry_new;
|
||||
struct net_device *dev;
|
||||
int ret;
|
||||
|
||||
rtnl_lock();
|
||||
dev = __dev_get_by_index(net, attr->target_ifindex);
|
||||
if (!dev) {
|
||||
ret = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
entry = tcx_entry_fetch(dev, ingress);
|
||||
if (!entry) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags,
|
||||
attr->relative_fd, attr->expected_revision);
|
||||
if (!ret) {
|
||||
if (!tcx_entry_is_active(entry_new))
|
||||
entry_new = NULL;
|
||||
tcx_entry_update(dev, entry_new, ingress);
|
||||
tcx_entry_sync();
|
||||
tcx_skeys_dec(ingress);
|
||||
bpf_mprog_commit(entry);
|
||||
if (!entry_new)
|
||||
tcx_entry_free(entry);
|
||||
}
|
||||
out:
|
||||
rtnl_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * tcx_uninstall - tear down everything attached to one tcx direction of @dev.
 * @dev:     device whose entry is removed.
 * @ingress: true for the ingress entry, false for the egress entry.
 *
 * The device pointer is cleared first, then every attached tuple is walked:
 * link-based attachments only lose their device back-pointer, while plain
 * program attachments have their program reference dropped here. Finally the
 * entry itself is freed. Does nothing when no entry is installed.
 */
void tcx_uninstall(struct net_device *dev, bool ingress)
{
	struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
	struct bpf_tuple tuple = {};
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;

	if (!entry)
		return;

	tcx_entry_update(dev, NULL, ingress);
	tcx_entry_sync();

	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
		if (!tuple.link)
			bpf_prog_put(tuple.prog);
		else
			tcx_link(tuple.link)->dev = NULL;
		tcx_skeys_dec(ingress);
	}

	WARN_ON_ONCE(tcx_entry(entry)->miniq_active);
	tcx_entry_free(entry);
}
|
||||
|
||||
/*
 * tcx_prog_query - report what is attached to a device's tcx hook.
 * @attr:  query request; query.attach_type selects ingress vs. egress and
 *         query.target_ifindex names the device in the caller's net namespace.
 * @uattr: user-space copy of the attributes, forwarded to bpf_mprog_query().
 *
 * Return: result of bpf_mprog_query(), -ENODEV if the ifindex does not
 * resolve, or -ENOENT if the device has no entry for that direction.
 */
int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	const bool ingress = attr->query.attach_type == BPF_TCX_INGRESS;
	struct net *net = current->nsproxy->net_ns;
	struct bpf_mprog_entry *entry;
	struct net_device *dev;
	int ret = -ENODEV;

	rtnl_lock();

	dev = __dev_get_by_index(net, attr->query.target_ifindex);
	if (!dev)
		goto out;

	entry = tcx_entry_fetch(dev, ingress);
	if (entry)
		ret = bpf_mprog_query(attr, uattr, entry);
	else
		ret = -ENOENT;
out:
	rtnl_unlock();
	return ret;
}
|
||||
|
||||
static int tcx_link_prog_attach(struct bpf_link *link, u32 flags, u32 id_or_fd,
|
||||
u64 revision)
|
||||
{
|
||||
struct tcx_link *tcx = tcx_link(link);
|
||||
bool created, ingress = tcx->location == BPF_TCX_INGRESS;
|
||||
struct bpf_mprog_entry *entry, *entry_new;
|
||||
struct net_device *dev = tcx->dev;
|
||||
int ret;
|
||||
|
||||
ASSERT_RTNL();
|
||||
entry = tcx_entry_fetch_or_create(dev, ingress, &created);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags,
|
||||
id_or_fd, revision);
|
||||
if (!ret) {
|
||||
if (entry != entry_new) {
|
||||
tcx_entry_update(dev, entry_new, ingress);
|
||||
tcx_entry_sync();
|
||||
tcx_skeys_inc(ingress);
|
||||
}
|
||||
bpf_mprog_commit(entry);
|
||||
} else if (created) {
|
||||
tcx_entry_free(entry);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void tcx_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct tcx_link *tcx = tcx_link(link);
|
||||
bool ingress = tcx->location == BPF_TCX_INGRESS;
|
||||
struct bpf_mprog_entry *entry, *entry_new;
|
||||
struct net_device *dev;
|
||||
int ret = 0;
|
||||
|
||||
rtnl_lock();
|
||||
dev = tcx->dev;
|
||||
if (!dev)
|
||||
goto out;
|
||||
entry = tcx_entry_fetch(dev, ingress);
|
||||
if (!entry) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
|
||||
if (!ret) {
|
||||
if (!tcx_entry_is_active(entry_new))
|
||||
entry_new = NULL;
|
||||
tcx_entry_update(dev, entry_new, ingress);
|
||||
tcx_entry_sync();
|
||||
tcx_skeys_dec(ingress);
|
||||
bpf_mprog_commit(entry);
|
||||
if (!entry_new)
|
||||
tcx_entry_free(entry);
|
||||
tcx->dev = NULL;
|
||||
}
|
||||
out:
|
||||
WARN_ON_ONCE(ret);
|
||||
rtnl_unlock();
|
||||
}
|
||||
|
||||
static int tcx_link_update(struct bpf_link *link, struct bpf_prog *nprog,
|
||||
struct bpf_prog *oprog)
|
||||
{
|
||||
struct tcx_link *tcx = tcx_link(link);
|
||||
bool ingress = tcx->location == BPF_TCX_INGRESS;
|
||||
struct bpf_mprog_entry *entry, *entry_new;
|
||||
struct net_device *dev;
|
||||
int ret = 0;
|
||||
|
||||
rtnl_lock();
|
||||
dev = tcx->dev;
|
||||
if (!dev) {
|
||||
ret = -ENOLINK;
|
||||
goto out;
|
||||
}
|
||||
if (oprog && link->prog != oprog) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
oprog = link->prog;
|
||||
if (oprog == nprog) {
|
||||
bpf_prog_put(nprog);
|
||||
goto out;
|
||||
}
|
||||
entry = tcx_entry_fetch(dev, ingress);
|
||||
if (!entry) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
|
||||
BPF_F_REPLACE | BPF_F_ID,
|
||||
link->prog->aux->id, 0);
|
||||
if (!ret) {
|
||||
WARN_ON_ONCE(entry != entry_new);
|
||||
oprog = xchg(&link->prog, nprog);
|
||||
bpf_prog_put(oprog);
|
||||
bpf_mprog_commit(entry);
|
||||
}
|
||||
out:
|
||||
rtnl_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Free the tcx_link container that embeds @link. */
static void tcx_link_dealloc(struct bpf_link *link)
{
	struct tcx_link *tcx = tcx_link(link);

	kfree(tcx);
}
|
||||
|
||||
static void tcx_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
|
||||
{
|
||||
const struct tcx_link *tcx = tcx_link_const(link);
|
||||
u32 ifindex = 0;
|
||||
|
||||
rtnl_lock();
|
||||
if (tcx->dev)
|
||||
ifindex = tcx->dev->ifindex;
|
||||
rtnl_unlock();
|
||||
|
||||
seq_printf(seq, "ifindex:\t%u\n", ifindex);
|
||||
seq_printf(seq, "attach_type:\t%u (%s)\n",
|
||||
tcx->location,
|
||||
tcx->location == BPF_TCX_INGRESS ? "ingress" : "egress");
|
||||
}
|
||||
|
||||
static int tcx_link_fill_info(const struct bpf_link *link,
|
||||
struct bpf_link_info *info)
|
||||
{
|
||||
const struct tcx_link *tcx = tcx_link_const(link);
|
||||
u32 ifindex = 0;
|
||||
|
||||
rtnl_lock();
|
||||
if (tcx->dev)
|
||||
ifindex = tcx->dev->ifindex;
|
||||
rtnl_unlock();
|
||||
|
||||
info->tcx.ifindex = ifindex;
|
||||
info->tcx.attach_type = tcx->location;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * tcx_link_detach - .detach callback for tcx links.
 * @link: link to detach.
 *
 * Shares its implementation with the release callback.
 *
 * Return: always 0.
 */
static int tcx_link_detach(struct bpf_link *link)
{
	tcx_link_release(link);

	return 0;
}
|
||||
|
||||
static const struct bpf_link_ops tcx_link_lops = {
|
||||
.release = tcx_link_release,
|
||||
.detach = tcx_link_detach,
|
||||
.dealloc = tcx_link_dealloc,
|
||||
.update_prog = tcx_link_update,
|
||||
.show_fdinfo = tcx_link_fdinfo,
|
||||
.fill_link_info = tcx_link_fill_info,
|
||||
};
|
||||
|
||||
static int tcx_link_init(struct tcx_link *tcx,
|
||||
struct bpf_link_primer *link_primer,
|
||||
const union bpf_attr *attr,
|
||||
struct net_device *dev,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
bpf_link_init(&tcx->link, BPF_LINK_TYPE_TCX, &tcx_link_lops, prog);
|
||||
tcx->location = attr->link_create.attach_type;
|
||||
tcx->dev = dev;
|
||||
return bpf_link_prime(&tcx->link, link_primer);
|
||||
}
|
||||
|
||||
int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||
{
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
struct bpf_link_primer link_primer;
|
||||
struct net_device *dev;
|
||||
struct tcx_link *tcx;
|
||||
int ret;
|
||||
|
||||
rtnl_lock();
|
||||
dev = __dev_get_by_index(net, attr->link_create.target_ifindex);
|
||||
if (!dev) {
|
||||
ret = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
tcx = kzalloc(sizeof(*tcx), GFP_USER);
|
||||
if (!tcx) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ret = tcx_link_init(tcx, &link_primer, attr, dev, prog);
|
||||
if (ret) {
|
||||
kfree(tcx);
|
||||
goto out;
|
||||
}
|
||||
ret = tcx_link_prog_attach(&tcx->link, attr->link_create.flags,
|
||||
attr->link_create.tcx.relative_fd,
|
||||
attr->link_create.tcx.expected_revision);
|
||||
if (ret) {
|
||||
tcx->dev = NULL;
|
||||
bpf_link_cleanup(&link_primer);
|
||||
goto out;
|
||||
}
|
||||
ret = bpf_link_settle(&link_primer);
|
||||
out:
|
||||
rtnl_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -5413,12 +5413,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
|
|||
return reg->type == PTR_TO_FLOW_KEYS;
|
||||
}
|
||||
|
||||
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
|
||||
#ifdef CONFIG_NET
|
||||
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
|
||||
[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
|
||||
[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
|
||||
#endif
|
||||
[CONST_PTR_TO_MAP] = btf_bpf_map_id,
|
||||
};
|
||||
|
||||
static bool is_trusted_reg(const struct bpf_reg_state *reg)
|
||||
{
|
||||
/* A referenced register is always trusted. */
|
||||
if (reg->ref_obj_id)
|
||||
return true;
|
||||
|
||||
/* Types listed in the reg2btf_ids are always trusted */
|
||||
if (reg2btf_ids[base_type(reg->type)])
|
||||
return true;
|
||||
|
||||
/* If a register is not referenced, it is trusted if it has the
|
||||
* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
|
||||
* other type modifiers may be safe, but we elect to take an opt-in
|
||||
|
|
@ -10052,15 +10065,6 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
|
||||
#ifdef CONFIG_NET
|
||||
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
|
||||
[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
|
||||
[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
|
||||
#endif
|
||||
};
|
||||
|
||||
enum kfunc_ptr_arg_type {
|
||||
KF_ARG_PTR_TO_CTX,
|
||||
KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
|
||||
|
|
|
|||
|
|
@ -52,6 +52,11 @@ config NET_INGRESS
|
|||
config NET_EGRESS
|
||||
bool
|
||||
|
||||
config NET_XGRESS
|
||||
select NET_INGRESS
|
||||
select NET_EGRESS
|
||||
bool
|
||||
|
||||
config NET_REDIRECT
|
||||
bool
|
||||
|
||||
|
|
|
|||
292
net/core/dev.c
292
net/core/dev.c
|
|
@ -107,6 +107,7 @@
|
|||
#include <net/pkt_cls.h>
|
||||
#include <net/checksum.h>
|
||||
#include <net/xfrm.h>
|
||||
#include <net/tcx.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
|
|
@ -154,7 +155,6 @@
|
|||
#include "dev.h"
|
||||
#include "net-sysfs.h"
|
||||
|
||||
|
||||
static DEFINE_SPINLOCK(ptype_lock);
|
||||
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
|
||||
struct list_head ptype_all __read_mostly; /* Taps */
|
||||
|
|
@ -3882,50 +3882,6 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
|
|||
EXPORT_SYMBOL(dev_loopback_xmit);
|
||||
|
||||
#ifdef CONFIG_NET_EGRESS
|
||||
static struct sk_buff *
|
||||
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
|
||||
{
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
|
||||
struct tcf_result cl_res;
|
||||
|
||||
if (!miniq)
|
||||
return skb;
|
||||
|
||||
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
|
||||
tc_skb_cb(skb)->mru = 0;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
mini_qdisc_bstats_cpu_update(miniq, skb);
|
||||
|
||||
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
|
||||
case TC_ACT_OK:
|
||||
case TC_ACT_RECLASSIFY:
|
||||
skb->tc_index = TC_H_MIN(cl_res.classid);
|
||||
break;
|
||||
case TC_ACT_SHOT:
|
||||
mini_qdisc_qstats_cpu_drop(miniq);
|
||||
*ret = NET_XMIT_DROP;
|
||||
kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
|
||||
return NULL;
|
||||
case TC_ACT_STOLEN:
|
||||
case TC_ACT_QUEUED:
|
||||
case TC_ACT_TRAP:
|
||||
*ret = NET_XMIT_SUCCESS;
|
||||
consume_skb(skb);
|
||||
return NULL;
|
||||
case TC_ACT_REDIRECT:
|
||||
/* No need to push/pop skb's mac_header here on egress! */
|
||||
skb_do_redirect(skb);
|
||||
*ret = NET_XMIT_SUCCESS;
|
||||
return NULL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_NET_CLS_ACT */
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
static struct netdev_queue *
|
||||
netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
|
||||
{
|
||||
|
|
@ -3946,6 +3902,179 @@ void netdev_xmit_skip_txqueue(bool skip)
|
|||
EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
|
||||
#endif /* CONFIG_NET_EGRESS */
|
||||
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
|
||||
{
|
||||
int ret = TC_ACT_UNSPEC;
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
|
||||
struct tcf_result res;
|
||||
|
||||
if (!miniq)
|
||||
return ret;
|
||||
|
||||
tc_skb_cb(skb)->mru = 0;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
|
||||
mini_qdisc_bstats_cpu_update(miniq, skb);
|
||||
ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
|
||||
/* Only tcf related quirks below. */
|
||||
switch (ret) {
|
||||
case TC_ACT_SHOT:
|
||||
mini_qdisc_qstats_cpu_drop(miniq);
|
||||
break;
|
||||
case TC_ACT_OK:
|
||||
case TC_ACT_RECLASSIFY:
|
||||
skb->tc_index = TC_H_MIN(res.classid);
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_NET_CLS_ACT */
|
||||
return ret;
|
||||
}
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
|
||||
|
||||
void tcx_inc(void)
|
||||
{
|
||||
static_branch_inc(&tcx_needed_key);
|
||||
}
|
||||
|
||||
void tcx_dec(void)
|
||||
{
|
||||
static_branch_dec(&tcx_needed_key);
|
||||
}
|
||||
|
||||
static __always_inline enum tcx_action_base
|
||||
tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
|
||||
const bool needs_mac)
|
||||
{
|
||||
const struct bpf_mprog_fp *fp;
|
||||
const struct bpf_prog *prog;
|
||||
int ret = TCX_NEXT;
|
||||
|
||||
if (needs_mac)
|
||||
__skb_push(skb, skb->mac_len);
|
||||
bpf_mprog_foreach_prog(entry, fp, prog) {
|
||||
bpf_compute_data_pointers(skb);
|
||||
ret = bpf_prog_run(prog, skb);
|
||||
if (ret != TCX_NEXT)
|
||||
break;
|
||||
}
|
||||
if (needs_mac)
|
||||
__skb_pull(skb, skb->mac_len);
|
||||
return tcx_action_code(skb, ret);
|
||||
}
|
||||
|
||||
static __always_inline struct sk_buff *
|
||||
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
|
||||
struct net_device *orig_dev, bool *another)
|
||||
{
|
||||
struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
|
||||
int sch_ret;
|
||||
|
||||
if (!entry)
|
||||
return skb;
|
||||
if (*pt_prev) {
|
||||
*ret = deliver_skb(skb, *pt_prev, orig_dev);
|
||||
*pt_prev = NULL;
|
||||
}
|
||||
|
||||
qdisc_skb_cb(skb)->pkt_len = skb->len;
|
||||
tcx_set_ingress(skb, true);
|
||||
|
||||
if (static_branch_unlikely(&tcx_needed_key)) {
|
||||
sch_ret = tcx_run(entry, skb, true);
|
||||
if (sch_ret != TC_ACT_UNSPEC)
|
||||
goto ingress_verdict;
|
||||
}
|
||||
sch_ret = tc_run(tcx_entry(entry), skb);
|
||||
ingress_verdict:
|
||||
switch (sch_ret) {
|
||||
case TC_ACT_REDIRECT:
|
||||
/* skb_mac_header check was done by BPF, so we can safely
|
||||
* push the L2 header back before redirecting to another
|
||||
* netdev.
|
||||
*/
|
||||
__skb_push(skb, skb->mac_len);
|
||||
if (skb_do_redirect(skb) == -EAGAIN) {
|
||||
__skb_pull(skb, skb->mac_len);
|
||||
*another = true;
|
||||
break;
|
||||
}
|
||||
*ret = NET_RX_SUCCESS;
|
||||
return NULL;
|
||||
case TC_ACT_SHOT:
|
||||
kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
|
||||
*ret = NET_RX_DROP;
|
||||
return NULL;
|
||||
/* used by tc_run */
|
||||
case TC_ACT_STOLEN:
|
||||
case TC_ACT_QUEUED:
|
||||
case TC_ACT_TRAP:
|
||||
consume_skb(skb);
|
||||
fallthrough;
|
||||
case TC_ACT_CONSUMED:
|
||||
*ret = NET_RX_SUCCESS;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return skb;
|
||||
}
|
||||
|
||||
static __always_inline struct sk_buff *
|
||||
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
|
||||
{
|
||||
struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
|
||||
int sch_ret;
|
||||
|
||||
if (!entry)
|
||||
return skb;
|
||||
|
||||
/* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
|
||||
* already set by the caller.
|
||||
*/
|
||||
if (static_branch_unlikely(&tcx_needed_key)) {
|
||||
sch_ret = tcx_run(entry, skb, false);
|
||||
if (sch_ret != TC_ACT_UNSPEC)
|
||||
goto egress_verdict;
|
||||
}
|
||||
sch_ret = tc_run(tcx_entry(entry), skb);
|
||||
egress_verdict:
|
||||
switch (sch_ret) {
|
||||
case TC_ACT_REDIRECT:
|
||||
/* No need to push/pop skb's mac_header here on egress! */
|
||||
skb_do_redirect(skb);
|
||||
*ret = NET_XMIT_SUCCESS;
|
||||
return NULL;
|
||||
case TC_ACT_SHOT:
|
||||
kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
|
||||
*ret = NET_XMIT_DROP;
|
||||
return NULL;
|
||||
/* used by tc_run */
|
||||
case TC_ACT_STOLEN:
|
||||
case TC_ACT_QUEUED:
|
||||
case TC_ACT_TRAP:
|
||||
*ret = NET_XMIT_SUCCESS;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return skb;
|
||||
}
|
||||
#else
|
||||
static __always_inline struct sk_buff *
|
||||
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
|
||||
struct net_device *orig_dev, bool *another)
|
||||
{
|
||||
return skb;
|
||||
}
|
||||
|
||||
/* Stub used without CONFIG_NET_XGRESS: the packet passes through untouched. */
static __always_inline struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
	return skb;
}
|
||||
#endif /* CONFIG_NET_XGRESS */
|
||||
|
||||
#ifdef CONFIG_XPS
|
||||
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
|
||||
struct xps_dev_maps *dev_maps, unsigned int tci)
|
||||
|
|
@ -4128,9 +4257,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
|
|||
skb_update_prio(skb);
|
||||
|
||||
qdisc_pkt_len_init(skb);
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
skb->tc_at_ingress = 0;
|
||||
#endif
|
||||
tcx_set_ingress(skb, false);
|
||||
#ifdef CONFIG_NET_EGRESS
|
||||
if (static_branch_unlikely(&egress_needed_key)) {
|
||||
if (nf_hook_egress_active()) {
|
||||
|
|
@ -5064,72 +5191,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
|
|||
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
|
||||
#endif
|
||||
|
||||
static inline struct sk_buff *
|
||||
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
|
||||
struct net_device *orig_dev, bool *another)
|
||||
{
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
|
||||
struct tcf_result cl_res;
|
||||
|
||||
/* If there's at least one ingress present somewhere (so
|
||||
* we get here via enabled static key), remaining devices
|
||||
* that are not configured with an ingress qdisc will bail
|
||||
* out here.
|
||||
*/
|
||||
if (!miniq)
|
||||
return skb;
|
||||
|
||||
if (*pt_prev) {
|
||||
*ret = deliver_skb(skb, *pt_prev, orig_dev);
|
||||
*pt_prev = NULL;
|
||||
}
|
||||
|
||||
qdisc_skb_cb(skb)->pkt_len = skb->len;
|
||||
tc_skb_cb(skb)->mru = 0;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
skb->tc_at_ingress = 1;
|
||||
mini_qdisc_bstats_cpu_update(miniq, skb);
|
||||
|
||||
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
|
||||
case TC_ACT_OK:
|
||||
case TC_ACT_RECLASSIFY:
|
||||
skb->tc_index = TC_H_MIN(cl_res.classid);
|
||||
break;
|
||||
case TC_ACT_SHOT:
|
||||
mini_qdisc_qstats_cpu_drop(miniq);
|
||||
kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
|
||||
*ret = NET_RX_DROP;
|
||||
return NULL;
|
||||
case TC_ACT_STOLEN:
|
||||
case TC_ACT_QUEUED:
|
||||
case TC_ACT_TRAP:
|
||||
consume_skb(skb);
|
||||
*ret = NET_RX_SUCCESS;
|
||||
return NULL;
|
||||
case TC_ACT_REDIRECT:
|
||||
/* skb_mac_header check was done by cls/act_bpf, so
|
||||
* we can safely push the L2 header back before
|
||||
* redirecting to another netdev
|
||||
*/
|
||||
__skb_push(skb, skb->mac_len);
|
||||
if (skb_do_redirect(skb) == -EAGAIN) {
|
||||
__skb_pull(skb, skb->mac_len);
|
||||
*another = true;
|
||||
break;
|
||||
}
|
||||
*ret = NET_RX_SUCCESS;
|
||||
return NULL;
|
||||
case TC_ACT_CONSUMED:
|
||||
*ret = NET_RX_SUCCESS;
|
||||
return NULL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif /* CONFIG_NET_CLS_ACT */
|
||||
return skb;
|
||||
}
|
||||
|
||||
/**
|
||||
* netdev_is_rx_handler_busy - check if receive handler is registered
|
||||
* @dev: device to check
|
||||
|
|
@ -10613,6 +10674,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
|
|||
dev_net_set(dev, &init_net);
|
||||
|
||||
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
|
||||
dev->xdp_zc_max_segs = 1;
|
||||
dev->gso_max_segs = GSO_MAX_SEGS;
|
||||
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
|
||||
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
|
||||
|
|
@ -10834,7 +10896,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
|
|||
|
||||
/* Shutdown queueing discipline. */
|
||||
dev_shutdown(dev);
|
||||
|
||||
dev_tcx_uninstall(dev);
|
||||
dev_xdp_uninstall(dev);
|
||||
bpf_dev_bound_netdev_unregister(dev);
|
||||
|
||||
|
|
|
|||
|
|
@ -4345,13 +4345,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
|||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_XSKMAP) {
|
||||
/* XDP_REDIRECT is not supported AF_XDP yet. */
|
||||
if (unlikely(xdp_buff_has_frags(xdp)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_XSKMAP)
|
||||
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
|
||||
}
|
||||
|
||||
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
|
||||
xdp_prog);
|
||||
|
|
@ -9312,7 +9307,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
|
|||
__u8 value_reg = si->dst_reg;
|
||||
__u8 skb_reg = si->src_reg;
|
||||
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
/* If the tstamp_type is read,
|
||||
* the bpf prog is aware the tstamp could have delivery time.
|
||||
* Thus, read skb->tstamp as is if tstamp_type_access is true.
|
||||
|
|
@ -9346,7 +9341,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
|
|||
__u8 value_reg = si->src_reg;
|
||||
__u8 skb_reg = si->dst_reg;
|
||||
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_XGRESS
|
||||
/* If the tstamp_type is read,
|
||||
* the bpf prog is aware the tstamp could have delivery time.
|
||||
* Thus, write skb->tstamp as is if tstamp_type_access is true.
|
||||
|
|
|
|||
|
|
@ -25,6 +25,14 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
|
||||
if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
|
||||
netdev->xdp_zc_max_segs)) {
|
||||
genlmsg_cancel(rsp, hdr);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
genlmsg_end(rsp, hdr);
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
|
|||
return false;
|
||||
}
|
||||
|
||||
extern struct btf *btf_vmlinux;
|
||||
|
||||
static bool bpf_tcp_ca_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
|
|
|
|||
|
|
@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
|
|||
config NET_SCH_INGRESS
|
||||
tristate "Ingress/classifier-action Qdisc"
|
||||
depends on NET_CLS_ACT
|
||||
select NET_INGRESS
|
||||
select NET_EGRESS
|
||||
select NET_XGRESS
|
||||
help
|
||||
Say Y here if you want to use classifiers for incoming and/or outgoing
|
||||
packets. This qdisc doesn't do anything else besides running classifiers,
|
||||
|
|
@ -679,6 +678,7 @@ config NET_EMATCH_IPT
|
|||
config NET_CLS_ACT
|
||||
bool "Actions"
|
||||
select NET_CLS
|
||||
select NET_XGRESS
|
||||
help
|
||||
Say Y here if you want to use traffic control actions. Actions
|
||||
get attached to classifiers and are invoked after a successful
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include <net/netlink.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/pkt_cls.h>
|
||||
#include <net/tcx.h>
|
||||
|
||||
struct ingress_sched_data {
|
||||
struct tcf_block *block;
|
||||
|
|
@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
{
|
||||
struct ingress_sched_data *q = qdisc_priv(sch);
|
||||
struct net_device *dev = qdisc_dev(sch);
|
||||
struct bpf_mprog_entry *entry;
|
||||
bool created;
|
||||
int err;
|
||||
|
||||
if (sch->parent != TC_H_INGRESS)
|
||||
|
|
@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
|
||||
net_inc_ingress_queue();
|
||||
|
||||
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
|
||||
entry = tcx_entry_fetch_or_create(dev, true, &created);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
tcx_miniq_set_active(entry, true);
|
||||
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
|
||||
if (created)
|
||||
tcx_entry_update(dev, entry, true);
|
||||
|
||||
q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
|
||||
q->block_info.chain_head_change = clsact_chain_head_change;
|
||||
|
|
@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
static void ingress_destroy(struct Qdisc *sch)
|
||||
{
|
||||
struct ingress_sched_data *q = qdisc_priv(sch);
|
||||
struct net_device *dev = qdisc_dev(sch);
|
||||
struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
|
||||
|
||||
if (sch->parent != TC_H_INGRESS)
|
||||
return;
|
||||
|
||||
tcf_block_put_ext(q->block, sch, &q->block_info);
|
||||
|
||||
if (entry) {
|
||||
tcx_miniq_set_active(entry, false);
|
||||
if (!tcx_entry_is_active(entry)) {
|
||||
tcx_entry_update(dev, NULL, false);
|
||||
tcx_entry_free(entry);
|
||||
}
|
||||
}
|
||||
|
||||
net_dec_ingress_queue();
|
||||
}
|
||||
|
||||
|
|
@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
{
|
||||
struct clsact_sched_data *q = qdisc_priv(sch);
|
||||
struct net_device *dev = qdisc_dev(sch);
|
||||
struct bpf_mprog_entry *entry;
|
||||
bool created;
|
||||
int err;
|
||||
|
||||
if (sch->parent != TC_H_CLSACT)
|
||||
|
|
@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
net_inc_ingress_queue();
|
||||
net_inc_egress_queue();
|
||||
|
||||
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
|
||||
entry = tcx_entry_fetch_or_create(dev, true, &created);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
tcx_miniq_set_active(entry, true);
|
||||
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
|
||||
if (created)
|
||||
tcx_entry_update(dev, entry, true);
|
||||
|
||||
q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
|
||||
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
|
||||
|
|
@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
|
||||
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
|
||||
|
||||
mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
|
||||
entry = tcx_entry_fetch_or_create(dev, false, &created);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
tcx_miniq_set_active(entry, true);
|
||||
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
|
||||
if (created)
|
||||
tcx_entry_update(dev, entry, false);
|
||||
|
||||
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
|
||||
q->egress_block_info.chain_head_change = clsact_chain_head_change;
|
||||
|
|
@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
|
|||
static void clsact_destroy(struct Qdisc *sch)
|
||||
{
|
||||
struct clsact_sched_data *q = qdisc_priv(sch);
|
||||
struct net_device *dev = qdisc_dev(sch);
|
||||
struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
|
||||
struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
|
||||
|
||||
if (sch->parent != TC_H_CLSACT)
|
||||
return;
|
||||
|
||||
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
|
||||
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
|
||||
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
|
||||
|
||||
if (ingress_entry) {
|
||||
tcx_miniq_set_active(ingress_entry, false);
|
||||
if (!tcx_entry_is_active(ingress_entry)) {
|
||||
tcx_entry_update(dev, NULL, true);
|
||||
tcx_entry_free(ingress_entry);
|
||||
}
|
||||
}
|
||||
|
||||
if (egress_entry) {
|
||||
tcx_miniq_set_active(egress_entry, false);
|
||||
if (!tcx_entry_is_active(egress_entry)) {
|
||||
tcx_entry_update(dev, NULL, false);
|
||||
tcx_entry_free(egress_entry);
|
||||
}
|
||||
}
|
||||
|
||||
net_dec_ingress_queue();
|
||||
net_dec_egress_queue();
|
||||
|
|
|
|||
375
net/xdp/xsk.c
375
net/xdp/xsk.c
|
|
@ -135,14 +135,14 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
||||
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
|
||||
u32 flags)
|
||||
{
|
||||
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
||||
u64 addr;
|
||||
int err;
|
||||
|
||||
addr = xp_get_handle(xskb);
|
||||
err = xskq_prod_reserve_desc(xs->rx, addr, len);
|
||||
err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
|
||||
if (err) {
|
||||
xs->rx_queue_full++;
|
||||
return err;
|
||||
|
|
@ -152,48 +152,138 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
|
||||
static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
||||
{
|
||||
void *from_buf, *to_buf;
|
||||
u32 metalen;
|
||||
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
||||
u32 frags = xdp_buff_has_frags(xdp);
|
||||
struct xdp_buff_xsk *pos, *tmp;
|
||||
struct list_head *xskb_list;
|
||||
u32 contd = 0;
|
||||
int err;
|
||||
|
||||
if (unlikely(xdp_data_meta_unsupported(from))) {
|
||||
from_buf = from->data;
|
||||
to_buf = to->data;
|
||||
metalen = 0;
|
||||
} else {
|
||||
from_buf = from->data_meta;
|
||||
metalen = from->data - from->data_meta;
|
||||
to_buf = to->data - metalen;
|
||||
if (frags)
|
||||
contd = XDP_PKT_CONTD;
|
||||
|
||||
err = __xsk_rcv_zc(xs, xskb, len, contd);
|
||||
if (err || likely(!frags))
|
||||
goto out;
|
||||
|
||||
xskb_list = &xskb->pool->xskb_list;
|
||||
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
|
||||
if (list_is_singular(xskb_list))
|
||||
contd = 0;
|
||||
len = pos->xdp.data_end - pos->xdp.data;
|
||||
err = __xsk_rcv_zc(xs, pos, len, contd);
|
||||
if (err)
|
||||
return err;
|
||||
list_del(&pos->xskb_list_node);
|
||||
}
|
||||
|
||||
memcpy(to_buf, from_buf, len + metalen);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
static void *xsk_copy_xdp_start(struct xdp_buff *from)
|
||||
{
|
||||
struct xdp_buff *xsk_xdp;
|
||||
int err;
|
||||
u32 len;
|
||||
if (unlikely(xdp_data_meta_unsupported(from)))
|
||||
return from->data;
|
||||
else
|
||||
return from->data_meta;
|
||||
}
|
||||
|
||||
len = xdp->data_end - xdp->data;
|
||||
if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
|
||||
xs->rx_dropped++;
|
||||
return -ENOSPC;
|
||||
/*
 * Copy bytes from a (possibly fragmented) source into one destination buffer.
 *
 * @to:       destination write position
 * @from:     in/out source read position; advanced past the bytes consumed,
 *            or switched to the address of *@frag once the current source
 *            piece has been used up
 * @to_len:   room available at @to
 * @from_len: in/out number of bytes left in the current source piece
 * @frag:     in/out fragment cursor; post-incremented every time a fragment
 *            is picked up as the next source piece
 * @rem:      number of bytes that still have to be copied overall
 *
 * Returns the number of bytes copied by this call. The call ends either when
 * that number equals @rem or when the room at @to has been used up.
 */
static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
			u32 *from_len, skb_frag_t **frag, u32 rem)
{
	u32 copied = 0;

	for (;;) {
		u32 chunk = min_t(u32, *from_len, to_len);
		bool dst_filled = (to_len == chunk);

		memcpy(to, *from, chunk);
		copied += chunk;
		if (copied == rem)
			break;

		if (*from_len != chunk) {
			/* Source piece not exhausted yet: just step forward. */
			*from += chunk;
			*from_len -= chunk;
		} else {
			/* Source piece exhausted: move on to the next fragment. */
			*from = skb_frag_address(*frag);
			*from_len = skb_frag_size((*frag)++);
		}

		if (dst_filled)
			break;

		to_len -= chunk;
		to += chunk;
	}

	return copied;
}
|
||||
|
||||
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
||||
{
|
||||
u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
|
||||
void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
|
||||
u32 from_len, meta_len, rem, num_desc;
|
||||
struct xdp_buff_xsk *xskb;
|
||||
struct xdp_buff *xsk_xdp;
|
||||
skb_frag_t *frag;
|
||||
|
||||
from_len = xdp->data_end - copy_from;
|
||||
meta_len = xdp->data - copy_from;
|
||||
rem = len + meta_len;
|
||||
|
||||
if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
|
||||
int err;
|
||||
|
||||
xsk_xdp = xsk_buff_alloc(xs->pool);
|
||||
if (!xsk_xdp) {
|
||||
xs->rx_dropped++;
|
||||
return -ENOMEM;
|
||||
}
|
||||
memcpy(xsk_xdp->data - meta_len, copy_from, rem);
|
||||
xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
|
||||
err = __xsk_rcv_zc(xs, xskb, len, 0);
|
||||
if (err) {
|
||||
xsk_buff_free(xsk_xdp);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
xsk_xdp = xsk_buff_alloc(xs->pool);
|
||||
if (!xsk_xdp) {
|
||||
num_desc = (len - 1) / frame_size + 1;
|
||||
|
||||
if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
|
||||
xs->rx_dropped++;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
xsk_copy_xdp(xsk_xdp, xdp, len);
|
||||
err = __xsk_rcv_zc(xs, xsk_xdp, len);
|
||||
if (err) {
|
||||
xsk_buff_free(xsk_xdp);
|
||||
return err;
|
||||
if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
|
||||
xs->rx_queue_full++;
|
||||
return -ENOBUFS;
|
||||
}
|
||||
|
||||
if (xdp_buff_has_frags(xdp)) {
|
||||
struct skb_shared_info *sinfo;
|
||||
|
||||
sinfo = xdp_get_shared_info_from_buff(xdp);
|
||||
frag = &sinfo->frags[0];
|
||||
}
|
||||
|
||||
do {
|
||||
u32 to_len = frame_size + meta_len;
|
||||
u32 copied;
|
||||
|
||||
xsk_xdp = xsk_buff_alloc(xs->pool);
|
||||
copy_to = xsk_xdp->data - meta_len;
|
||||
|
||||
copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
|
||||
rem -= copied;
|
||||
|
||||
xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
|
||||
__xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
|
||||
meta_len = 0;
|
||||
} while (rem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -215,7 +305,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
|
|||
return false;
|
||||
}
|
||||
|
||||
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
||||
{
|
||||
if (!xsk_is_bound(xs))
|
||||
return -ENXIO;
|
||||
|
|
@ -223,6 +313,11 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
|
|||
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
|
||||
return -EINVAL;
|
||||
|
||||
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
|
||||
xs->rx_dropped++;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -236,12 +331,13 @@ static void xsk_flush(struct xdp_sock *xs)
|
|||
|
||||
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
{
|
||||
u32 len = xdp_get_buff_len(xdp);
|
||||
int err;
|
||||
|
||||
spin_lock_bh(&xs->rx_lock);
|
||||
err = xsk_rcv_check(xs, xdp);
|
||||
err = xsk_rcv_check(xs, xdp, len);
|
||||
if (!err) {
|
||||
err = __xsk_rcv(xs, xdp);
|
||||
err = __xsk_rcv(xs, xdp, len);
|
||||
xsk_flush(xs);
|
||||
}
|
||||
spin_unlock_bh(&xs->rx_lock);
|
||||
|
|
@ -250,19 +346,19 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
|||
|
||||
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
|
||||
{
|
||||
u32 len = xdp_get_buff_len(xdp);
|
||||
int err;
|
||||
u32 len;
|
||||
|
||||
err = xsk_rcv_check(xs, xdp);
|
||||
err = xsk_rcv_check(xs, xdp, len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
|
||||
len = xdp->data_end - xdp->data;
|
||||
return __xsk_rcv_zc(xs, xdp, len);
|
||||
return xsk_rcv_zc(xs, xdp, len);
|
||||
}
|
||||
|
||||
err = __xsk_rcv(xs, xdp);
|
||||
err = __xsk_rcv(xs, xdp, len);
|
||||
if (!err)
|
||||
xdp_return_buff(xdp);
|
||||
return err;
|
||||
|
|
@ -321,7 +417,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
|
|||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
|
||||
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
|
||||
xs->tx->queue_empty_descs++;
|
||||
if (xskq_has_descs(xs->tx))
|
||||
xskq_cons_release(xs->tx);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -408,37 +505,91 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
|
|||
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
|
||||
}
|
||||
|
||||
static void xsk_destruct_skb(struct sk_buff *skb)
|
||||
/*
 * Call xskq_prod_reserve_addr() for @addr on the completion queue of the
 * pool bound to @xs. The pool's cq_lock is taken (irqsave) around the call.
 *
 * Returns whatever xskq_prod_reserve_addr() returned.
 */
static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
{
	struct xsk_buff_pool *pool = xs->pool;
	unsigned long irq_flags;
	int rc;

	spin_lock_irqsave(&pool->cq_lock, irq_flags);
	rc = xskq_prod_reserve_addr(pool->cq, addr);
	spin_unlock_irqrestore(&pool->cq_lock, irq_flags);

	return rc;
}
|
||||
|
||||
static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
|
||||
{
|
||||
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
|
||||
struct xdp_sock *xs = xdp_sk(skb->sk);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&xs->pool->cq_lock, flags);
|
||||
xskq_prod_submit_addr(xs->pool->cq, addr);
|
||||
xskq_prod_submit_n(xs->pool->cq, n);
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
}
|
||||
|
||||
/*
 * Call xskq_prod_cancel_n() with count @n on the completion queue of the
 * pool bound to @xs. The pool's cq_lock is taken (irqsave) around the call.
 */
static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
{
	struct xsk_buff_pool *pool = xs->pool;
	unsigned long irq_flags;

	spin_lock_irqsave(&pool->cq_lock, irq_flags);
	xskq_prod_cancel_n(pool->cq, n);
	spin_unlock_irqrestore(&pool->cq_lock, irq_flags);
}
|
||||
|
||||
static u32 xsk_get_num_desc(struct sk_buff *skb)
|
||||
{
|
||||
return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
|
||||
}
|
||||
|
||||
static void xsk_destruct_skb(struct sk_buff *skb)
|
||||
{
|
||||
xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
|
||||
sock_wfree(skb);
|
||||
}
|
||||
|
||||
static void xsk_set_destructor_arg(struct sk_buff *skb)
|
||||
{
|
||||
long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
|
||||
|
||||
skb_shinfo(skb)->destructor_arg = (void *)num;
|
||||
}
|
||||
|
||||
static void xsk_consume_skb(struct sk_buff *skb)
|
||||
{
|
||||
struct xdp_sock *xs = xdp_sk(skb->sk);
|
||||
|
||||
skb->destructor = sock_wfree;
|
||||
xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
|
||||
/* Free skb without triggering the perf drop trace */
|
||||
consume_skb(skb);
|
||||
xs->skb = NULL;
|
||||
}
|
||||
|
||||
static void xsk_drop_skb(struct sk_buff *skb)
|
||||
{
|
||||
xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
|
||||
xsk_consume_skb(skb);
|
||||
}
|
||||
|
||||
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
|
||||
struct xdp_desc *desc)
|
||||
{
|
||||
struct xsk_buff_pool *pool = xs->pool;
|
||||
u32 hr, len, ts, offset, copy, copied;
|
||||
struct sk_buff *skb;
|
||||
struct sk_buff *skb = xs->skb;
|
||||
struct page *page;
|
||||
void *buffer;
|
||||
int err, i;
|
||||
u64 addr;
|
||||
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
|
||||
if (!skb) {
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
|
||||
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
return ERR_PTR(err);
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
return ERR_PTR(err);
|
||||
|
||||
skb_reserve(skb, hr);
|
||||
skb_reserve(skb, hr);
|
||||
}
|
||||
|
||||
addr = desc->addr;
|
||||
len = desc->len;
|
||||
|
|
@ -448,7 +599,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
|
|||
offset = offset_in_page(buffer);
|
||||
addr = buffer - pool->addrs;
|
||||
|
||||
for (copied = 0, i = 0; copied < len; i++) {
|
||||
for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
|
||||
if (unlikely(i >= MAX_SKB_FRAGS))
|
||||
return ERR_PTR(-EFAULT);
|
||||
|
||||
page = pool->umem->pgs[addr >> PAGE_SHIFT];
|
||||
get_page(page);
|
||||
|
||||
|
|
@ -473,43 +627,77 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
|
|||
struct xdp_desc *desc)
|
||||
{
|
||||
struct net_device *dev = xs->dev;
|
||||
struct sk_buff *skb;
|
||||
struct sk_buff *skb = xs->skb;
|
||||
int err;
|
||||
|
||||
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
|
||||
skb = xsk_build_skb_zerocopy(xs, desc);
|
||||
if (IS_ERR(skb))
|
||||
return skb;
|
||||
if (IS_ERR(skb)) {
|
||||
err = PTR_ERR(skb);
|
||||
goto free_err;
|
||||
}
|
||||
} else {
|
||||
u32 hr, tr, len;
|
||||
void *buffer;
|
||||
int err;
|
||||
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
|
||||
tr = dev->needed_tailroom;
|
||||
len = desc->len;
|
||||
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
return ERR_PTR(err);
|
||||
|
||||
skb_reserve(skb, hr);
|
||||
skb_put(skb, len);
|
||||
|
||||
buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
|
||||
err = skb_store_bits(skb, 0, buffer, len);
|
||||
if (unlikely(err)) {
|
||||
kfree_skb(skb);
|
||||
return ERR_PTR(err);
|
||||
len = desc->len;
|
||||
|
||||
if (!skb) {
|
||||
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
|
||||
tr = dev->needed_tailroom;
|
||||
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
|
||||
if (unlikely(!skb))
|
||||
goto free_err;
|
||||
|
||||
skb_reserve(skb, hr);
|
||||
skb_put(skb, len);
|
||||
|
||||
err = skb_store_bits(skb, 0, buffer, len);
|
||||
if (unlikely(err))
|
||||
goto free_err;
|
||||
} else {
|
||||
int nr_frags = skb_shinfo(skb)->nr_frags;
|
||||
struct page *page;
|
||||
u8 *vaddr;
|
||||
|
||||
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
|
||||
err = -EFAULT;
|
||||
goto free_err;
|
||||
}
|
||||
|
||||
page = alloc_page(xs->sk.sk_allocation);
|
||||
if (unlikely(!page)) {
|
||||
err = -EAGAIN;
|
||||
goto free_err;
|
||||
}
|
||||
|
||||
vaddr = kmap_local_page(page);
|
||||
memcpy(vaddr, buffer, len);
|
||||
kunmap_local(vaddr);
|
||||
|
||||
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
|
||||
}
|
||||
}
|
||||
|
||||
skb->dev = dev;
|
||||
skb->priority = xs->sk.sk_priority;
|
||||
skb->mark = xs->sk.sk_mark;
|
||||
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
|
||||
skb->destructor = xsk_destruct_skb;
|
||||
xsk_set_destructor_arg(skb);
|
||||
|
||||
return skb;
|
||||
|
||||
free_err:
|
||||
if (err == -EAGAIN) {
|
||||
xsk_cq_cancel_locked(xs, 1);
|
||||
} else {
|
||||
xsk_set_destructor_arg(skb);
|
||||
xsk_drop_skb(skb);
|
||||
xskq_cons_release(xs->tx);
|
||||
}
|
||||
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int __xsk_generic_xmit(struct sock *sk)
|
||||
|
|
@ -519,7 +707,6 @@ static int __xsk_generic_xmit(struct sock *sk)
|
|||
bool sent_frame = false;
|
||||
struct xdp_desc desc;
|
||||
struct sk_buff *skb;
|
||||
unsigned long flags;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&xs->mutex);
|
||||
|
|
@ -544,47 +731,51 @@ static int __xsk_generic_xmit(struct sock *sk)
|
|||
* if there is space in it. This avoids having to implement
|
||||
* any buffering in the Tx path.
|
||||
*/
|
||||
spin_lock_irqsave(&xs->pool->cq_lock, flags);
|
||||
if (xskq_prod_reserve(xs->pool->cq)) {
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
if (xsk_cq_reserve_addr_locked(xs, desc.addr))
|
||||
goto out;
|
||||
}
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
|
||||
skb = xsk_build_skb(xs, &desc);
|
||||
if (IS_ERR(skb)) {
|
||||
err = PTR_ERR(skb);
|
||||
spin_lock_irqsave(&xs->pool->cq_lock, flags);
|
||||
xskq_prod_cancel(xs->pool->cq);
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
goto out;
|
||||
if (err == -EAGAIN)
|
||||
goto out;
|
||||
err = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
xskq_cons_release(xs->tx);
|
||||
|
||||
if (xp_mb_desc(&desc)) {
|
||||
xs->skb = skb;
|
||||
continue;
|
||||
}
|
||||
|
||||
err = __dev_direct_xmit(skb, xs->queue_id);
|
||||
if (err == NETDEV_TX_BUSY) {
|
||||
/* Tell user-space to retry the send */
|
||||
skb->destructor = sock_wfree;
|
||||
spin_lock_irqsave(&xs->pool->cq_lock, flags);
|
||||
xskq_prod_cancel(xs->pool->cq);
|
||||
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
|
||||
/* Free skb without triggering the perf drop trace */
|
||||
consume_skb(skb);
|
||||
xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
|
||||
xsk_consume_skb(skb);
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
xskq_cons_release(xs->tx);
|
||||
/* Ignore NET_XMIT_CN as packet might have been sent */
|
||||
if (err == NET_XMIT_DROP) {
|
||||
/* SKB completed but not sent */
|
||||
err = -EBUSY;
|
||||
xs->skb = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sent_frame = true;
|
||||
xs->skb = NULL;
|
||||
}
|
||||
|
||||
xs->tx->queue_empty_descs++;
|
||||
if (xskq_has_descs(xs->tx)) {
|
||||
if (xs->skb)
|
||||
xsk_drop_skb(xs->skb);
|
||||
xskq_cons_release(xs->tx);
|
||||
}
|
||||
|
||||
out:
|
||||
if (sent_frame)
|
||||
|
|
@ -834,6 +1025,9 @@ static int xsk_release(struct socket *sock)
|
|||
|
||||
net = sock_net(sk);
|
||||
|
||||
if (xs->skb)
|
||||
xsk_drop_skb(xs->skb);
|
||||
|
||||
mutex_lock(&net->xdp.lock);
|
||||
sk_del_node_init_rcu(sk);
|
||||
mutex_unlock(&net->xdp.lock);
|
||||
|
|
@ -897,7 +1091,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
|
|||
|
||||
flags = sxdp->sxdp_flags;
|
||||
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
|
||||
XDP_USE_NEED_WAKEUP))
|
||||
XDP_USE_NEED_WAKEUP | XDP_USE_SG))
|
||||
return -EINVAL;
|
||||
|
||||
bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
|
||||
|
|
@ -929,7 +1123,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
|
|||
struct socket *sock;
|
||||
|
||||
if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
|
||||
(flags & XDP_USE_NEED_WAKEUP)) {
|
||||
(flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
|
||||
/* Cannot specify flags for shared sockets. */
|
||||
err = -EINVAL;
|
||||
goto out_unlock;
|
||||
|
|
@ -1028,6 +1222,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
|
|||
|
||||
xs->dev = dev;
|
||||
xs->zc = xs->umem->zc;
|
||||
xs->sg = !!(flags & XDP_USE_SG);
|
||||
xs->queue_id = qid;
|
||||
xp_add_xsk(xs->pool, xs);
|
||||
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
|
|||
pool->umem = umem;
|
||||
pool->addrs = umem->addrs;
|
||||
INIT_LIST_HEAD(&pool->free_list);
|
||||
INIT_LIST_HEAD(&pool->xskb_list);
|
||||
INIT_LIST_HEAD(&pool->xsk_tx_list);
|
||||
spin_lock_init(&pool->xsk_tx_list_lock);
|
||||
spin_lock_init(&pool->cq_lock);
|
||||
|
|
@ -99,6 +100,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
|
|||
xskb->pool = pool;
|
||||
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
|
||||
INIT_LIST_HEAD(&xskb->free_list_node);
|
||||
INIT_LIST_HEAD(&xskb->xskb_list_node);
|
||||
if (pool->unaligned)
|
||||
pool->free_heads[i] = xskb;
|
||||
else
|
||||
|
|
@ -187,6 +189,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
|
|||
goto err_unreg_pool;
|
||||
}
|
||||
|
||||
if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto err_unreg_pool;
|
||||
}
|
||||
|
||||
bpf.command = XDP_SETUP_XSK_POOL;
|
||||
bpf.xsk.pool = pool;
|
||||
bpf.xsk.queue_id = queue_id;
|
||||
|
|
|
|||
|
|
@ -48,6 +48,11 @@ struct xsk_queue {
|
|||
size_t ring_vmalloc_size;
|
||||
};
|
||||
|
||||
struct parsed_desc {
|
||||
u32 mb;
|
||||
u32 valid;
|
||||
};
|
||||
|
||||
/* The structure of the shared state of the rings are a simple
|
||||
* circular buffer, as outlined in
|
||||
* Documentation/core-api/circular-buffers.rst. For the Rx and
|
||||
|
|
@ -130,18 +135,26 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Returns true when @options has any bit set other than XDP_PKT_CONTD. */
static inline bool xp_unused_options_set(u32 options)
{
	return (options & ~XDP_PKT_CONTD) != 0;
}
|
||||
|
||||
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
|
||||
struct xdp_desc *desc)
|
||||
{
|
||||
u64 offset = desc->addr & (pool->chunk_size - 1);
|
||||
|
||||
if (!desc->len)
|
||||
return false;
|
||||
|
||||
if (offset + desc->len > pool->chunk_size)
|
||||
return false;
|
||||
|
||||
if (desc->addr >= pool->addrs_cnt)
|
||||
return false;
|
||||
|
||||
if (desc->options)
|
||||
if (xp_unused_options_set(desc->options))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
|
@ -151,6 +164,9 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
|
|||
{
|
||||
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
|
||||
|
||||
if (!desc->len)
|
||||
return false;
|
||||
|
||||
if (desc->len > pool->chunk_size)
|
||||
return false;
|
||||
|
||||
|
|
@ -158,7 +174,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
|
|||
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
|
||||
return false;
|
||||
|
||||
if (desc->options)
|
||||
if (xp_unused_options_set(desc->options))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
|
@ -170,6 +186,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
|
|||
xp_aligned_validate_desc(pool, desc);
|
||||
}
|
||||
|
||||
static inline bool xskq_has_descs(struct xsk_queue *q)
|
||||
{
|
||||
return q->cached_cons != q->cached_prod;
|
||||
}
|
||||
|
||||
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
|
||||
struct xdp_desc *d,
|
||||
struct xsk_buff_pool *pool)
|
||||
|
|
@ -185,17 +206,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
|
|||
struct xdp_desc *desc,
|
||||
struct xsk_buff_pool *pool)
|
||||
{
|
||||
while (q->cached_cons != q->cached_prod) {
|
||||
if (q->cached_cons != q->cached_prod) {
|
||||
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
|
||||
u32 idx = q->cached_cons & q->ring_mask;
|
||||
|
||||
*desc = ring->desc[idx];
|
||||
if (xskq_cons_is_valid_desc(q, desc, pool))
|
||||
return true;
|
||||
|
||||
q->cached_cons++;
|
||||
return xskq_cons_is_valid_desc(q, desc, pool);
|
||||
}
|
||||
|
||||
q->queue_empty_descs++;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -204,30 +223,52 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
|
|||
q->cached_cons += cnt;
|
||||
}
|
||||
|
||||
static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
|
||||
u32 max)
|
||||
static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
|
||||
struct xdp_desc *desc, struct parsed_desc *parsed)
|
||||
{
|
||||
parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
|
||||
parsed->mb = xp_mb_desc(desc);
|
||||
}
|
||||
|
||||
static inline
|
||||
u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
|
||||
u32 max)
|
||||
{
|
||||
u32 cached_cons = q->cached_cons, nb_entries = 0;
|
||||
struct xdp_desc *descs = pool->tx_descs;
|
||||
u32 total_descs = 0, nr_frags = 0;
|
||||
|
||||
/* track first entry, if stumble upon *any* invalid descriptor, rewind
|
||||
* current packet that consists of frags and stop the processing
|
||||
*/
|
||||
while (cached_cons != q->cached_prod && nb_entries < max) {
|
||||
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
|
||||
u32 idx = cached_cons & q->ring_mask;
|
||||
struct parsed_desc parsed;
|
||||
|
||||
descs[nb_entries] = ring->desc[idx];
|
||||
if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
|
||||
/* Skip the entry */
|
||||
cached_cons++;
|
||||
continue;
|
||||
}
|
||||
|
||||
nb_entries++;
|
||||
cached_cons++;
|
||||
parse_desc(q, pool, &descs[nb_entries], &parsed);
|
||||
if (unlikely(!parsed.valid))
|
||||
break;
|
||||
|
||||
if (likely(!parsed.mb)) {
|
||||
total_descs += (nr_frags + 1);
|
||||
nr_frags = 0;
|
||||
} else {
|
||||
nr_frags++;
|
||||
if (nr_frags == pool->netdev->xdp_zc_max_segs) {
|
||||
nr_frags = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nb_entries++;
|
||||
}
|
||||
|
||||
cached_cons -= nr_frags;
|
||||
/* Release valid plus any invalid entries */
|
||||
xskq_cons_release_n(q, cached_cons - q->cached_cons);
|
||||
return nb_entries;
|
||||
return total_descs;
|
||||
}
|
||||
|
||||
/* Functions for consumers */
|
||||
|
|
@ -292,6 +333,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
|
|||
q->cached_cons++;
|
||||
}
|
||||
|
||||
/* Move the queue's cached consumer index back by @cnt entries. */
static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_cons = q->cached_cons - cnt;
}
|
||||
|
||||
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
|
||||
{
|
||||
/* No barriers needed since data is not accessed */
|
||||
|
|
@ -319,9 +365,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
|
|||
return xskq_prod_nb_free(q, 1) ? false : true;
|
||||
}
|
||||
|
||||
static inline void xskq_prod_cancel(struct xsk_queue *q)
|
||||
static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
|
||||
{
|
||||
q->cached_prod--;
|
||||
q->cached_prod -= cnt;
|
||||
}
|
||||
|
||||
static inline int xskq_prod_reserve(struct xsk_queue *q)
|
||||
|
|
@ -360,7 +406,7 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
|
|||
}
|
||||
|
||||
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
|
||||
u64 addr, u32 len)
|
||||
u64 addr, u32 len, u32 flags)
|
||||
{
|
||||
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
|
||||
u32 idx;
|
||||
|
|
@ -372,6 +418,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
|
|||
idx = q->cached_prod++ & q->ring_mask;
|
||||
ring->desc[idx].addr = addr;
|
||||
ring->desc[idx].len = len;
|
||||
ring->desc[idx].options = flags;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -386,16 +433,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
|
|||
__xskq_prod_submit(q, q->cached_prod);
|
||||
}
|
||||
|
||||
static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
|
||||
{
|
||||
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
|
||||
u32 idx = q->ring->producer;
|
||||
|
||||
ring->desc[idx++ & q->ring_mask] = addr;
|
||||
|
||||
__xskq_prod_submit(q, idx);
|
||||
}
|
||||
|
||||
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
|
||||
{
|
||||
__xskq_prod_submit(q, q->ring->producer + nb_entries);
|
||||
|
|
|
|||
|
|
@ -8,11 +8,14 @@ Build dependencies
|
|||
==================
|
||||
|
||||
Compiling requires having installed:
|
||||
* clang >= version 3.4.0
|
||||
* llvm >= version 3.7.1
|
||||
* clang
|
||||
* llvm
|
||||
* pahole
|
||||
|
||||
Note that LLVM's tool 'llc' must support target 'bpf', list version
|
||||
and supported targets with command: ``llc --version``
|
||||
Consult :ref:`Documentation/process/changes.rst <changes>` for the minimum
|
||||
version numbers required and how to update them. Note that LLVM's tool
|
||||
'llc' must support target 'bpf', list version and supported targets with
|
||||
command: ``llc --version``
|
||||
|
||||
Clean and configuration
|
||||
-----------------------
|
||||
|
|
@ -24,7 +27,8 @@ after some changes (on demand)::
|
|||
make -C samples/bpf clean
|
||||
make clean
|
||||
|
||||
Configure kernel, defconfig for instance::
|
||||
Configure kernel, defconfig for instance
|
||||
(see "tools/testing/selftests/bpf/config" for a reference config)::
|
||||
|
||||
make defconfig
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
bpftool-net
|
||||
================
|
||||
-------------------------------------------------------------------------------
|
||||
tool for inspection of netdev/tc related bpf prog attachments
|
||||
tool for inspection of networking related bpf prog attachments
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
:Manual section: 8
|
||||
|
|
@ -37,10 +37,13 @@ DESCRIPTION
|
|||
**bpftool net { show | list }** [ **dev** *NAME* ]
|
||||
List bpf program attachments in the kernel networking subsystem.
|
||||
|
||||
Currently, only device driver xdp attachments and tc filter
|
||||
classification/action attachments are implemented, i.e., for
|
||||
program types **BPF_PROG_TYPE_SCHED_CLS**,
|
||||
**BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
|
||||
Currently, device driver xdp attachments, tcx and old-style tc
|
||||
classifier/action attachments, flow_dissector as well as netfilter
|
||||
attachments are implemented, i.e., for
|
||||
program types **BPF_PROG_TYPE_XDP**, **BPF_PROG_TYPE_SCHED_CLS**,
|
||||
**BPF_PROG_TYPE_SCHED_ACT**, **BPF_PROG_TYPE_FLOW_DISSECTOR**,
|
||||
**BPF_PROG_TYPE_NETFILTER**.
|
||||
|
||||
For programs attached to a particular cgroup, e.g.,
|
||||
**BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
**BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
|
|
@ -49,12 +52,13 @@ DESCRIPTION
|
|||
bpf programs, users should consult other tools, e.g., iproute2.
|
||||
|
||||
The current output will start with all xdp program attachments, followed by
|
||||
all tc class/qdisc bpf program attachments. Both xdp programs and
|
||||
tc programs are ordered based on ifindex number. If multiple bpf
|
||||
programs attached to the same networking device through **tc filter**,
|
||||
the order will be first all bpf programs attached to tc classes, then
|
||||
all bpf programs attached to non clsact qdiscs, and finally all
|
||||
bpf programs attached to root and clsact qdisc.
|
||||
all tcx, then tc class/qdisc bpf program attachments, then flow_dissector
|
||||
and finally netfilter programs. Both xdp programs and tcx/tc programs are
|
||||
ordered based on ifindex number. If multiple bpf programs are attached
|
||||
to the same networking device through **tc**, the order will be first
|
||||
all bpf programs attached to tcx, then tc classes, then all bpf programs
|
||||
attached to non clsact qdiscs, and finally all bpf programs attached
|
||||
to root and clsact qdisc.
|
||||
|
||||
**bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
|
||||
Attach bpf program *PROG* to network interface *NAME* with
|
||||
|
|
|
|||
|
|
@ -76,6 +76,11 @@ static const char * const attach_type_strings[] = {
|
|||
[NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload",
|
||||
};
|
||||
|
||||
static const char * const attach_loc_strings[] = {
|
||||
[BPF_TCX_INGRESS] = "tcx/ingress",
|
||||
[BPF_TCX_EGRESS] = "tcx/egress",
|
||||
};
|
||||
|
||||
const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings);
|
||||
|
||||
static enum net_attach_type parse_attach_type(const char *str)
|
||||
|
|
@ -422,8 +427,89 @@ static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb)
|
|||
filter_info->devname, filter_info->ifindex);
|
||||
}
|
||||
|
||||
static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
|
||||
struct ip_devname_ifindex *dev)
|
||||
/*
 * Resolve the full name of the program with id @id into @name (@len bytes).
 *
 * Returns 0 when a name was written, the negative value returned by
 * bpf_prog_get_fd_by_id() or bpf_obj_get_info_by_fd() when one of them
 * fails, or -ENOENT when the program info carries an empty name. The
 * program fd opened here is always closed before returning.
 */
static int __show_dev_tc_bpf_name(__u32 id, char *name, size_t len)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	int prog_fd, err;

	prog_fd = bpf_prog_get_fd_by_id(id);
	if (prog_fd < 0)
		return prog_fd;

	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
	if (err >= 0) {
		if (info.name[0]) {
			get_prog_full_name(&info, prog_fd, name, len);
			err = 0;
		} else {
			err = -ENOENT;
		}
	}

	close(prog_fd);
	return err;
}
|
||||
|
||||
static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
|
||||
const enum bpf_attach_type loc)
|
||||
{
|
||||
__u32 prog_flags[64] = {}, link_flags[64] = {}, i, j;
|
||||
__u32 prog_ids[64] = {}, link_ids[64] = {};
|
||||
LIBBPF_OPTS(bpf_prog_query_opts, optq);
|
||||
char prog_name[MAX_PROG_FULL_NAME];
|
||||
int ret;
|
||||
|
||||
optq.prog_ids = prog_ids;
|
||||
optq.prog_attach_flags = prog_flags;
|
||||
optq.link_ids = link_ids;
|
||||
optq.link_attach_flags = link_flags;
|
||||
optq.count = ARRAY_SIZE(prog_ids);
|
||||
|
||||
ret = bpf_prog_query_opts(dev->ifindex, loc, &optq);
|
||||
if (ret)
|
||||
return;
|
||||
for (i = 0; i < optq.count; i++) {
|
||||
NET_START_OBJECT;
|
||||
NET_DUMP_STR("devname", "%s", dev->devname);
|
||||
NET_DUMP_UINT("ifindex", "(%u)", dev->ifindex);
|
||||
NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]);
|
||||
ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name,
|
||||
sizeof(prog_name));
|
||||
if (!ret)
|
||||
NET_DUMP_STR("name", " %s", prog_name);
|
||||
NET_DUMP_UINT("prog_id", " prog_id %u ", prog_ids[i]);
|
||||
if (prog_flags[i] || json_output) {
|
||||
NET_START_ARRAY("prog_flags", "%s ");
|
||||
for (j = 0; prog_flags[i] && j < 32; j++) {
|
||||
if (!(prog_flags[i] & (1 << j)))
|
||||
continue;
|
||||
NET_DUMP_UINT_ONLY(1 << j);
|
||||
}
|
||||
NET_END_ARRAY("");
|
||||
}
|
||||
if (link_ids[i] || json_output) {
|
||||
NET_DUMP_UINT("link_id", "link_id %u ", link_ids[i]);
|
||||
if (link_flags[i] || json_output) {
|
||||
NET_START_ARRAY("link_flags", "%s ");
|
||||
for (j = 0; link_flags[i] && j < 32; j++) {
|
||||
if (!(link_flags[i] & (1 << j)))
|
||||
continue;
|
||||
NET_DUMP_UINT_ONLY(1 << j);
|
||||
}
|
||||
NET_END_ARRAY("");
|
||||
}
|
||||
}
|
||||
NET_END_OBJECT_FINAL;
|
||||
}
|
||||
}
|
||||
|
||||
static void show_dev_tc_bpf(struct ip_devname_ifindex *dev)
|
||||
{
|
||||
__show_dev_tc_bpf(dev, BPF_TCX_INGRESS);
|
||||
__show_dev_tc_bpf(dev, BPF_TCX_EGRESS);
|
||||
}
|
||||
|
||||
static int show_dev_tc_bpf_classic(int sock, unsigned int nl_pid,
|
||||
struct ip_devname_ifindex *dev)
|
||||
{
|
||||
struct bpf_filter_t filter_info;
|
||||
struct bpf_tcinfo_t tcinfo;
|
||||
|
|
@ -790,8 +876,9 @@ static int do_show(int argc, char **argv)
|
|||
if (!ret) {
|
||||
NET_START_ARRAY("tc", "%s:\n");
|
||||
for (i = 0; i < dev_array.used_len; i++) {
|
||||
ret = show_dev_tc_bpf(sock, nl_pid,
|
||||
&dev_array.devices[i]);
|
||||
show_dev_tc_bpf(&dev_array.devices[i]);
|
||||
ret = show_dev_tc_bpf_classic(sock, nl_pid,
|
||||
&dev_array.devices[i]);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
|
@ -839,7 +926,8 @@ static int do_help(int argc, char **argv)
|
|||
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
|
||||
" " HELP_SPEC_OPTIONS " }\n"
|
||||
"\n"
|
||||
"Note: Only xdp and tc attachments are supported now.\n"
|
||||
"Note: Only xdp, tcx, tc, flow_dissector and netfilter attachments\n"
|
||||
" are currently supported.\n"
|
||||
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
|
||||
" to dump program attachments. For program types\n"
|
||||
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
|
||||
|
|
|
|||
|
|
@ -76,6 +76,14 @@
|
|||
fprintf(stdout, fmt_str, val); \
|
||||
}
|
||||
|
||||
/*
 * Emit a bare unsigned value (no key): as a JSON number when json_output is
 * set, otherwise as "%u " on stdout.
 */
#define NET_DUMP_UINT_ONLY(val)				\
{							\
	if (!json_output)				\
		fprintf(stdout, "%u ", val);		\
	else						\
		jsonw_uint(json_wtr, val);		\
}
|
||||
|
||||
#define NET_DUMP_STR(name, fmt_str, str) \
|
||||
{ \
|
||||
if (json_output) \
|
||||
|
|
|
|||
|
|
@ -1036,6 +1036,8 @@ enum bpf_attach_type {
|
|||
BPF_LSM_CGROUP,
|
||||
BPF_STRUCT_OPS,
|
||||
BPF_NETFILTER,
|
||||
BPF_TCX_INGRESS,
|
||||
BPF_TCX_EGRESS,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
|
|
@ -1053,7 +1055,7 @@ enum bpf_link_type {
|
|||
BPF_LINK_TYPE_KPROBE_MULTI = 8,
|
||||
BPF_LINK_TYPE_STRUCT_OPS = 9,
|
||||
BPF_LINK_TYPE_NETFILTER = 10,
|
||||
|
||||
BPF_LINK_TYPE_TCX = 11,
|
||||
MAX_BPF_LINK_TYPE,
|
||||
};
|
||||
|
||||
|
|
@ -1113,7 +1115,12 @@ enum bpf_perf_event_type {
|
|||
*/
|
||||
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||
#define BPF_F_ALLOW_MULTI (1U << 1)
|
||||
/* Generic attachment flags. */
|
||||
#define BPF_F_REPLACE (1U << 2)
|
||||
#define BPF_F_BEFORE (1U << 3)
|
||||
#define BPF_F_AFTER (1U << 4)
|
||||
#define BPF_F_ID (1U << 5)
|
||||
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
|
||||
|
||||
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||
* verifier will perform strict alignment checking as if the kernel
|
||||
|
|
@ -1444,14 +1451,19 @@ union bpf_attr {
|
|||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
|
||||
__u32 target_fd; /* container object to attach to */
|
||||
__u32 attach_bpf_fd; /* eBPF program to attach */
|
||||
union {
|
||||
__u32 target_fd; /* target object to attach to or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_bpf_fd;
|
||||
__u32 attach_type;
|
||||
__u32 attach_flags;
|
||||
__u32 replace_bpf_fd; /* previously attached eBPF
|
||||
* program to replace if
|
||||
* BPF_F_REPLACE is used
|
||||
*/
|
||||
__u32 replace_bpf_fd;
|
||||
union {
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
};
|
||||
__u64 expected_revision;
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||
|
|
@ -1497,16 +1509,26 @@ union bpf_attr {
|
|||
} info;
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_QUERY command */
|
||||
__u32 target_fd; /* container object to query */
|
||||
union {
|
||||
__u32 target_fd; /* target object to query or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_type;
|
||||
__u32 query_flags;
|
||||
__u32 attach_flags;
|
||||
__aligned_u64 prog_ids;
|
||||
__u32 prog_cnt;
|
||||
union {
|
||||
__u32 prog_cnt;
|
||||
__u32 count;
|
||||
};
|
||||
__u32 :32;
|
||||
/* output: per-program attach_flags.
|
||||
* not allowed to be set during effective query.
|
||||
*/
|
||||
__aligned_u64 prog_attach_flags;
|
||||
__aligned_u64 link_ids;
|
||||
__aligned_u64 link_attach_flags;
|
||||
__u64 revision;
|
||||
} query;
|
||||
|
||||
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
|
||||
|
|
@ -1549,13 +1571,13 @@ union bpf_attr {
|
|||
__u32 map_fd; /* struct_ops to attach */
|
||||
};
|
||||
union {
|
||||
__u32 target_fd; /* object to attach to */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
__u32 target_fd; /* target object to attach to or ... */
|
||||
__u32 target_ifindex; /* target ifindex */
|
||||
};
|
||||
__u32 attach_type; /* attach type */
|
||||
__u32 flags; /* extra flags */
|
||||
union {
|
||||
__u32 target_btf_id; /* btf_id of target to attach to */
|
||||
__u32 target_btf_id; /* btf_id of target to attach to */
|
||||
struct {
|
||||
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
|
||||
__u32 iter_info_len; /* iter_info length */
|
||||
|
|
@ -1589,6 +1611,13 @@ union bpf_attr {
|
|||
__s32 priority;
|
||||
__u32 flags;
|
||||
} netfilter;
|
||||
struct {
|
||||
union {
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
};
|
||||
__u64 expected_revision;
|
||||
} tcx;
|
||||
};
|
||||
} link_create;
|
||||
|
||||
|
|
@ -6197,6 +6226,19 @@ struct bpf_sock_tuple {
|
|||
};
|
||||
};
|
||||
|
||||
/* (Simplified) user return codes for tcx prog type.
|
||||
* A valid tcx program must return one of these defined values. All other
|
||||
* return codes are reserved for future use. Must remain compatible with
|
||||
* their TC_ACT_* counterparts. For compatibility in behavior, unknown
|
||||
* return codes are mapped to TCX_NEXT.
|
||||
*/
|
||||
enum tcx_action_base {
|
||||
TCX_NEXT = -1,
|
||||
TCX_PASS = 0,
|
||||
TCX_DROP = 2,
|
||||
TCX_REDIRECT = 7,
|
||||
};
|
||||
|
||||
struct bpf_xdp_sock {
|
||||
__u32 queue_id;
|
||||
};
|
||||
|
|
@ -6479,6 +6521,10 @@ struct bpf_link_info {
|
|||
} event; /* BPF_PERF_EVENT_EVENT */
|
||||
};
|
||||
} perf_event;
|
||||
struct {
|
||||
__u32 ifindex;
|
||||
__u32 attach_type;
|
||||
} tcx;
|
||||
};
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
|
|
@ -7052,6 +7098,7 @@ struct bpf_list_head {
|
|||
struct bpf_list_node {
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_rb_root {
|
||||
|
|
@ -7063,6 +7110,7 @@ struct bpf_rb_node {
|
|||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
__u64 :64;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_refcount {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,12 @@
|
|||
* application.
|
||||
*/
|
||||
#define XDP_USE_NEED_WAKEUP (1 << 3)
|
||||
/* By setting this option, userspace application indicates that it can
|
||||
* handle multiple descriptors per packet thus enabling xsk core to split
|
||||
* multi-buffer XDP frames into multiple Rx descriptors. Without this set
|
||||
* such frames will be dropped by xsk.
|
||||
*/
|
||||
#define XDP_USE_SG (1 << 4)
|
||||
|
||||
/* Flags for xsk_umem_config flags */
|
||||
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
|
||||
|
|
@ -106,6 +112,9 @@ struct xdp_desc {
|
|||
__u32 options;
|
||||
};
|
||||
|
||||
/* Flag indicating that the packet consists of multiple buffers */
|
||||
#define XDP_PKT_CONTD (1 << 0)
|
||||
|
||||
/* UMEM descriptor is __u64 */
|
||||
|
||||
#endif /* _LINUX_IF_XDP_H */
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ enum {
|
|||
NETDEV_A_DEV_IFINDEX = 1,
|
||||
NETDEV_A_DEV_PAD,
|
||||
NETDEV_A_DEV_XDP_FEATURES,
|
||||
NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
|
||||
|
||||
__NETDEV_A_DEV_MAX,
|
||||
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
|
||||
|
|
|
|||
|
|
@ -629,55 +629,89 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
|
|||
return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
|
||||
}
|
||||
|
||||
int bpf_prog_attach_opts(int prog_fd, int target_fd,
|
||||
enum bpf_attach_type type,
|
||||
const struct bpf_prog_attach_opts *opts)
|
||||
int bpf_prog_attach_opts(int prog_fd, int target, enum bpf_attach_type type,
|
||||
const struct bpf_prog_attach_opts *opts)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
|
||||
__u32 relative_id, flags;
|
||||
int ret, relative_fd;
|
||||
union bpf_attr attr;
|
||||
int ret;
|
||||
|
||||
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
|
||||
return libbpf_err(-EINVAL);
|
||||
|
||||
relative_id = OPTS_GET(opts, relative_id, 0);
|
||||
relative_fd = OPTS_GET(opts, relative_fd, 0);
|
||||
flags = OPTS_GET(opts, flags, 0);
|
||||
|
||||
/* validate we don't have unexpected combinations of non-zero fields */
|
||||
if (relative_fd && relative_id)
|
||||
return libbpf_err(-EINVAL);
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
attr.target_fd = target_fd;
|
||||
attr.attach_bpf_fd = prog_fd;
|
||||
attr.attach_type = type;
|
||||
attr.attach_flags = OPTS_GET(opts, flags, 0);
|
||||
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
|
||||
attr.target_fd = target;
|
||||
attr.attach_bpf_fd = prog_fd;
|
||||
attr.attach_type = type;
|
||||
attr.replace_bpf_fd = OPTS_GET(opts, replace_fd, 0);
|
||||
attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
|
||||
|
||||
if (relative_id) {
|
||||
attr.attach_flags = flags | BPF_F_ID;
|
||||
attr.relative_id = relative_id;
|
||||
} else {
|
||||
attr.attach_flags = flags;
|
||||
attr.relative_fd = relative_fd;
|
||||
}
|
||||
|
||||
ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
|
||||
return libbpf_err_errno(ret);
|
||||
}
|
||||
|
||||
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
|
||||
int bpf_prog_detach_opts(int prog_fd, int target, enum bpf_attach_type type,
|
||||
const struct bpf_prog_detach_opts *opts)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
|
||||
__u32 relative_id, flags;
|
||||
int ret, relative_fd;
|
||||
union bpf_attr attr;
|
||||
int ret;
|
||||
|
||||
if (!OPTS_VALID(opts, bpf_prog_detach_opts))
|
||||
return libbpf_err(-EINVAL);
|
||||
|
||||
relative_id = OPTS_GET(opts, relative_id, 0);
|
||||
relative_fd = OPTS_GET(opts, relative_fd, 0);
|
||||
flags = OPTS_GET(opts, flags, 0);
|
||||
|
||||
/* validate we don't have unexpected combinations of non-zero fields */
|
||||
if (relative_fd && relative_id)
|
||||
return libbpf_err(-EINVAL);
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
attr.target_fd = target_fd;
|
||||
attr.attach_type = type;
|
||||
attr.target_fd = target;
|
||||
attr.attach_bpf_fd = prog_fd;
|
||||
attr.attach_type = type;
|
||||
attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
|
||||
|
||||
if (relative_id) {
|
||||
attr.attach_flags = flags | BPF_F_ID;
|
||||
attr.relative_id = relative_id;
|
||||
} else {
|
||||
attr.attach_flags = flags;
|
||||
attr.relative_fd = relative_fd;
|
||||
}
|
||||
|
||||
ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
|
||||
return libbpf_err_errno(ret);
|
||||
}
|
||||
|
||||
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
|
||||
{
|
||||
return bpf_prog_detach_opts(0, target_fd, type, NULL);
|
||||
}
|
||||
|
||||
int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
|
||||
union bpf_attr attr;
|
||||
int ret;
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
attr.target_fd = target_fd;
|
||||
attr.attach_bpf_fd = prog_fd;
|
||||
attr.attach_type = type;
|
||||
|
||||
ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
|
||||
return libbpf_err_errno(ret);
|
||||
return bpf_prog_detach_opts(prog_fd, target_fd, type, NULL);
|
||||
}
|
||||
|
||||
int bpf_link_create(int prog_fd, int target_fd,
|
||||
|
|
@ -685,9 +719,9 @@ int bpf_link_create(int prog_fd, int target_fd,
|
|||
const struct bpf_link_create_opts *opts)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, link_create);
|
||||
__u32 target_btf_id, iter_info_len;
|
||||
__u32 target_btf_id, iter_info_len, relative_id;
|
||||
int fd, err, relative_fd;
|
||||
union bpf_attr attr;
|
||||
int fd, err;
|
||||
|
||||
if (!OPTS_VALID(opts, bpf_link_create_opts))
|
||||
return libbpf_err(-EINVAL);
|
||||
|
|
@ -749,6 +783,22 @@ int bpf_link_create(int prog_fd, int target_fd,
|
|||
if (!OPTS_ZEROED(opts, netfilter))
|
||||
return libbpf_err(-EINVAL);
|
||||
break;
|
||||
case BPF_TCX_INGRESS:
|
||||
case BPF_TCX_EGRESS:
|
||||
relative_fd = OPTS_GET(opts, tcx.relative_fd, 0);
|
||||
relative_id = OPTS_GET(opts, tcx.relative_id, 0);
|
||||
if (relative_fd && relative_id)
|
||||
return libbpf_err(-EINVAL);
|
||||
if (relative_id) {
|
||||
attr.link_create.tcx.relative_id = relative_id;
|
||||
attr.link_create.flags |= BPF_F_ID;
|
||||
} else {
|
||||
attr.link_create.tcx.relative_fd = relative_fd;
|
||||
}
|
||||
attr.link_create.tcx.expected_revision = OPTS_GET(opts, tcx.expected_revision, 0);
|
||||
if (!OPTS_ZEROED(opts, tcx))
|
||||
return libbpf_err(-EINVAL);
|
||||
break;
|
||||
default:
|
||||
if (!OPTS_ZEROED(opts, flags))
|
||||
return libbpf_err(-EINVAL);
|
||||
|
|
@ -841,8 +891,7 @@ int bpf_iter_create(int link_fd)
|
|||
return libbpf_err_errno(fd);
|
||||
}
|
||||
|
||||
int bpf_prog_query_opts(int target_fd,
|
||||
enum bpf_attach_type type,
|
||||
int bpf_prog_query_opts(int target, enum bpf_attach_type type,
|
||||
struct bpf_prog_query_opts *opts)
|
||||
{
|
||||
const size_t attr_sz = offsetofend(union bpf_attr, query);
|
||||
|
|
@ -853,18 +902,20 @@ int bpf_prog_query_opts(int target_fd,
|
|||
return libbpf_err(-EINVAL);
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
|
||||
attr.query.target_fd = target_fd;
|
||||
attr.query.attach_type = type;
|
||||
attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
|
||||
attr.query.prog_cnt = OPTS_GET(opts, prog_cnt, 0);
|
||||
attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
|
||||
attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
|
||||
attr.query.target_fd = target;
|
||||
attr.query.attach_type = type;
|
||||
attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
|
||||
attr.query.count = OPTS_GET(opts, count, 0);
|
||||
attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
|
||||
attr.query.link_ids = ptr_to_u64(OPTS_GET(opts, link_ids, NULL));
|
||||
attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
|
||||
attr.query.link_attach_flags = ptr_to_u64(OPTS_GET(opts, link_attach_flags, NULL));
|
||||
|
||||
ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
|
||||
|
||||
OPTS_SET(opts, attach_flags, attr.query.attach_flags);
|
||||
OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
|
||||
OPTS_SET(opts, revision, attr.query.revision);
|
||||
OPTS_SET(opts, count, attr.query.count);
|
||||
|
||||
return libbpf_err_errno(ret);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -312,22 +312,68 @@ LIBBPF_API int bpf_obj_get(const char *pathname);
|
|||
LIBBPF_API int bpf_obj_get_opts(const char *pathname,
|
||||
const struct bpf_obj_get_opts *opts);
|
||||
|
||||
struct bpf_prog_attach_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
unsigned int flags;
|
||||
int replace_prog_fd;
|
||||
};
|
||||
#define bpf_prog_attach_opts__last_field replace_prog_fd
|
||||
|
||||
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
|
||||
enum bpf_attach_type type, unsigned int flags);
|
||||
LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
|
||||
enum bpf_attach_type type,
|
||||
const struct bpf_prog_attach_opts *opts);
|
||||
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
|
||||
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
|
||||
enum bpf_attach_type type);
|
||||
|
||||
struct bpf_prog_attach_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
__u32 flags;
|
||||
union {
|
||||
int replace_prog_fd;
|
||||
int replace_fd;
|
||||
};
|
||||
int relative_fd;
|
||||
__u32 relative_id;
|
||||
__u64 expected_revision;
|
||||
size_t :0;
|
||||
};
|
||||
#define bpf_prog_attach_opts__last_field expected_revision
|
||||
|
||||
struct bpf_prog_detach_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
__u32 flags;
|
||||
int relative_fd;
|
||||
__u32 relative_id;
|
||||
__u64 expected_revision;
|
||||
size_t :0;
|
||||
};
|
||||
#define bpf_prog_detach_opts__last_field expected_revision
|
||||
|
||||
/**
|
||||
* @brief **bpf_prog_attach_opts()** attaches the BPF program corresponding to
|
||||
* *prog_fd* to a *target* which can represent a file descriptor or netdevice
|
||||
* ifindex.
|
||||
*
|
||||
* @param prog_fd BPF program file descriptor
|
||||
* @param target attach location file descriptor or ifindex
|
||||
* @param type attach type for the BPF program
|
||||
* @param opts options for configuring the attachment
|
||||
* @return 0, on success; negative error code, otherwise (errno is also set to
|
||||
* the error code)
|
||||
*/
|
||||
LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int target,
|
||||
enum bpf_attach_type type,
|
||||
const struct bpf_prog_attach_opts *opts);
|
||||
|
||||
/**
|
||||
* @brief **bpf_prog_detach_opts()** detaches the BPF program corresponding to
|
||||
* *prog_fd* from a *target* which can represent a file descriptor or netdevice
|
||||
* ifindex.
|
||||
*
|
||||
* @param prog_fd BPF program file descriptor
|
||||
* @param target detach location file descriptor or ifindex
|
||||
* @param type detach type for the BPF program
|
||||
* @param opts options for configuring the detachment
|
||||
* @return 0, on success; negative error code, otherwise (errno is also set to
|
||||
* the error code)
|
||||
*/
|
||||
LIBBPF_API int bpf_prog_detach_opts(int prog_fd, int target,
|
||||
enum bpf_attach_type type,
|
||||
const struct bpf_prog_detach_opts *opts);
|
||||
|
||||
union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
|
||||
struct bpf_link_create_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
|
|
@ -355,6 +401,11 @@ struct bpf_link_create_opts {
|
|||
__s32 priority;
|
||||
__u32 flags;
|
||||
} netfilter;
|
||||
struct {
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
__u64 expected_revision;
|
||||
} tcx;
|
||||
};
|
||||
size_t :0;
|
||||
};
|
||||
|
|
@ -495,13 +546,31 @@ struct bpf_prog_query_opts {
|
|||
__u32 query_flags;
|
||||
__u32 attach_flags; /* output argument */
|
||||
__u32 *prog_ids;
|
||||
__u32 prog_cnt; /* input+output argument */
|
||||
union {
|
||||
/* input+output argument */
|
||||
__u32 prog_cnt;
|
||||
__u32 count;
|
||||
};
|
||||
__u32 *prog_attach_flags;
|
||||
__u32 *link_ids;
|
||||
__u32 *link_attach_flags;
|
||||
__u64 revision;
|
||||
size_t :0;
|
||||
};
|
||||
#define bpf_prog_query_opts__last_field prog_attach_flags
|
||||
#define bpf_prog_query_opts__last_field revision
|
||||
|
||||
LIBBPF_API int bpf_prog_query_opts(int target_fd,
|
||||
enum bpf_attach_type type,
|
||||
/**
|
||||
* @brief **bpf_prog_query_opts()** queries the BPF programs and BPF links
|
||||
* which are attached to *target* which can represent a file descriptor or
|
||||
* netdevice ifindex.
|
||||
*
|
||||
* @param target query location file descriptor or ifindex
|
||||
* @param type attach type for the BPF program
|
||||
* @param opts options for configuring the query
|
||||
* @return 0, on success; negative error code, otherwise (errno is also set to
|
||||
* the error code)
|
||||
*/
|
||||
LIBBPF_API int bpf_prog_query_opts(int target, enum bpf_attach_type type,
|
||||
struct bpf_prog_query_opts *opts);
|
||||
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
|
||||
__u32 query_flags, __u32 *attach_flags,
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ static const char * const attach_type_name[] = {
|
|||
[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
|
||||
[BPF_STRUCT_OPS] = "struct_ops",
|
||||
[BPF_NETFILTER] = "netfilter",
|
||||
[BPF_TCX_INGRESS] = "tcx_ingress",
|
||||
[BPF_TCX_EGRESS] = "tcx_egress",
|
||||
};
|
||||
|
||||
static const char * const link_type_name[] = {
|
||||
|
|
@ -132,6 +134,7 @@ static const char * const link_type_name[] = {
|
|||
[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
|
||||
[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
|
||||
[BPF_LINK_TYPE_NETFILTER] = "netfilter",
|
||||
[BPF_LINK_TYPE_TCX] = "tcx",
|
||||
};
|
||||
|
||||
static const char * const map_type_name[] = {
|
||||
|
|
@ -8696,9 +8699,13 @@ static const struct bpf_sec_def section_defs[] = {
|
|||
SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
|
||||
SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
|
||||
SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
|
||||
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
|
||||
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE),
|
||||
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE),
|
||||
SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
|
||||
SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
|
||||
SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
|
||||
SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
|
||||
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
|
||||
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
|
||||
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
|
||||
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
|
||||
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
|
||||
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
|
||||
|
|
@ -11848,11 +11855,10 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li
|
|||
}
|
||||
|
||||
static struct bpf_link *
|
||||
bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
|
||||
const char *target_name)
|
||||
bpf_program_attach_fd(const struct bpf_program *prog,
|
||||
int target_fd, const char *target_name,
|
||||
const struct bpf_link_create_opts *opts)
|
||||
{
|
||||
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
|
||||
.target_btf_id = btf_id);
|
||||
enum bpf_attach_type attach_type;
|
||||
char errmsg[STRERR_BUFSIZE];
|
||||
struct bpf_link *link;
|
||||
|
|
@ -11870,7 +11876,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
|
|||
link->detach = &bpf_link__detach_fd;
|
||||
|
||||
attach_type = bpf_program__expected_attach_type(prog);
|
||||
link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
|
||||
link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
|
||||
if (link_fd < 0) {
|
||||
link_fd = -errno;
|
||||
free(link);
|
||||
|
|
@ -11886,19 +11892,54 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
|
|||
struct bpf_link *
|
||||
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
|
||||
{
|
||||
return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
|
||||
return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
|
||||
}
|
||||
|
||||
struct bpf_link *
|
||||
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
|
||||
{
|
||||
return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
|
||||
return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
|
||||
{
|
||||
/* target_fd/target_ifindex use the same field in LINK_CREATE */
|
||||
return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
|
||||
return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
|
||||
}
|
||||
|
||||
struct bpf_link *
|
||||
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
|
||||
const struct bpf_tcx_opts *opts)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
|
||||
__u32 relative_id;
|
||||
int relative_fd;
|
||||
|
||||
if (!OPTS_VALID(opts, bpf_tcx_opts))
|
||||
return libbpf_err_ptr(-EINVAL);
|
||||
|
||||
relative_id = OPTS_GET(opts, relative_id, 0);
|
||||
relative_fd = OPTS_GET(opts, relative_fd, 0);
|
||||
|
||||
/* validate we don't have unexpected combinations of non-zero fields */
|
||||
if (!ifindex) {
|
||||
pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
|
||||
prog->name);
|
||||
return libbpf_err_ptr(-EINVAL);
|
||||
}
|
||||
if (relative_fd && relative_id) {
|
||||
pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
|
||||
prog->name);
|
||||
return libbpf_err_ptr(-EINVAL);
|
||||
}
|
||||
|
||||
link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
|
||||
link_create_opts.tcx.relative_fd = relative_fd;
|
||||
link_create_opts.tcx.relative_id = relative_id;
|
||||
link_create_opts.flags = OPTS_GET(opts, flags, 0);
|
||||
|
||||
/* target_fd/target_ifindex use the same field in LINK_CREATE */
|
||||
return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
|
||||
|
|
@ -11920,11 +11961,16 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
|
|||
}
|
||||
|
||||
if (target_fd) {
|
||||
LIBBPF_OPTS(bpf_link_create_opts, target_opts);
|
||||
|
||||
btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
|
||||
if (btf_id < 0)
|
||||
return libbpf_err_ptr(btf_id);
|
||||
|
||||
return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
|
||||
target_opts.target_btf_id = btf_id;
|
||||
|
||||
return bpf_program_attach_fd(prog, target_fd, "freplace",
|
||||
&target_opts);
|
||||
} else {
|
||||
/* no target, so use raw_tracepoint_open for compatibility
|
||||
* with old kernels
|
||||
|
|
|
|||
|
|
@ -733,6 +733,21 @@ LIBBPF_API struct bpf_link *
|
|||
bpf_program__attach_netfilter(const struct bpf_program *prog,
|
||||
const struct bpf_netfilter_opts *opts);
|
||||
|
||||
struct bpf_tcx_opts {
|
||||
/* size of this struct, for forward/backward compatibility */
|
||||
size_t sz;
|
||||
__u32 flags;
|
||||
__u32 relative_fd;
|
||||
__u32 relative_id;
|
||||
__u64 expected_revision;
|
||||
size_t :0;
|
||||
};
|
||||
#define bpf_tcx_opts__last_field expected_revision
|
||||
|
||||
LIBBPF_API struct bpf_link *
|
||||
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
|
||||
const struct bpf_tcx_opts *opts);
|
||||
|
||||
struct bpf_map;
|
||||
|
||||
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
|
||||
|
|
@ -1105,9 +1120,10 @@ struct bpf_xdp_query_opts {
|
|||
__u32 skb_prog_id; /* output */
|
||||
__u8 attach_mode; /* output */
|
||||
__u64 feature_flags; /* output */
|
||||
__u32 xdp_zc_max_segs; /* output */
|
||||
size_t :0;
|
||||
};
|
||||
#define bpf_xdp_query_opts__last_field feature_flags
|
||||
#define bpf_xdp_query_opts__last_field xdp_zc_max_segs
|
||||
|
||||
LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags,
|
||||
const struct bpf_xdp_attach_opts *opts);
|
||||
|
|
|
|||
|
|
@ -395,5 +395,7 @@ LIBBPF_1.2.0 {
|
|||
LIBBPF_1.3.0 {
|
||||
global:
|
||||
bpf_obj_pin_opts;
|
||||
bpf_prog_detach_opts;
|
||||
bpf_program__attach_netfilter;
|
||||
bpf_program__attach_tcx;
|
||||
} LIBBPF_1.2.0;
|
||||
|
|
|
|||
|
|
@ -70,4 +70,20 @@
|
|||
}; \
|
||||
})
|
||||
|
||||
/* Helper macro to clear and optionally reinitialize libbpf options struct
|
||||
*
|
||||
* Small helper macro to reset all fields and to reinitialize the common
|
||||
* structure size member. Values provided by users in struct initializer-
|
||||
* syntax as varargs can be provided as well to reinitialize options struct
|
||||
* specific members.
|
||||
*/
|
||||
#define LIBBPF_OPTS_RESET(NAME, ...) \
|
||||
do { \
|
||||
memset(&NAME, 0, sizeof(NAME)); \
|
||||
NAME = (typeof(NAME)) { \
|
||||
.sz = sizeof(NAME), \
|
||||
__VA_ARGS__ \
|
||||
}; \
|
||||
} while (0)
|
||||
|
||||
#endif /* __LIBBPF_LIBBPF_COMMON_H */
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ struct xdp_id_md {
|
|||
|
||||
struct xdp_features_md {
|
||||
int ifindex;
|
||||
__u32 xdp_zc_max_segs;
|
||||
__u64 flags;
|
||||
};
|
||||
|
||||
|
|
@ -421,6 +422,9 @@ static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
|
|||
return NL_CONT;
|
||||
|
||||
md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]);
|
||||
if (tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS])
|
||||
md->xdp_zc_max_segs =
|
||||
libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]);
|
||||
return NL_DONE;
|
||||
}
|
||||
|
||||
|
|
@ -493,6 +497,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
|
|||
return libbpf_err(err);
|
||||
|
||||
opts->feature_flags = md.flags;
|
||||
opts->xdp_zc_max_segs = md.xdp_zc_max_segs;
|
||||
|
||||
skip_feature_flags:
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ static struct {
|
|||
"bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
|
||||
{ #test "_missing_lock_pop_back", \
|
||||
"bpf_spin_lock at off=" #off " must be held for bpf_list_head" },
|
||||
TEST(kptr, 32)
|
||||
TEST(kptr, 40)
|
||||
TEST(global, 16)
|
||||
TEST(map, 0)
|
||||
TEST(inner_map, 0)
|
||||
|
|
@ -31,7 +31,7 @@ static struct {
|
|||
#define TEST(test, op) \
|
||||
{ #test "_kptr_incorrect_lock_" #op, \
|
||||
"held lock and object are not in the same allocation\n" \
|
||||
"bpf_spin_lock at off=32 must be held for bpf_list_head" }, \
|
||||
"bpf_spin_lock at off=40 must be held for bpf_list_head" }, \
|
||||
{ #test "_global_incorrect_lock_" #op, \
|
||||
"held lock and object are not in the same allocation\n" \
|
||||
"bpf_spin_lock at off=16 must be held for bpf_list_head" }, \
|
||||
|
|
@ -84,23 +84,23 @@ static struct {
|
|||
{ "double_push_back", "arg#1 expected pointer to allocated object" },
|
||||
{ "no_node_value_type", "bpf_list_node not found at offset=0" },
|
||||
{ "incorrect_value_type",
|
||||
"operation on bpf_list_head expects arg#1 bpf_list_node at offset=40 in struct foo, "
|
||||
"operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
|
||||
"but arg is at offset=0 in struct bar" },
|
||||
{ "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
|
||||
{ "incorrect_node_off1", "bpf_list_node not found at offset=41" },
|
||||
{ "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=40 in struct foo" },
|
||||
{ "incorrect_node_off1", "bpf_list_node not found at offset=49" },
|
||||
{ "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
|
||||
{ "no_head_type", "bpf_list_head not found at offset=0" },
|
||||
{ "incorrect_head_var_off1", "R1 doesn't have constant offset" },
|
||||
{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
|
||||
{ "incorrect_head_off1", "bpf_list_head not found at offset=17" },
|
||||
{ "incorrect_head_off1", "bpf_list_head not found at offset=25" },
|
||||
{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
|
||||
{ "pop_front_off",
|
||||
"15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
|
||||
"R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
|
||||
"15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
|
||||
"R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
|
||||
"16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
|
||||
{ "pop_back_off",
|
||||
"15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) "
|
||||
"R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n"
|
||||
"15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
|
||||
"R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
|
||||
"16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
|
||||
};
|
||||
|
||||
|
|
@ -257,7 +257,7 @@ static struct btf *init_btf(void)
|
|||
hid = btf__add_struct(btf, "bpf_list_head", 16);
|
||||
if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
|
||||
goto end;
|
||||
nid = btf__add_struct(btf, "bpf_list_node", 16);
|
||||
nid = btf__add_struct(btf, "bpf_list_node", 24);
|
||||
if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
|
||||
goto end;
|
||||
return btf;
|
||||
|
|
@ -276,7 +276,7 @@ static void list_and_rb_node_same_struct(bool refcount_field)
|
|||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
return;
|
||||
|
||||
bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 24);
|
||||
bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 32);
|
||||
if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node"))
|
||||
return;
|
||||
|
||||
|
|
@ -286,17 +286,17 @@ static void list_and_rb_node_same_struct(bool refcount_field)
|
|||
return;
|
||||
}
|
||||
|
||||
id = btf__add_struct(btf, "bar", refcount_field ? 44 : 40);
|
||||
id = btf__add_struct(btf, "bar", refcount_field ? 60 : 56);
|
||||
if (!ASSERT_GT(id, 0, "btf__add_struct bar"))
|
||||
return;
|
||||
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::a"))
|
||||
return;
|
||||
err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 128, 0);
|
||||
err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 192, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::c"))
|
||||
return;
|
||||
if (refcount_field) {
|
||||
err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 320, 0);
|
||||
err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 448, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::ref"))
|
||||
return;
|
||||
}
|
||||
|
|
@ -527,7 +527,7 @@ static void test_btf(void)
|
|||
btf = init_btf();
|
||||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "foo", 36);
|
||||
id = btf__add_struct(btf, "foo", 44);
|
||||
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -536,7 +536,7 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
|
||||
|
|
@ -553,7 +553,7 @@ static void test_btf(void)
|
|||
btf = init_btf();
|
||||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "foo", 36);
|
||||
id = btf__add_struct(btf, "foo", 44);
|
||||
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -562,13 +562,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
|
||||
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bar", 36);
|
||||
id = btf__add_struct(btf, "bar", 44);
|
||||
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -577,7 +577,7 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0);
|
||||
|
|
@ -594,19 +594,19 @@ static void test_btf(void)
|
|||
btf = init_btf();
|
||||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "foo", 20);
|
||||
id = btf__add_struct(btf, "foo", 28);
|
||||
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::a"))
|
||||
break;
|
||||
err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
|
||||
err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::b"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
|
||||
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bar", 16);
|
||||
id = btf__add_struct(btf, "bar", 24);
|
||||
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
|
||||
|
|
@ -623,19 +623,19 @@ static void test_btf(void)
|
|||
btf = init_btf();
|
||||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "foo", 20);
|
||||
id = btf__add_struct(btf, "foo", 28);
|
||||
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::a"))
|
||||
break;
|
||||
err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
|
||||
err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::b"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
|
||||
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bar", 36);
|
||||
id = btf__add_struct(btf, "bar", 44);
|
||||
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -644,13 +644,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
|
||||
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "baz", 16);
|
||||
id = btf__add_struct(btf, "baz", 24);
|
||||
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
|
||||
|
|
@ -667,7 +667,7 @@ static void test_btf(void)
|
|||
btf = init_btf();
|
||||
if (!ASSERT_OK_PTR(btf, "init_btf"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "foo", 36);
|
||||
id = btf__add_struct(btf, "foo", 44);
|
||||
if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -676,13 +676,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field foo::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
|
||||
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bar", 36);
|
||||
id = btf__add_struct(btf, "bar", 44);
|
||||
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -691,13 +691,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar:b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar:c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
|
||||
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "baz", 16);
|
||||
id = btf__add_struct(btf, "baz", 24);
|
||||
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
|
||||
|
|
@ -726,7 +726,7 @@ static void test_btf(void)
|
|||
id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
|
||||
if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bar", 36);
|
||||
id = btf__add_struct(btf, "bar", 44);
|
||||
if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -735,13 +735,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0);
|
||||
if (!ASSERT_EQ(id, 8, "btf__add_decl_tag"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "baz", 36);
|
||||
id = btf__add_struct(btf, "baz", 44);
|
||||
if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
|
||||
|
|
@ -750,13 +750,13 @@ static void test_btf(void)
|
|||
err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::b"))
|
||||
break;
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0);
|
||||
err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
|
||||
if (!ASSERT_OK(err, "btf__add_field bar::c"))
|
||||
break;
|
||||
id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0);
|
||||
if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a"))
|
||||
break;
|
||||
id = btf__add_struct(btf, "bam", 16);
|
||||
id = btf__add_struct(btf, "bam", 24);
|
||||
if (!ASSERT_EQ(id, 11, "btf__add_struct bam"))
|
||||
break;
|
||||
err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
|
||||
|
|
|
|||
|
|
@ -14,3 +14,7 @@ void test_refcounted_kptr(void)
|
|||
void test_refcounted_kptr_fail(void)
|
||||
{
|
||||
}
|
||||
|
||||
void test_refcounted_kptr_wrong_owner(void)
|
||||
{
|
||||
}
|
||||
|
|
|
|||
72
tools/testing/selftests/bpf/prog_tests/tc_helpers.h
Normal file
72
tools/testing/selftests/bpf/prog_tests/tc_helpers.h
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
#ifndef TC_HELPERS
|
||||
#define TC_HELPERS
|
||||
#include <test_progs.h>
|
||||
|
||||
static inline __u32 id_from_prog_fd(int fd)
|
||||
{
|
||||
struct bpf_prog_info prog_info = {};
|
||||
__u32 prog_info_len = sizeof(prog_info);
|
||||
int err;
|
||||
|
||||
err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
|
||||
if (!ASSERT_OK(err, "id_from_prog_fd"))
|
||||
return 0;
|
||||
|
||||
ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
|
||||
return prog_info.id;
|
||||
}
|
||||
|
||||
static inline __u32 id_from_link_fd(int fd)
|
||||
{
|
||||
struct bpf_link_info link_info = {};
|
||||
__u32 link_info_len = sizeof(link_info);
|
||||
int err;
|
||||
|
||||
err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
|
||||
if (!ASSERT_OK(err, "id_from_link_fd"))
|
||||
return 0;
|
||||
|
||||
ASSERT_NEQ(link_info.id, 0, "link_info.id");
|
||||
return link_info.id;
|
||||
}
|
||||
|
||||
static inline __u32 ifindex_from_link_fd(int fd)
|
||||
{
|
||||
struct bpf_link_info link_info = {};
|
||||
__u32 link_info_len = sizeof(link_info);
|
||||
int err;
|
||||
|
||||
err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
|
||||
if (!ASSERT_OK(err, "id_from_link_fd"))
|
||||
return 0;
|
||||
|
||||
return link_info.tcx.ifindex;
|
||||
}
|
||||
|
||||
static inline void __assert_mprog_count(int target, int expected, bool miniq, int ifindex)
|
||||
{
|
||||
__u32 count = 0, attach_flags = 0;
|
||||
int err;
|
||||
|
||||
err = bpf_prog_query(ifindex, target, 0, &attach_flags,
|
||||
NULL, &count);
|
||||
ASSERT_EQ(count, expected, "count");
|
||||
if (!expected && !miniq)
|
||||
ASSERT_EQ(err, -ENOENT, "prog_query");
|
||||
else
|
||||
ASSERT_EQ(err, 0, "prog_query");
|
||||
}
|
||||
|
||||
static inline void assert_mprog_count(int target, int expected)
|
||||
{
|
||||
__assert_mprog_count(target, expected, false, loopback);
|
||||
}
|
||||
|
||||
/*
 * Assert the attached-program count for @target on an explicit @ifindex,
 * with miniq == false. Note that @ifindex comes first here, while
 * __assert_mprog_count() takes it last.
 */
static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected)
{
	__assert_mprog_count(target, expected, false, ifindex);
}
|
||||
|
||||
#endif /* TC_HELPERS */
|
||||
1583
tools/testing/selftests/bpf/prog_tests/tc_links.c
Normal file
1583
tools/testing/selftests/bpf/prog_tests/tc_links.c
Normal file
File diff suppressed because it is too large
Load diff
2239
tools/testing/selftests/bpf/prog_tests/tc_opts.c
Normal file
2239
tools/testing/selftests/bpf/prog_tests/tc_opts.c
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -103,6 +103,8 @@ struct {
|
|||
__type(value, __u32);
|
||||
} m_hash SEC(".maps");
|
||||
|
||||
__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
|
||||
|
||||
static inline int check_hash(void)
|
||||
{
|
||||
struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
|
||||
|
|
@ -115,6 +117,8 @@ static inline int check_hash(void)
|
|||
VERIFY(hash->elem_size == 64);
|
||||
|
||||
VERIFY(hash->count.counter == 0);
|
||||
VERIFY(bpf_map_sum_elem_count(map) == 0);
|
||||
|
||||
for (i = 0; i < HALF_ENTRIES; ++i) {
|
||||
const __u32 key = i;
|
||||
const __u32 val = 1;
|
||||
|
|
@ -123,6 +127,7 @@ static inline int check_hash(void)
|
|||
return 0;
|
||||
}
|
||||
VERIFY(hash->count.counter == HALF_ENTRIES);
|
||||
VERIFY(bpf_map_sum_elem_count(map) == HALF_ENTRIES);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ struct {
|
|||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct map_value);
|
||||
__uint(max_entries, 1);
|
||||
__uint(max_entries, 2);
|
||||
} stashed_nodes SEC(".maps");
|
||||
|
||||
struct node_acquire {
|
||||
|
|
@ -42,6 +42,9 @@ private(A) struct bpf_list_head head __contains(node_data, l);
|
|||
private(B) struct bpf_spin_lock alock;
|
||||
private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
|
||||
|
||||
private(C) struct bpf_spin_lock block;
|
||||
private(C) struct bpf_rb_root broot __contains(node_data, r);
|
||||
|
||||
static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
|
||||
{
|
||||
struct node_data *a;
|
||||
|
|
@ -405,4 +408,93 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static long __stash_map_empty_xchg(struct node_data *n, int idx)
|
||||
{
|
||||
struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
|
||||
|
||||
if (!mapval) {
|
||||
bpf_obj_drop(n);
|
||||
return 1;
|
||||
}
|
||||
n = bpf_kptr_xchg(&mapval->node, n);
|
||||
if (n) {
|
||||
bpf_obj_drop(n);
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Allocate one node_data, take a second reference to it, and stash the two
 * references in slots 0 and 1 of stashed_nodes.
 *
 * Returns 0 on success, 1 if allocation fails, 2 if stashing the first
 * reference fails (the second is dropped), 3 if stashing the second fails.
 */
SEC("tc")
long rbtree_wrong_owner_remove_fail_a1(void *ctx)
{
	struct node_data *first, *second;

	first = bpf_obj_new(typeof(*first));
	if (!first)
		return 1;
	second = bpf_refcount_acquire(first);

	if (__stash_map_empty_xchg(first, 0)) {
		bpf_obj_drop(second);
		return 2;
	}

	if (__stash_map_empty_xchg(second, 1))
		return 3;

	return 0;
}
|
||||
|
||||
/*
 * Take the node stashed in slot 0 of stashed_nodes and add it to the `broot`
 * rbtree while holding `block`.
 *
 * Returns 0 on success, 1 if the map lookup fails, 2 if the slot was empty.
 */
SEC("tc")
long rbtree_wrong_owner_remove_fail_b(void *ctx)
{
	struct map_value *slot;
	struct node_data *node;
	int idx = 0;

	slot = bpf_map_lookup_elem(&stashed_nodes, &idx);
	if (!slot)
		return 1;

	node = bpf_kptr_xchg(&slot->node, NULL);
	if (!node)
		return 2;

	bpf_spin_lock(&block);
	bpf_rbtree_add(&broot, &node->r, less);
	bpf_spin_unlock(&block);

	return 0;
}
|
||||
|
||||
/*
 * Take the node stashed in slot 1 of stashed_nodes, push it onto the `head`
 * list under `lock`, then try to remove it from the `root` rbtree.
 *
 * Returns 0 when the removal yields NULL, 1 if the map lookup fails, 2 if the
 * slot was empty, and 3 if the removal unexpectedly returned a node (which is
 * then dropped).
 */
SEC("tc")
long rbtree_wrong_owner_remove_fail_a2(void *ctx)
{
	struct bpf_rb_node *removed;
	struct map_value *slot;
	struct node_data *node;
	int idx = 1;

	slot = bpf_map_lookup_elem(&stashed_nodes, &idx);
	if (!slot)
		return 1;

	node = bpf_kptr_xchg(&slot->node, NULL);
	if (!node)
		return 2;

	bpf_spin_lock(&lock);
	/* make node a non-owning ref */
	bpf_list_push_back(&head, &node->l);
	removed = bpf_rbtree_remove(&root, &node->r);
	bpf_spin_unlock(&lock);

	if (!removed)
		return 0;

	bpf_obj_drop(container_of(removed, struct node_data, r));
	return 3;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
|
|
|||
40
tools/testing/selftests/bpf/progs/test_tc_link.c
Normal file
40
tools/testing/selftests/bpf/progs/test_tc_link.c
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Isovalent */
|
||||
#include <stdbool.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
char LICENSE[] SEC("license") = "GPL";
|
||||
|
||||
bool seen_tc1;
|
||||
bool seen_tc2;
|
||||
bool seen_tc3;
|
||||
bool seen_tc4;
|
||||
|
||||
/* Ingress program: records that it ran in seen_tc1 and returns TCX_NEXT. */
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
{
	seen_tc1 = true;
	return TCX_NEXT;
}
|
||||
|
||||
/* Egress program: records that it ran in seen_tc2 and returns TCX_NEXT. */
SEC("tc/egress")
int tc2(struct __sk_buff *skb)
{
	seen_tc2 = true;
	return TCX_NEXT;
}
|
||||
|
||||
/* Egress program: records that it ran in seen_tc3 and returns TCX_NEXT. */
SEC("tc/egress")
int tc3(struct __sk_buff *skb)
{
	seen_tc3 = true;
	return TCX_NEXT;
}
|
||||
|
||||
/* Egress program: records that it ran in seen_tc4 and returns TCX_NEXT. */
SEC("tc/egress")
int tc4(struct __sk_buff *skb)
{
	seen_tc4 = true;
	return TCX_NEXT;
}
|
||||
|
|
@ -15,12 +15,12 @@ struct {
|
|||
static unsigned int idx;
|
||||
int count = 0;
|
||||
|
||||
SEC("xdp") int xsk_def_prog(struct xdp_md *xdp)
|
||||
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
|
||||
{
|
||||
return bpf_redirect_map(&xsk, 0, XDP_DROP);
|
||||
}
|
||||
|
||||
SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
|
||||
SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp)
|
||||
{
|
||||
/* Drop every other packet */
|
||||
if (idx++ % 2)
|
||||
|
|
@ -29,7 +29,7 @@ SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
|
|||
return bpf_redirect_map(&xsk, 0, XDP_DROP);
|
||||
}
|
||||
|
||||
SEC("xdp") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
|
||||
SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
|
||||
{
|
||||
void *data, *data_meta;
|
||||
struct xdp_info *meta;
|
||||
|
|
|
|||
|
|
@ -171,7 +171,10 @@ exec_xskxceiver
|
|||
|
||||
if [ -z $ETH ]; then
|
||||
cleanup_exit ${VETH0} ${VETH1}
|
||||
else
|
||||
cleanup_iface ${ETH} ${MTU}
|
||||
fi
|
||||
|
||||
TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
|
||||
busy_poll=1
|
||||
|
||||
|
|
@ -184,6 +187,8 @@ exec_xskxceiver
|
|||
|
||||
if [ -z $ETH ]; then
|
||||
cleanup_exit ${VETH0} ${VETH1}
|
||||
else
|
||||
cleanup_iface ${ETH} ${MTU}
|
||||
fi
|
||||
|
||||
failures=0
|
||||
|
|
|
|||
|
|
@ -18,17 +18,19 @@
|
|||
#include <linux/ethtool.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/if_packet.h>
|
||||
#include <linux/if_xdp.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/sockios.h>
|
||||
#include <net/if.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/if_link.h>
|
||||
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
|
|
@ -81,6 +83,12 @@ struct xsk_socket {
|
|||
int fd;
|
||||
};
|
||||
|
||||
/* Netlink request used by xsk_set_mtu(): header, link message, attributes. */
struct nl_mtu_req {
	struct nlmsghdr nh;
	struct ifinfomsg msg;
	/* Room for the attributes that follow the ifinfomsg (IFLA_MTU) */
	char buf[512];
};
|
||||
|
||||
int xsk_umem__fd(const struct xsk_umem *umem)
|
||||
{
|
||||
return umem ? umem->fd : -EINVAL;
|
||||
|
|
@ -286,6 +294,132 @@ bool xsk_is_in_mode(u32 ifindex, int mode)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Lifted from netlink.c in tools/lib/bpf */
|
||||
/*
 * recvmsg() wrapper that retries while the call fails with EINTR or EAGAIN.
 *
 * Returns the number of bytes reported by recvmsg(), or -errno for any other
 * failure.
 */
static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
{
	for (;;) {
		int len = recvmsg(sock, mhdr, flags);

		if (len >= 0)
			return len;
		if (errno != EINTR && errno != EAGAIN)
			return -errno;
	}
}
|
||||
|
||||
/* Lifted from netlink.c in tools/lib/bpf */
|
||||
/*
 * Resize the buffer described by @iov to @len bytes with realloc().
 *
 * On success both iov_base and iov_len are updated and 0 is returned. On
 * allocation failure @iov is left untouched and -ENOMEM is returned.
 */
static int alloc_iov(struct iovec *iov, int len)
{
	void *grown = realloc(iov->iov_base, len);

	if (grown) {
		iov->iov_base = grown;
		iov->iov_len = len;
		return 0;
	}

	return -ENOMEM;
}
|
||||
|
||||
/* Original version lifted from netlink.c in tools/lib/bpf */
|
||||
/*
 * Drain the netlink reply (or multipart reply) pending on @sock.
 *
 * Returns 0 once an NLMSG_DONE or a zero-error NLMSG_ERROR (ack) has been
 * consumed and no further parts are announced, the negative error carried by
 * an NLMSG_ERROR message, or a negative value from receiving/allocating.
 */
static int netlink_recv(int sock)
{
	struct iovec iov = {};
	struct msghdr mhdr = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	bool more;
	int len, ret;

	ret = alloc_iov(&iov, 4096);
	if (ret)
		goto out;

	do {
		struct nlmsghdr *nh;

		more = false;

		/* Peek first so the buffer can be grown before the real read */
		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
		if (len < 0) {
			ret = len;
			goto out;
		}

		if (len > iov.iov_len) {
			ret = alloc_iov(&iov, len);
			if (ret)
				goto out;
		}

		len = netlink_recvmsg(sock, &mhdr, 0);
		if (len < 0) {
			ret = len;
			goto out;
		}

		if (len == 0)
			break;

		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
		     nh = NLMSG_NEXT(nh, len)) {
			/* Any part flagged NLM_F_MULTI means another read is needed */
			if (nh->nlmsg_flags & NLM_F_MULTI)
				more = true;

			if (nh->nlmsg_type == NLMSG_DONE) {
				ret = 0;
				goto out;
			}

			if (nh->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);

				/* error == 0 is a plain ack; keep scanning */
				if (err->error) {
					ret = err->error;
					goto out;
				}
			}
		}
	} while (more);

	ret = 0;
out:
	free(iov.iov_base);
	return ret;
}
|
||||
|
||||
int xsk_set_mtu(int ifindex, int mtu)
|
||||
{
|
||||
struct nl_mtu_req req;
|
||||
struct rtattr *rta;
|
||||
int fd, ret;
|
||||
|
||||
fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
|
||||
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
||||
req.nh.nlmsg_type = RTM_NEWLINK;
|
||||
req.msg.ifi_family = AF_UNSPEC;
|
||||
req.msg.ifi_index = ifindex;
|
||||
rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
|
||||
rta->rta_type = IFLA_MTU;
|
||||
rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
|
||||
req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));
|
||||
memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
|
||||
|
||||
ret = send(fd, &req, req.nh.nlmsg_len, 0);
|
||||
if (ret < 0) {
|
||||
close(fd);
|
||||
return errno;
|
||||
}
|
||||
|
||||
ret = netlink_recv(fd);
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
|
||||
{
|
||||
int prog_fd;
|
||||
|
|
|
|||
|
|
@ -239,6 +239,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
|
|||
int xsk_umem__delete(struct xsk_umem *umem);
|
||||
void xsk_socket__delete(struct xsk_socket *xsk);
|
||||
|
||||
int xsk_set_mtu(int ifindex, int mtu);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -53,6 +53,13 @@ test_exit()
|
|||
exit 1
|
||||
}
|
||||
|
||||
# Restore interface $1: set its MTU back to $2 and turn off both the xdp and
# xdpgeneric programs. Arguments are quoted so an empty or unusual value
# cannot be word-split or glob-expanded into extra `ip` arguments.
cleanup_iface()
{
	ip link set "$1" mtu "$2"
	ip link set "$1" xdp off
	ip link set "$1" xdpgeneric off
}
|
||||
|
||||
clear_configs()
|
||||
{
|
||||
[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
|
||||
|
|
|
|||
|
|
@ -49,8 +49,11 @@
|
|||
* h. tests for invalid and corner case Tx descriptors so that the correct ones
|
||||
* are discarded and let through, respectively.
|
||||
* i. 2K frame size tests
|
||||
*
|
||||
* Total tests: 12
|
||||
* j. If multi-buffer is supported, send 9k packets divided into 3 frames
|
||||
* k. If multi-buffer and huge pages are supported, send 9k packets in a single frame
|
||||
* using unaligned mode
|
||||
* l. If multi-buffer is supported, try various nasty combinations of descriptors to
|
||||
* check if they pass the validation or not
|
||||
*
|
||||
* Flow:
|
||||
* -----
|
||||
|
|
@ -73,10 +76,10 @@
|
|||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <getopt.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/netdev.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <net/if.h>
|
||||
#include <locale.h>
|
||||
|
|
@ -91,7 +94,6 @@
|
|||
#include <sys/socket.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "xsk_xdp_progs.skel.h"
|
||||
|
|
@ -253,6 +255,8 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
|
|||
cfg.bind_flags = ifobject->bind_flags;
|
||||
if (shared)
|
||||
cfg.bind_flags |= XDP_SHARED_UMEM;
|
||||
if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE)
|
||||
cfg.bind_flags |= XDP_USE_SG;
|
||||
|
||||
txr = ifobject->tx_on ? &xsk->tx : NULL;
|
||||
rxr = ifobject->rx_on ? &xsk->rx : NULL;
|
||||
|
|
@ -415,6 +419,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
|
|||
test->total_steps = 1;
|
||||
test->nb_sockets = 1;
|
||||
test->fail = false;
|
||||
test->mtu = MAX_ETH_PKT_SIZE;
|
||||
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
|
||||
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
|
||||
test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
|
||||
|
|
@ -468,6 +473,26 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x
|
|||
test->xskmap_tx = xskmap_tx;
|
||||
}
|
||||
|
||||
static int test_spec_set_mtu(struct test_spec *test, int mtu)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (test->ifobj_rx->mtu != mtu) {
|
||||
err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
|
||||
if (err)
|
||||
return err;
|
||||
test->ifobj_rx->mtu = mtu;
|
||||
}
|
||||
if (test->ifobj_tx->mtu != mtu) {
|
||||
err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
|
||||
if (err)
|
||||
return err;
|
||||
test->ifobj_tx->mtu = mtu;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pkt_stream_reset(struct pkt_stream *pkt_stream)
|
||||
{
|
||||
if (pkt_stream)
|
||||
|
|
@ -533,23 +558,49 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
|
|||
return pkt_stream;
|
||||
}
|
||||
|
||||
/* True when @options has the XDP_PKT_CONTD bit set. */
static bool pkt_continues(u32 options)
{
	return (options & XDP_PKT_CONTD) != 0;
}
|
||||
|
||||
/*
 * Integer division of @a by @b, rounded up.
 *
 * Computed as quotient plus one-if-remainder, so the result stays correct for
 * values of @a close to the u32 maximum, where the `a + b - 1` form would wrap
 * around. @b must be non-zero.
 */
static u32 ceil_u32(u32 a, u32 b)
{
	return a / b + (a % b != 0);
}
|
||||
|
||||
static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt)
|
||||
static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
|
||||
{
|
||||
if (!pkt || !pkt->valid)
|
||||
u32 nb_frags = 1, next_frag;
|
||||
|
||||
if (!pkt)
|
||||
return 1;
|
||||
return ceil_u32(pkt->len, frame_size);
|
||||
|
||||
if (!pkt_stream->verbatim) {
|
||||
if (!pkt->valid || !pkt->len)
|
||||
return 1;
|
||||
return ceil_u32(pkt->len, frame_size);
|
||||
}
|
||||
|
||||
/* Search for the end of the packet in verbatim mode */
|
||||
if (!pkt_continues(pkt->options))
|
||||
return nb_frags;
|
||||
|
||||
next_frag = pkt_stream->current_pkt_nb;
|
||||
pkt++;
|
||||
while (next_frag++ < pkt_stream->nb_pkts) {
|
||||
nb_frags++;
|
||||
if (!pkt_continues(pkt->options) || !pkt->valid)
|
||||
break;
|
||||
pkt++;
|
||||
}
|
||||
return nb_frags;
|
||||
}
|
||||
|
||||
static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
|
||||
{
|
||||
pkt->offset = offset;
|
||||
pkt->len = len;
|
||||
if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
|
||||
if (len > MAX_ETH_JUMBO_SIZE)
|
||||
pkt->valid = false;
|
||||
else
|
||||
pkt->valid = true;
|
||||
|
|
@ -637,6 +688,11 @@ static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
|
|||
return pkt->offset + umem_alloc_buffer(umem);
|
||||
}
|
||||
|
||||
static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
|
||||
{
|
||||
pkt_stream->current_pkt_nb--;
|
||||
}
|
||||
|
||||
static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
|
||||
u32 bytes_written)
|
||||
{
|
||||
|
|
@ -657,34 +713,59 @@ static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_n
|
|||
write_payload(data, pkt_nb, bytes_written, len);
|
||||
}
|
||||
|
||||
static void __pkt_stream_generate_custom(struct ifobject *ifobj,
|
||||
struct pkt *pkts, u32 nb_pkts)
|
||||
static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
|
||||
u32 nb_frames, bool verbatim)
|
||||
{
|
||||
u32 i, len = 0, pkt_nb = 0, payload = 0;
|
||||
struct pkt_stream *pkt_stream;
|
||||
u32 i;
|
||||
|
||||
pkt_stream = __pkt_stream_alloc(nb_pkts);
|
||||
pkt_stream = __pkt_stream_alloc(nb_frames);
|
||||
if (!pkt_stream)
|
||||
exit_with_error(ENOMEM);
|
||||
|
||||
for (i = 0; i < nb_pkts; i++) {
|
||||
struct pkt *pkt = &pkt_stream->pkts[i];
|
||||
for (i = 0; i < nb_frames; i++) {
|
||||
struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
|
||||
struct pkt *frame = &frames[i];
|
||||
|
||||
pkt->offset = pkts[i].offset;
|
||||
pkt->len = pkts[i].len;
|
||||
pkt->pkt_nb = i;
|
||||
pkt->valid = pkts[i].valid;
|
||||
if (pkt->len > pkt_stream->max_pkt_len)
|
||||
pkt->offset = frame->offset;
|
||||
if (verbatim) {
|
||||
*pkt = *frame;
|
||||
pkt->pkt_nb = payload;
|
||||
if (!frame->valid || !pkt_continues(frame->options))
|
||||
payload++;
|
||||
} else {
|
||||
if (frame->valid)
|
||||
len += frame->len;
|
||||
if (frame->valid && pkt_continues(frame->options))
|
||||
continue;
|
||||
|
||||
pkt->pkt_nb = pkt_nb;
|
||||
pkt->len = len;
|
||||
pkt->valid = frame->valid;
|
||||
pkt->options = 0;
|
||||
|
||||
len = 0;
|
||||
}
|
||||
|
||||
if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
|
||||
pkt_stream->max_pkt_len = pkt->len;
|
||||
pkt_nb++;
|
||||
}
|
||||
|
||||
ifobj->pkt_stream = pkt_stream;
|
||||
pkt_stream->nb_pkts = pkt_nb;
|
||||
pkt_stream->verbatim = verbatim;
|
||||
return pkt_stream;
|
||||
}
|
||||
|
||||
static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
|
||||
{
|
||||
__pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts);
|
||||
__pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts);
|
||||
struct pkt_stream *pkt_stream;
|
||||
|
||||
pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
|
||||
test->ifobj_tx->pkt_stream = pkt_stream;
|
||||
|
||||
pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
|
||||
test->ifobj_rx->pkt_stream = pkt_stream;
|
||||
}
|
||||
|
||||
static void pkt_print_data(u32 *data, u32 cnt)
|
||||
|
|
@ -765,43 +846,76 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
|
||||
static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
|
||||
u32 bytes_processed)
|
||||
{
|
||||
void *data = xsk_umem__get_data(buffer, addr);
|
||||
u32 seqnum, pkt_data;
|
||||
u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
|
||||
void *data = xsk_umem__get_data(umem->buffer, addr);
|
||||
|
||||
if (!pkt) {
|
||||
ksft_print_msg("[%s] too many packets received\n", __func__);
|
||||
addr -= umem->base_addr;
|
||||
|
||||
if (addr >= umem->num_frames * umem->frame_size ||
|
||||
addr + len > umem->num_frames * umem->frame_size) {
|
||||
ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
|
||||
return false;
|
||||
}
|
||||
if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
|
||||
ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
|
||||
return false;
|
||||
}
|
||||
|
||||
pkt_data = data;
|
||||
if (!bytes_processed) {
|
||||
pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
|
||||
len -= PKT_HDR_SIZE;
|
||||
} else {
|
||||
bytes_processed -= PKT_HDR_SIZE;
|
||||
}
|
||||
|
||||
expected_seqnum = bytes_processed / sizeof(*pkt_data);
|
||||
seqnum = ntohl(*pkt_data) & 0xffff;
|
||||
pkt_nb = ntohl(*pkt_data) >> 16;
|
||||
|
||||
if (expected_pkt_nb != pkt_nb) {
|
||||
ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
|
||||
__func__, expected_pkt_nb, pkt_nb);
|
||||
goto error;
|
||||
}
|
||||
if (expected_seqnum != seqnum) {
|
||||
ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
|
||||
__func__, expected_seqnum, seqnum);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
|
||||
/* Do not try to verify packets that are smaller than minimum size. */
|
||||
return true;
|
||||
}
|
||||
|
||||
if (pkt->len != len) {
|
||||
ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
|
||||
__func__, pkt->len, len);
|
||||
goto error;
|
||||
}
|
||||
|
||||
pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
|
||||
seqnum = pkt_data >> 16;
|
||||
|
||||
if (pkt->pkt_nb != seqnum) {
|
||||
ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
|
||||
__func__, pkt->pkt_nb, seqnum);
|
||||
words_to_end = len / sizeof(*pkt_data) - 1;
|
||||
pkt_data += words_to_end;
|
||||
seqnum = ntohl(*pkt_data) & 0xffff;
|
||||
expected_seqnum += words_to_end;
|
||||
if (expected_seqnum != seqnum) {
|
||||
ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
|
||||
__func__, expected_seqnum, seqnum);
|
||||
goto error;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
error:
|
||||
pkt_dump(data, len, true);
|
||||
pkt_dump(data, len, !bytes_processed);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
|
||||
{
|
||||
if (pkt->len != len) {
|
||||
ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
|
||||
__func__, pkt->len, len);
|
||||
pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kick_tx(struct xsk_socket_info *xsk)
|
||||
{
|
||||
int ret;
|
||||
|
|
@ -854,8 +968,8 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
|
|||
{
|
||||
struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
|
||||
struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
|
||||
u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
|
||||
struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
|
||||
u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
|
||||
struct ifobject *ifobj = test->ifobj_rx;
|
||||
struct xsk_umem_info *umem = xsk->umem;
|
||||
struct pkt *pkt;
|
||||
|
|
@ -868,6 +982,9 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
|
|||
|
||||
pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
|
||||
while (pkt) {
|
||||
u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
|
||||
u64 first_addr;
|
||||
|
||||
ret = gettimeofday(&tv_now, NULL);
|
||||
if (ret)
|
||||
exit_with_error(errno);
|
||||
|
|
@ -888,7 +1005,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
|
|||
|
||||
ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
|
||||
return TEST_FAILURE;
|
||||
|
||||
}
|
||||
|
||||
if (!(fds->revents & POLLIN))
|
||||
|
|
@ -913,27 +1029,59 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < rcvd; i++) {
|
||||
while (frags_processed < rcvd) {
|
||||
const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
|
||||
u64 addr = desc->addr, orig;
|
||||
|
||||
orig = xsk_umem__extract_addr(addr);
|
||||
addr = xsk_umem__add_offset_to_addr(addr);
|
||||
|
||||
if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
|
||||
if (!pkt) {
|
||||
ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
|
||||
__func__, addr, desc->len);
|
||||
return TEST_FAILURE;
|
||||
}
|
||||
|
||||
if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
|
||||
!is_offset_correct(umem, pkt, addr) ||
|
||||
(ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
|
||||
return TEST_FAILURE;
|
||||
|
||||
if (!nb_frags++)
|
||||
first_addr = addr;
|
||||
frags_processed++;
|
||||
pkt_len += desc->len;
|
||||
if (ifobj->use_fill_ring)
|
||||
*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
|
||||
|
||||
if (pkt_continues(desc->options))
|
||||
continue;
|
||||
|
||||
/* The complete packet has been received */
|
||||
if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
|
||||
!is_offset_correct(umem, pkt, addr))
|
||||
return TEST_FAILURE;
|
||||
|
||||
pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
|
||||
nb_frags = 0;
|
||||
pkt_len = 0;
|
||||
}
|
||||
|
||||
if (nb_frags) {
|
||||
/* In the middle of a packet. Start over from beginning of packet. */
|
||||
idx_rx -= nb_frags;
|
||||
xsk_ring_cons__cancel(&xsk->rx, nb_frags);
|
||||
if (ifobj->use_fill_ring) {
|
||||
idx_fq -= nb_frags;
|
||||
xsk_ring_prod__cancel(&umem->fq, nb_frags);
|
||||
}
|
||||
frags_processed -= nb_frags;
|
||||
}
|
||||
|
||||
if (ifobj->use_fill_ring)
|
||||
xsk_ring_prod__submit(&umem->fq, rcvd);
|
||||
xsk_ring_prod__submit(&umem->fq, frags_processed);
|
||||
if (ifobj->release_rx)
|
||||
xsk_ring_cons__release(&xsk->rx, rcvd);
|
||||
xsk_ring_cons__release(&xsk->rx, frags_processed);
|
||||
|
||||
pthread_mutex_lock(&pacing_mutex);
|
||||
pkts_in_flight -= pkts_sent;
|
||||
|
|
@ -946,13 +1094,14 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
|
|||
|
||||
static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
|
||||
{
|
||||
u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
|
||||
struct pkt_stream *pkt_stream = ifobject->pkt_stream;
|
||||
struct xsk_socket_info *xsk = ifobject->xsk;
|
||||
struct xsk_umem_info *umem = ifobject->umem;
|
||||
u32 i, idx = 0, valid_pkts = 0, buffer_len;
|
||||
bool use_poll = ifobject->use_poll;
|
||||
int ret;
|
||||
|
||||
buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len);
|
||||
buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
|
||||
/* pkts_in_flight might be negative if many invalid packets are sent */
|
||||
if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
|
||||
kick_tx(xsk);
|
||||
|
|
@ -983,17 +1132,49 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
|
|||
}
|
||||
|
||||
for (i = 0; i < BATCH_SIZE; i++) {
|
||||
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
|
||||
struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream);
|
||||
struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
|
||||
u32 nb_frags_left, nb_frags, bytes_written = 0;
|
||||
|
||||
if (!pkt)
|
||||
break;
|
||||
|
||||
tx_desc->addr = pkt_get_addr(pkt, umem);
|
||||
tx_desc->len = pkt->len;
|
||||
if (pkt->valid) {
|
||||
nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
|
||||
if (nb_frags > BATCH_SIZE - i) {
|
||||
pkt_stream_cancel(pkt_stream);
|
||||
xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i);
|
||||
break;
|
||||
}
|
||||
nb_frags_left = nb_frags;
|
||||
|
||||
while (nb_frags_left--) {
|
||||
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
|
||||
|
||||
tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
|
||||
if (pkt_stream->verbatim) {
|
||||
tx_desc->len = pkt->len;
|
||||
tx_desc->options = pkt->options;
|
||||
} else if (nb_frags_left) {
|
||||
tx_desc->len = umem->frame_size;
|
||||
tx_desc->options = XDP_PKT_CONTD;
|
||||
} else {
|
||||
tx_desc->len = pkt->len - bytes_written;
|
||||
tx_desc->options = 0;
|
||||
}
|
||||
if (pkt->valid)
|
||||
pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
|
||||
bytes_written);
|
||||
bytes_written += tx_desc->len;
|
||||
|
||||
if (nb_frags_left) {
|
||||
i++;
|
||||
if (pkt_stream->verbatim)
|
||||
pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
|
||||
}
|
||||
}
|
||||
|
||||
if (pkt && pkt->valid) {
|
||||
valid_pkts++;
|
||||
pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0);
|
||||
valid_frags += nb_frags;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1002,7 +1183,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
|
|||
pthread_mutex_unlock(&pacing_mutex);
|
||||
|
||||
xsk_ring_prod__submit(&xsk->tx, i);
|
||||
xsk->outstanding_tx += valid_pkts;
|
||||
xsk->outstanding_tx += valid_frags;
|
||||
|
||||
if (use_poll) {
|
||||
ret = poll(fds, 1, POLL_TMOUT);
|
||||
|
|
@ -1222,7 +1403,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
|
|||
u64 addr;
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) {
|
||||
for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
|
||||
if (!pkt) {
|
||||
if (!fill_up)
|
||||
break;
|
||||
|
|
@ -1415,6 +1596,25 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
|
|||
struct ifobject *ifobj2)
|
||||
{
|
||||
pthread_t t0, t1;
|
||||
int err;
|
||||
|
||||
if (test->mtu > MAX_ETH_PKT_SIZE) {
|
||||
if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
|
||||
(ifobj2 && !ifobj2->multi_buff_zc_supp))) {
|
||||
ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
|
||||
return TEST_SKIP;
|
||||
}
|
||||
if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
|
||||
(ifobj2 && !ifobj2->multi_buff_supp))) {
|
||||
ksft_test_result_skip("Multi buffer not supported.\n");
|
||||
return TEST_SKIP;
|
||||
}
|
||||
}
|
||||
err = test_spec_set_mtu(test, test->mtu);
|
||||
if (err) {
|
||||
ksft_print_msg("Error, could not set mtu.\n");
|
||||
exit_with_error(err);
|
||||
}
|
||||
|
||||
if (ifobj2) {
|
||||
if (pthread_barrier_init(&barr, NULL, 2))
|
||||
|
|
@ -1616,6 +1816,16 @@ static int testapp_unaligned(struct test_spec *test)
|
|||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int testapp_unaligned_mb(struct test_spec *test)
|
||||
{
|
||||
test_spec_set_name(test, "UNALIGNED_MODE_9K");
|
||||
test->mtu = MAX_ETH_JUMBO_SIZE;
|
||||
test->ifobj_tx->umem->unaligned_mode = true;
|
||||
test->ifobj_rx->umem->unaligned_mode = true;
|
||||
pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
|
||||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int testapp_single_pkt(struct test_spec *test)
|
||||
{
|
||||
struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
|
||||
|
|
@ -1624,6 +1834,55 @@ static int testapp_single_pkt(struct test_spec *test)
|
|||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int testapp_multi_buffer(struct test_spec *test)
|
||||
{
|
||||
test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS");
|
||||
test->mtu = MAX_ETH_JUMBO_SIZE;
|
||||
pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
|
||||
|
||||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int testapp_invalid_desc_mb(struct test_spec *test)
|
||||
{
|
||||
struct xsk_umem_info *umem = test->ifobj_tx->umem;
|
||||
u64 umem_size = umem->num_frames * umem->frame_size;
|
||||
struct pkt pkts[] = {
|
||||
/* Valid packet for synch to start with */
|
||||
{0, MIN_PKT_SIZE, 0, true, 0},
|
||||
/* Zero frame len is not legal */
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{0, 0, 0, false, 0},
|
||||
/* Invalid address in the second frame */
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
/* Invalid len in the middle */
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
/* Invalid options in the middle */
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
|
||||
/* Transmit 2 frags, receive 3 */
|
||||
{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
|
||||
{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
|
||||
/* Middle frame crosses chunk boundary with small length */
|
||||
{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
|
||||
{-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
|
||||
/* Valid packet for synch so that something is received */
|
||||
{0, MIN_PKT_SIZE, 0, true, 0}};
|
||||
|
||||
if (umem->unaligned_mode) {
|
||||
/* Crossing a chunk boundary allowed */
|
||||
pkts[12].valid = true;
|
||||
pkts[13].valid = true;
|
||||
}
|
||||
|
||||
test->mtu = MAX_ETH_JUMBO_SIZE;
|
||||
pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
|
||||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int testapp_invalid_desc(struct test_spec *test)
|
||||
{
|
||||
struct xsk_umem_info *umem = test->ifobj_tx->umem;
|
||||
|
|
@ -1690,7 +1949,6 @@ static int testapp_xdp_metadata_count(struct test_spec *test)
|
|||
int count = 0;
|
||||
int key = 0;
|
||||
|
||||
test_spec_set_name(test, "XDP_METADATA_COUNT");
|
||||
test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
|
||||
skel_tx->progs.xsk_xdp_populate_metadata,
|
||||
skel_rx->maps.xsk, skel_tx->maps.xsk);
|
||||
|
|
@ -1724,6 +1982,48 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
|
|||
return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
|
||||
}
|
||||
|
||||
static int testapp_too_many_frags(struct test_spec *test)
|
||||
{
|
||||
struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
|
||||
u32 max_frags, i;
|
||||
|
||||
test_spec_set_name(test, "TOO_MANY_FRAGS");
|
||||
if (test->mode == TEST_MODE_ZC)
|
||||
max_frags = test->ifobj_tx->xdp_zc_max_segs;
|
||||
else
|
||||
max_frags = XSK_DESC__MAX_SKB_FRAGS;
|
||||
|
||||
test->mtu = MAX_ETH_JUMBO_SIZE;
|
||||
|
||||
/* Valid packet for synch */
|
||||
pkts[0].len = MIN_PKT_SIZE;
|
||||
pkts[0].valid = true;
|
||||
|
||||
/* One valid packet with the max amount of frags */
|
||||
for (i = 1; i < max_frags + 1; i++) {
|
||||
pkts[i].len = MIN_PKT_SIZE;
|
||||
pkts[i].options = XDP_PKT_CONTD;
|
||||
pkts[i].valid = true;
|
||||
}
|
||||
pkts[max_frags].options = 0;
|
||||
|
||||
/* An invalid packet with the max amount of frags but signals packet
|
||||
* continues on the last frag
|
||||
*/
|
||||
for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
|
||||
pkts[i].len = MIN_PKT_SIZE;
|
||||
pkts[i].options = XDP_PKT_CONTD;
|
||||
pkts[i].valid = false;
|
||||
}
|
||||
|
||||
/* Valid packet for synch */
|
||||
pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
|
||||
pkts[2 * max_frags + 1].valid = true;
|
||||
|
||||
pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
|
||||
return testapp_validate_traffic(test);
|
||||
}
|
||||
|
||||
static int xsk_load_xdp_programs(struct ifobject *ifobj)
|
||||
{
|
||||
ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
|
||||
|
|
@ -1757,6 +2057,7 @@ static bool hugepages_present(void)
|
|||
static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
|
||||
thread_func_t func_ptr)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
|
||||
int err;
|
||||
|
||||
memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
|
||||
|
|
@ -1772,6 +2073,22 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
|
|||
|
||||
if (hugepages_present())
|
||||
ifobj->unaligned_supp = true;
|
||||
|
||||
err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
|
||||
if (err) {
|
||||
ksft_print_msg("Error querrying XDP capabilities\n");
|
||||
exit_with_error(-err);
|
||||
}
|
||||
if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
|
||||
ifobj->multi_buff_supp = true;
|
||||
if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
|
||||
if (query_opts.xdp_zc_max_segs > 1) {
|
||||
ifobj->multi_buff_zc_supp = true;
|
||||
ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
|
||||
} else {
|
||||
ifobj->xdp_zc_max_segs = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
|
||||
|
|
@ -1804,6 +2121,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
|
|||
test_spec_set_name(test, "RUN_TO_COMPLETION");
|
||||
ret = testapp_validate_traffic(test);
|
||||
break;
|
||||
case TEST_TYPE_RUN_TO_COMPLETION_MB:
|
||||
ret = testapp_multi_buffer(test);
|
||||
break;
|
||||
case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
|
||||
test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
|
||||
ret = testapp_single_pkt(test);
|
||||
|
|
@ -1866,9 +2186,22 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
|
|||
ret = testapp_invalid_desc(test);
|
||||
break;
|
||||
}
|
||||
case TEST_TYPE_ALIGNED_INV_DESC_MB:
|
||||
test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF");
|
||||
ret = testapp_invalid_desc_mb(test);
|
||||
break;
|
||||
case TEST_TYPE_UNALIGNED_INV_DESC_MB:
|
||||
test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF");
|
||||
test->ifobj_tx->umem->unaligned_mode = true;
|
||||
test->ifobj_rx->umem->unaligned_mode = true;
|
||||
ret = testapp_invalid_desc_mb(test);
|
||||
break;
|
||||
case TEST_TYPE_UNALIGNED:
|
||||
ret = testapp_unaligned(test);
|
||||
break;
|
||||
case TEST_TYPE_UNALIGNED_MB:
|
||||
ret = testapp_unaligned_mb(test);
|
||||
break;
|
||||
case TEST_TYPE_HEADROOM:
|
||||
ret = testapp_headroom(test);
|
||||
break;
|
||||
|
|
@ -1876,8 +2209,17 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
|
|||
ret = testapp_xdp_drop(test);
|
||||
break;
|
||||
case TEST_TYPE_XDP_METADATA_COUNT:
|
||||
test_spec_set_name(test, "XDP_METADATA_COUNT");
|
||||
ret = testapp_xdp_metadata_count(test);
|
||||
break;
|
||||
case TEST_TYPE_XDP_METADATA_COUNT_MB:
|
||||
test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF");
|
||||
test->mtu = MAX_ETH_JUMBO_SIZE;
|
||||
ret = testapp_xdp_metadata_count(test);
|
||||
break;
|
||||
case TEST_TYPE_TOO_MANY_FRAGS:
|
||||
ret = testapp_too_many_frags(test);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@
|
|||
#define MAX_TEARDOWN_ITER 10
|
||||
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
|
||||
#define MIN_PKT_SIZE 64
|
||||
#define MAX_ETH_PKT_SIZE 1518
|
||||
#define MAX_ETH_JUMBO_SIZE 9000
|
||||
#define USLEEP_MAX 10000
|
||||
#define SOCK_RECONF_CTR 10
|
||||
#define BATCH_SIZE 64
|
||||
|
|
@ -47,7 +49,11 @@
|
|||
#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
|
||||
#define RX_FULL_RXQSIZE 32
|
||||
#define UMEM_HEADROOM_TEST_SIZE 128
|
||||
#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
|
||||
#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
|
||||
#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
|
||||
#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
|
||||
#define XSK_DESC__INVALID_OPTION (0xffff)
|
||||
#define XSK_DESC__MAX_SKB_FRAGS 18
|
||||
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
|
||||
#define PKT_DUMP_NB_TO_PRINT 16
|
||||
|
||||
|
|
@ -83,6 +89,12 @@ enum test_type {
|
|||
TEST_TYPE_BPF_RES,
|
||||
TEST_TYPE_XDP_DROP_HALF,
|
||||
TEST_TYPE_XDP_METADATA_COUNT,
|
||||
TEST_TYPE_XDP_METADATA_COUNT_MB,
|
||||
TEST_TYPE_RUN_TO_COMPLETION_MB,
|
||||
TEST_TYPE_UNALIGNED_MB,
|
||||
TEST_TYPE_ALIGNED_INV_DESC_MB,
|
||||
TEST_TYPE_UNALIGNED_INV_DESC_MB,
|
||||
TEST_TYPE_TOO_MANY_FRAGS,
|
||||
TEST_TYPE_MAX
|
||||
};
|
||||
|
||||
|
|
@ -115,6 +127,7 @@ struct pkt {
|
|||
u32 len;
|
||||
u32 pkt_nb;
|
||||
bool valid;
|
||||
u16 options;
|
||||
};
|
||||
|
||||
struct pkt_stream {
|
||||
|
|
@ -122,6 +135,7 @@ struct pkt_stream {
|
|||
u32 current_pkt_nb;
|
||||
struct pkt *pkts;
|
||||
u32 max_pkt_len;
|
||||
bool verbatim;
|
||||
};
|
||||
|
||||
struct ifobject;
|
||||
|
|
@ -141,7 +155,9 @@ struct ifobject {
|
|||
struct bpf_program *xdp_prog;
|
||||
enum test_mode mode;
|
||||
int ifindex;
|
||||
int mtu;
|
||||
u32 bind_flags;
|
||||
u32 xdp_zc_max_segs;
|
||||
bool tx_on;
|
||||
bool rx_on;
|
||||
bool use_poll;
|
||||
|
|
@ -151,6 +167,8 @@ struct ifobject {
|
|||
bool shared_umem;
|
||||
bool use_metadata;
|
||||
bool unaligned_supp;
|
||||
bool multi_buff_supp;
|
||||
bool multi_buff_zc_supp;
|
||||
u8 dst_mac[ETH_ALEN];
|
||||
u8 src_mac[ETH_ALEN];
|
||||
};
|
||||
|
|
@ -164,6 +182,7 @@ struct test_spec {
|
|||
struct bpf_program *xdp_prog_tx;
|
||||
struct bpf_map *xskmap_rx;
|
||||
struct bpf_map *xskmap_tx;
|
||||
int mtu;
|
||||
u16 total_steps;
|
||||
u16 current_step;
|
||||
u16 nb_sockets;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue