mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
io_uring-bpf-restrictions.4-20260206
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmmGJ1kQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpky8EAChIL3uJ5Vmv+oQTxT4EVb1wpc8U/XzXWU5 Q5F9IpZZCGO7+i015Y7iTTqDRixjblRaWpWzZZP8vflWDUS8LESNZLQdcoEnxaiv P367KNPUGwxejcKsu8PvZvfnX6JWSQoNstcDmrwkCF0ND2UUfvvMZyn3uKhkbBRY h5Ehcqkvqc1OJDAWC7+yPzYAmB01uRPQ6sc9/GeujznHPlfbvie4u6gBvvfXeirT 592zbVftINMrm6Twd6zl4n+HNAn+CUoyVMppeeddv5IcyFPm9uz/dLOZBXTz6552 jFYNmB0U4g+SxGXMyqp37YISTALnuY+57y5eXmEAtgkEeE3HrF+F/ZdxQHwXSpo3 T2Lb9IOqFyHtSvq678HZ37JB6aIYbBE/mZdNf8FFFpnPJGb5Ey7d50qPp/ywVq0H p9CahbpkzGUBMsZ+koew0YHiFdWV9tww+/Bnk5dTtn2197uyaHsLdmbf4C36GWke Bk5cwNgU+3DMFAfTiL9m+AIXYsJkBayRJn+hViTrF5AL7gcGiBryGF43FOSKoYuq f0mniDnGSwvn86VZPuZQ6wBRHZPEMR3OlaUXn6XrUU6cYyvMg0pBZV+QHF7zlsSP 2sdfUbPL5TxexF3G8dsxlDIypz9Z6TCoUCfU0WiiUETnCrVNkXfIY846A+w08p0b ejBjzrwRtQ== =CqJq -----END PGP SIGNATURE----- Merge tag 'io_uring-bpf-restrictions.4-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux Pull io_uring bpf filters from Jens Axboe: "This adds support for both cBPF filters for io_uring, as well as task inherited restrictions and filters. seccomp and io_uring don't play along nicely, as most of the interesting data to filter on resides somewhat out-of-band, in the submission queue ring. As a result, things like containers and systemd that apply seccomp filters, can't filter io_uring operations. That leaves them with just one choice if filtering is critical - filter the actual io_uring_setup(2) system call to simply disallow io_uring. That's rather unfortunate, and has limited us because of it. io_uring already has some filtering support. It requires the ring to be setup in a disabled state, and then a filter set can be applied. This filter set is completely bi-modal - an opcode is either enabled or it's not. Once a filter set is registered, the ring can be enabled. This is very restrictive, and it's not useful at all to systemd or containers which really want both broader and more specific control. 
This first adds support for cBPF filters for opcodes, which enables tighter control over what exactly a specific opcode may do. As examples, specific support is added for IORING_OP_OPENAT/OPENAT2, allowing filtering on resolve flags. And another example is added for IORING_OP_SOCKET, allowing filtering on domain/type/protocol. These are both common use cases. cBPF was chosen rather than eBPF, because the latter is often restricted in containers as well. These filters are run post the init phase of the request, which allows filters to even dip into data that is being passed in struct in user memory, as the init side of requests make that data stable by bringing it into the kernel. This allows filtering without needing to copy this data twice, or have filters etc know about the exact layout of the user data. The filters get the already copied and sanitized data passed. On top of that support is added for per-task filters, meaning that any ring created with a task that has a per-task filter will get those filters applied when it's created. These filters are inherited across fork as well. Once a filter has been registered, any further added filters may only further restrict what operations are permitted. Filters cannot change the return value of an operation, they can only permit or deny it based on the contents" * tag 'io_uring-bpf-restrictions.4-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: io_uring: allow registration of per-task restrictions io_uring: add task fork hook io_uring/bpf_filter: add ref counts to struct io_bpf_filter io_uring/bpf_filter: cache lookup table in ctx->bpf_filters io_uring/bpf_filter: allow filtering on contents of struct open_how io_uring/net: allow filtering on IORING_OP_SOCKET data io_uring: add support for BPF filtering for opcode restrictions
This commit is contained in:
commit
591beb0e3a
18 changed files with 789 additions and 10 deletions
|
|
@ -12,6 +12,7 @@ void __io_uring_free(struct task_struct *tsk);
|
|||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
int __io_uring_fork(struct task_struct *tsk);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
|
|
@ -25,9 +26,16 @@ static inline void io_uring_task_cancel(void)
|
|||
}
|
||||
static inline void io_uring_free(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk->io_uring)
|
||||
if (tsk->io_uring || tsk->io_uring_restrict)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
static inline int io_uring_fork(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk->io_uring_restrict)
|
||||
return __io_uring_fork(tsk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static inline void io_uring_task_cancel(void)
|
||||
{
|
||||
|
|
@ -46,6 +54,10 @@ static inline bool io_is_uring_fops(struct file *file)
|
|||
{
|
||||
return false;
|
||||
}
|
||||
static inline int io_uring_fork(struct task_struct *tsk)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -219,9 +219,20 @@ struct io_rings {
|
|||
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
|
||||
};
|
||||
|
||||
struct io_bpf_filter;
|
||||
struct io_bpf_filters {
|
||||
refcount_t refs; /* ref for ->bpf_filters */
|
||||
spinlock_t lock; /* protects ->bpf_filters modifications */
|
||||
struct io_bpf_filter __rcu **filters;
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
struct io_restriction {
|
||||
DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
|
||||
DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
|
||||
struct io_bpf_filters *bpf_filters;
|
||||
/* ->bpf_filters needs COW on modification */
|
||||
bool bpf_filters_cow;
|
||||
u8 sqe_flags_allowed;
|
||||
u8 sqe_flags_required;
|
||||
/* IORING_OP_* restrictions exist */
|
||||
|
|
@ -278,6 +289,8 @@ struct io_ring_ctx {
|
|||
|
||||
struct task_struct *submitter_task;
|
||||
struct io_rings *rings;
|
||||
/* cache of ->restrictions.bpf_filters->filters */
|
||||
struct io_bpf_filter __rcu **bpf_filters;
|
||||
struct percpu_ref refs;
|
||||
|
||||
clockid_t clockid;
|
||||
|
|
|
|||
|
|
@ -1186,6 +1186,7 @@ struct task_struct {
|
|||
|
||||
#ifdef CONFIG_IO_URING
|
||||
struct io_uring_task *io_uring;
|
||||
struct io_restriction *io_uring_restrict;
|
||||
#endif
|
||||
|
||||
/* Namespaces: */
|
||||
|
|
|
|||
|
|
@ -712,6 +712,9 @@ enum io_uring_register_op {
|
|||
/* auxiliary zcrx configuration, see enum zcrx_ctrl_op */
|
||||
IORING_REGISTER_ZCRX_CTRL = 36,
|
||||
|
||||
/* register bpf filtering programs */
|
||||
IORING_REGISTER_BPF_FILTER = 37,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
|
|
@ -817,6 +820,13 @@ struct io_uring_restriction {
|
|||
__u32 resv2[3];
|
||||
};
|
||||
|
||||
struct io_uring_task_restriction {
|
||||
__u16 flags;
|
||||
__u16 nr_res;
|
||||
__u32 resv[3];
|
||||
__DECLARE_FLEX_ARRAY(struct io_uring_restriction, restrictions);
|
||||
};
|
||||
|
||||
struct io_uring_clock_register {
|
||||
__u32 clockid;
|
||||
__u32 __resv[3];
|
||||
|
|
|
|||
62
include/uapi/linux/io_uring/bpf_filter.h
Normal file
62
include/uapi/linux/io_uring/bpf_filter.h
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
|
||||
/*
|
||||
* Header file for the io_uring BPF filters.
|
||||
*/
|
||||
#ifndef LINUX_IO_URING_BPF_FILTER_H
|
||||
#define LINUX_IO_URING_BPF_FILTER_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* Struct passed to filters.
|
||||
*/
|
||||
struct io_uring_bpf_ctx {
|
||||
__u64 user_data;
|
||||
__u8 opcode;
|
||||
__u8 sqe_flags;
|
||||
__u8 pdu_size; /* size of aux data for filter */
|
||||
__u8 pad[5];
|
||||
union {
|
||||
struct {
|
||||
__u32 family;
|
||||
__u32 type;
|
||||
__u32 protocol;
|
||||
} socket;
|
||||
struct {
|
||||
__u64 flags;
|
||||
__u64 mode;
|
||||
__u64 resolve;
|
||||
} open;
|
||||
};
|
||||
};
|
||||
|
||||
enum {
|
||||
/*
|
||||
* If set, any currently unset opcode will have a deny filter attached
|
||||
*/
|
||||
IO_URING_BPF_FILTER_DENY_REST = 1,
|
||||
};
|
||||
|
||||
struct io_uring_bpf_filter {
|
||||
__u32 opcode; /* io_uring opcode to filter */
|
||||
__u32 flags;
|
||||
__u32 filter_len; /* number of BPF instructions */
|
||||
__u32 resv;
|
||||
__u64 filter_ptr; /* pointer to BPF filter */
|
||||
__u64 resv2[5];
|
||||
};
|
||||
|
||||
enum {
|
||||
IO_URING_BPF_CMD_FILTER = 1,
|
||||
};
|
||||
|
||||
struct io_uring_bpf {
|
||||
__u16 cmd_type; /* IO_URING_BPF_* values */
|
||||
__u16 cmd_flags; /* none so far */
|
||||
__u32 resv;
|
||||
union {
|
||||
struct io_uring_bpf_filter filter;
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -9,3 +9,8 @@ config IO_URING_ZCRX
|
|||
depends on PAGE_POOL
|
||||
depends on INET
|
||||
depends on NET_RX_BUSY_POLL
|
||||
|
||||
config IO_URING_BPF
|
||||
def_bool y
|
||||
depends on BPF
|
||||
depends on NET
|
||||
|
|
|
|||
|
|
@ -24,3 +24,4 @@ obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
|
|||
obj-$(CONFIG_NET) += net.o cmd_net.o
|
||||
obj-$(CONFIG_PROC_FS) += fdinfo.o
|
||||
obj-$(CONFIG_IO_URING_MOCK_FILE) += mock_file.o
|
||||
obj-$(CONFIG_IO_URING_BPF) += bpf_filter.o
|
||||
|
|
|
|||
430
io_uring/bpf_filter.c
Normal file
430
io_uring/bpf_filter.c
Normal file
|
|
@ -0,0 +1,430 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* BPF filter support for io_uring. Supports SQE opcodes for now.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
#include "io_uring.h"
|
||||
#include "bpf_filter.h"
|
||||
#include "net.h"
|
||||
#include "openclose.h"
|
||||
|
||||
struct io_bpf_filter {
|
||||
refcount_t refs;
|
||||
struct bpf_prog *prog;
|
||||
struct io_bpf_filter *next;
|
||||
};
|
||||
|
||||
/* Deny if this is set as the filter */
|
||||
static const struct io_bpf_filter dummy_filter;
|
||||
|
||||
static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
bctx->opcode = req->opcode;
|
||||
bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
|
||||
bctx->user_data = req->cqe.user_data;
|
||||
/* clear residual, anything from pdu_size and below */
|
||||
memset((void *) bctx + offsetof(struct io_uring_bpf_ctx, pdu_size), 0,
|
||||
sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));
|
||||
|
||||
/*
|
||||
 * Opcodes can provide a handler for populating more data into bctx,
|
||||
* for filters to use.
|
||||
*/
|
||||
switch (req->opcode) {
|
||||
case IORING_OP_SOCKET:
|
||||
bctx->pdu_size = sizeof(bctx->socket);
|
||||
io_socket_bpf_populate(bctx, req);
|
||||
break;
|
||||
case IORING_OP_OPENAT:
|
||||
case IORING_OP_OPENAT2:
|
||||
bctx->pdu_size = sizeof(bctx->open);
|
||||
io_openat_bpf_populate(bctx, req);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Run registered filters for a given opcode. For filters, a return of 0 denies
|
||||
* execution of the request, a return of 1 allows it. If any filter for an
|
||||
* opcode returns 0, filter processing is stopped, and the request is denied.
|
||||
* This also stops the processing of filters.
|
||||
*
|
||||
* __io_uring_run_bpf_filters() returns 0 on success, allow running the
|
||||
* request, and -EACCES when a request is denied.
|
||||
*/
|
||||
int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
struct io_bpf_filter *filter;
|
||||
struct io_uring_bpf_ctx bpf_ctx;
|
||||
int ret;
|
||||
|
||||
/* Fast check for existence of filters outside of RCU */
|
||||
if (!rcu_access_pointer(filters[req->opcode]))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* req->opcode has already been validated to be within the range
|
||||
* of what we expect, io_init_req() does this.
|
||||
*/
|
||||
guard(rcu)();
|
||||
filter = rcu_dereference(filters[req->opcode]);
|
||||
if (!filter)
|
||||
return 0;
|
||||
else if (filter == &dummy_filter)
|
||||
return -EACCES;
|
||||
|
||||
io_uring_populate_bpf_ctx(&bpf_ctx, req);
|
||||
|
||||
/*
|
||||
* Iterate registered filters. The opcode is allowed IFF all filters
|
||||
* return 1. If any filter returns denied, opcode will be denied.
|
||||
*/
|
||||
do {
|
||||
if (filter == &dummy_filter)
|
||||
return -EACCES;
|
||||
ret = bpf_prog_run(filter->prog, &bpf_ctx);
|
||||
if (!ret)
|
||||
return -EACCES;
|
||||
filter = filter->next;
|
||||
} while (filter);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void io_free_bpf_filters(struct rcu_head *head)
|
||||
{
|
||||
struct io_bpf_filter __rcu **filter;
|
||||
struct io_bpf_filters *filters;
|
||||
int i;
|
||||
|
||||
filters = container_of(head, struct io_bpf_filters, rcu_head);
|
||||
scoped_guard(spinlock, &filters->lock) {
|
||||
filter = filters->filters;
|
||||
if (!filter)
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < IORING_OP_LAST; i++) {
|
||||
struct io_bpf_filter *f;
|
||||
|
||||
rcu_read_lock();
|
||||
f = rcu_dereference(filter[i]);
|
||||
while (f) {
|
||||
struct io_bpf_filter *next = f->next;
|
||||
|
||||
/*
|
||||
* Even if stacked, dummy filter will always be last
|
||||
* as it can only get installed into an empty spot.
|
||||
*/
|
||||
if (f == &dummy_filter)
|
||||
break;
|
||||
|
||||
/* Someone still holds a ref, stop iterating. */
|
||||
if (!refcount_dec_and_test(&f->refs))
|
||||
break;
|
||||
|
||||
bpf_prog_destroy(f->prog);
|
||||
kfree(f);
|
||||
f = next;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
kfree(filters->filters);
|
||||
kfree(filters);
|
||||
}
|
||||
|
||||
static void __io_put_bpf_filters(struct io_bpf_filters *filters)
|
||||
{
|
||||
if (refcount_dec_and_test(&filters->refs))
|
||||
call_rcu(&filters->rcu_head, io_free_bpf_filters);
|
||||
}
|
||||
|
||||
void io_put_bpf_filters(struct io_restriction *res)
|
||||
{
|
||||
if (res->bpf_filters)
|
||||
__io_put_bpf_filters(res->bpf_filters);
|
||||
}
|
||||
|
||||
static struct io_bpf_filters *io_new_bpf_filters(void)
|
||||
{
|
||||
struct io_bpf_filters *filters __free(kfree) = NULL;
|
||||
|
||||
filters = kzalloc(sizeof(*filters), GFP_KERNEL_ACCOUNT);
|
||||
if (!filters)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
filters->filters = kcalloc(IORING_OP_LAST,
|
||||
sizeof(struct io_bpf_filter *),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!filters->filters)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
refcount_set(&filters->refs, 1);
|
||||
spin_lock_init(&filters->lock);
|
||||
return no_free_ptr(filters);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate classic BPF filter instructions. Only allow a safe subset of
|
||||
* operations - no packet data access, just context field loads and basic
|
||||
* ALU/jump operations.
|
||||
*/
|
||||
static int io_uring_check_cbpf_filter(struct sock_filter *filter,
|
||||
unsigned int flen)
|
||||
{
|
||||
int pc;
|
||||
|
||||
for (pc = 0; pc < flen; pc++) {
|
||||
struct sock_filter *ftest = &filter[pc];
|
||||
u16 code = ftest->code;
|
||||
u32 k = ftest->k;
|
||||
|
||||
switch (code) {
|
||||
case BPF_LD | BPF_W | BPF_ABS:
|
||||
ftest->code = BPF_LDX | BPF_W | BPF_ABS;
|
||||
/* 32-bit aligned and not out of bounds. */
|
||||
if (k >= sizeof(struct io_uring_bpf_ctx) || k & 3)
|
||||
return -EINVAL;
|
||||
continue;
|
||||
case BPF_LD | BPF_W | BPF_LEN:
|
||||
ftest->code = BPF_LD | BPF_IMM;
|
||||
ftest->k = sizeof(struct io_uring_bpf_ctx);
|
||||
continue;
|
||||
case BPF_LDX | BPF_W | BPF_LEN:
|
||||
ftest->code = BPF_LDX | BPF_IMM;
|
||||
ftest->k = sizeof(struct io_uring_bpf_ctx);
|
||||
continue;
|
||||
/* Explicitly include allowed calls. */
|
||||
case BPF_RET | BPF_K:
|
||||
case BPF_RET | BPF_A:
|
||||
case BPF_ALU | BPF_ADD | BPF_K:
|
||||
case BPF_ALU | BPF_ADD | BPF_X:
|
||||
case BPF_ALU | BPF_SUB | BPF_K:
|
||||
case BPF_ALU | BPF_SUB | BPF_X:
|
||||
case BPF_ALU | BPF_MUL | BPF_K:
|
||||
case BPF_ALU | BPF_MUL | BPF_X:
|
||||
case BPF_ALU | BPF_DIV | BPF_K:
|
||||
case BPF_ALU | BPF_DIV | BPF_X:
|
||||
case BPF_ALU | BPF_AND | BPF_K:
|
||||
case BPF_ALU | BPF_AND | BPF_X:
|
||||
case BPF_ALU | BPF_OR | BPF_K:
|
||||
case BPF_ALU | BPF_OR | BPF_X:
|
||||
case BPF_ALU | BPF_XOR | BPF_K:
|
||||
case BPF_ALU | BPF_XOR | BPF_X:
|
||||
case BPF_ALU | BPF_LSH | BPF_K:
|
||||
case BPF_ALU | BPF_LSH | BPF_X:
|
||||
case BPF_ALU | BPF_RSH | BPF_K:
|
||||
case BPF_ALU | BPF_RSH | BPF_X:
|
||||
case BPF_ALU | BPF_NEG:
|
||||
case BPF_LD | BPF_IMM:
|
||||
case BPF_LDX | BPF_IMM:
|
||||
case BPF_MISC | BPF_TAX:
|
||||
case BPF_MISC | BPF_TXA:
|
||||
case BPF_LD | BPF_MEM:
|
||||
case BPF_LDX | BPF_MEM:
|
||||
case BPF_ST:
|
||||
case BPF_STX:
|
||||
case BPF_JMP | BPF_JA:
|
||||
case BPF_JMP | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP | BPF_JGE | BPF_K:
|
||||
case BPF_JMP | BPF_JGE | BPF_X:
|
||||
case BPF_JMP | BPF_JGT | BPF_K:
|
||||
case BPF_JMP | BPF_JGT | BPF_X:
|
||||
case BPF_JMP | BPF_JSET | BPF_K:
|
||||
case BPF_JMP | BPF_JSET | BPF_X:
|
||||
continue;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void io_bpf_filter_clone(struct io_restriction *dst, struct io_restriction *src)
|
||||
{
|
||||
if (!src->bpf_filters)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
* If the src filter is going away, just ignore it.
|
||||
*/
|
||||
if (refcount_inc_not_zero(&src->bpf_filters->refs)) {
|
||||
dst->bpf_filters = src->bpf_filters;
|
||||
dst->bpf_filters_cow = true;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new struct io_bpf_filters. Used when a filter is cloned and
|
||||
* modifications need to be made.
|
||||
*/
|
||||
static struct io_bpf_filters *io_bpf_filter_cow(struct io_restriction *src)
|
||||
{
|
||||
struct io_bpf_filters *filters;
|
||||
struct io_bpf_filter *srcf;
|
||||
int i;
|
||||
|
||||
filters = io_new_bpf_filters();
|
||||
if (IS_ERR(filters))
|
||||
return filters;
|
||||
|
||||
/*
|
||||
* Iterate filters from src and assign in destination. Grabbing
|
||||
* a reference is enough, we don't need to duplicate the memory.
|
||||
* This is safe because filters are only ever appended to the
|
||||
* front of the list, hence the only memory ever touched inside
|
||||
* a filter is the refcount.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < IORING_OP_LAST; i++) {
|
||||
srcf = rcu_dereference(src->bpf_filters->filters[i]);
|
||||
if (!srcf) {
|
||||
continue;
|
||||
} else if (srcf == &dummy_filter) {
|
||||
rcu_assign_pointer(filters->filters[i], &dummy_filter);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Getting a ref on the first node is enough, putting the
|
||||
* filter and iterating nodes to free will stop on the first
|
||||
* one that doesn't hit zero when dropping.
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&srcf->refs))
|
||||
goto err;
|
||||
rcu_assign_pointer(filters->filters[i], srcf);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return filters;
|
||||
err:
|
||||
rcu_read_unlock();
|
||||
__io_put_bpf_filters(filters);
|
||||
return ERR_PTR(-EBUSY);
|
||||
}
|
||||
|
||||
#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST
|
||||
|
||||
int io_register_bpf_filter(struct io_restriction *res,
|
||||
struct io_uring_bpf __user *arg)
|
||||
{
|
||||
struct io_bpf_filters *filters, *old_filters = NULL;
|
||||
struct io_bpf_filter *filter, *old_filter;
|
||||
struct io_uring_bpf reg;
|
||||
struct bpf_prog *prog;
|
||||
struct sock_fprog fprog;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(®, arg, sizeof(reg)))
|
||||
return -EFAULT;
|
||||
if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
|
||||
return -EINVAL;
|
||||
if (reg.cmd_flags || reg.resv)
|
||||
return -EINVAL;
|
||||
|
||||
if (reg.filter.opcode >= IORING_OP_LAST)
|
||||
return -EINVAL;
|
||||
if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
|
||||
return -EINVAL;
|
||||
if (reg.filter.resv)
|
||||
return -EINVAL;
|
||||
if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
|
||||
return -EINVAL;
|
||||
if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
|
||||
return -EINVAL;
|
||||
|
||||
fprog.len = reg.filter.filter_len;
|
||||
fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
|
||||
|
||||
ret = bpf_prog_create_from_user(&prog, &fprog,
|
||||
io_uring_check_cbpf_filter, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* No existing filters, allocate set.
|
||||
*/
|
||||
filters = res->bpf_filters;
|
||||
if (!filters) {
|
||||
filters = io_new_bpf_filters();
|
||||
if (IS_ERR(filters)) {
|
||||
ret = PTR_ERR(filters);
|
||||
goto err_prog;
|
||||
}
|
||||
} else if (res->bpf_filters_cow) {
|
||||
filters = io_bpf_filter_cow(res);
|
||||
if (IS_ERR(filters)) {
|
||||
ret = PTR_ERR(filters);
|
||||
goto err_prog;
|
||||
}
|
||||
/*
|
||||
* Stash old filters, we'll put them once we know we'll
|
||||
* succeed. Until then, res->bpf_filters is left untouched.
|
||||
*/
|
||||
old_filters = res->bpf_filters;
|
||||
}
|
||||
|
||||
filter = kzalloc(sizeof(*filter), GFP_KERNEL_ACCOUNT);
|
||||
if (!filter) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
refcount_set(&filter->refs, 1);
|
||||
filter->prog = prog;
|
||||
|
||||
/*
|
||||
* Success - install the new filter set now. If we did COW, put
|
||||
* the old filters as we're replacing them.
|
||||
*/
|
||||
if (old_filters) {
|
||||
__io_put_bpf_filters(old_filters);
|
||||
res->bpf_filters_cow = false;
|
||||
}
|
||||
res->bpf_filters = filters;
|
||||
|
||||
/*
|
||||
* Insert filter - if the current opcode already has a filter
|
||||
* attached, add to the set.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
spin_lock_bh(&filters->lock);
|
||||
old_filter = rcu_dereference(filters->filters[reg.filter.opcode]);
|
||||
if (old_filter)
|
||||
filter->next = old_filter;
|
||||
rcu_assign_pointer(filters->filters[reg.filter.opcode], filter);
|
||||
|
||||
/*
|
||||
* If IO_URING_BPF_FILTER_DENY_REST is set, fill any unregistered
|
||||
* opcode with the dummy filter. That will cause them to be denied.
|
||||
*/
|
||||
if (reg.filter.flags & IO_URING_BPF_FILTER_DENY_REST) {
|
||||
for (int i = 0; i < IORING_OP_LAST; i++) {
|
||||
if (i == reg.filter.opcode)
|
||||
continue;
|
||||
old_filter = rcu_dereference(filters->filters[i]);
|
||||
if (old_filter)
|
||||
continue;
|
||||
rcu_assign_pointer(filters->filters[i], &dummy_filter);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_bh(&filters->lock);
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
err:
|
||||
if (filters != res->bpf_filters)
|
||||
__io_put_bpf_filters(filters);
|
||||
err_prog:
|
||||
bpf_prog_destroy(prog);
|
||||
return ret;
|
||||
}
|
||||
48
io_uring/bpf_filter.h
Normal file
48
io_uring/bpf_filter.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef IO_URING_BPF_FILTER_H
|
||||
#define IO_URING_BPF_FILTER_H
|
||||
|
||||
#include <uapi/linux/io_uring/bpf_filter.h>
|
||||
|
||||
#ifdef CONFIG_IO_URING_BPF
|
||||
|
||||
int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters, struct io_kiocb *req);
|
||||
|
||||
int io_register_bpf_filter(struct io_restriction *res,
|
||||
struct io_uring_bpf __user *arg);
|
||||
|
||||
void io_put_bpf_filters(struct io_restriction *res);
|
||||
|
||||
void io_bpf_filter_clone(struct io_restriction *dst, struct io_restriction *src);
|
||||
|
||||
static inline int io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
if (filters)
|
||||
return __io_uring_run_bpf_filters(filters, req);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline int io_register_bpf_filter(struct io_restriction *res,
|
||||
struct io_uring_bpf __user *arg)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static inline int io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void io_put_bpf_filters(struct io_restriction *res)
|
||||
{
|
||||
}
|
||||
static inline void io_bpf_filter_clone(struct io_restriction *dst,
|
||||
struct io_restriction *src)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_IO_URING_BPF */
|
||||
|
||||
#endif
|
||||
|
|
@ -94,6 +94,7 @@
|
|||
#include "alloc_cache.h"
|
||||
#include "eventfd.h"
|
||||
#include "wait.h"
|
||||
#include "bpf_filter.h"
|
||||
|
||||
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
|
||||
IOSQE_IO_HARDLINK | IOSQE_ASYNC)
|
||||
|
|
@ -1875,6 +1876,12 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|||
if (unlikely(ret))
|
||||
return io_submit_fail_init(sqe, req, ret);
|
||||
|
||||
if (unlikely(ctx->bpf_filters)) {
|
||||
ret = io_uring_run_bpf_filters(ctx->bpf_filters, req);
|
||||
if (ret)
|
||||
return io_submit_fail_init(sqe, req, ret);
|
||||
}
|
||||
|
||||
trace_io_uring_submit_req(req);
|
||||
|
||||
/*
|
||||
|
|
@ -2172,6 +2179,14 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
|||
free_uid(ctx->user);
|
||||
io_req_caches_free(ctx);
|
||||
|
||||
if (ctx->restrictions.bpf_filters) {
|
||||
WARN_ON_ONCE(ctx->bpf_filters !=
|
||||
ctx->restrictions.bpf_filters->filters);
|
||||
} else {
|
||||
WARN_ON_ONCE(ctx->bpf_filters);
|
||||
}
|
||||
io_put_bpf_filters(&ctx->restrictions);
|
||||
|
||||
WARN_ON_ONCE(ctx->nr_req_allocated);
|
||||
|
||||
if (ctx->hash_map)
|
||||
|
|
@ -2885,6 +2900,32 @@ int io_prepare_config(struct io_ctx_config *config)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void io_restriction_clone(struct io_restriction *dst, struct io_restriction *src)
|
||||
{
|
||||
memcpy(&dst->register_op, &src->register_op, sizeof(dst->register_op));
|
||||
memcpy(&dst->sqe_op, &src->sqe_op, sizeof(dst->sqe_op));
|
||||
dst->sqe_flags_allowed = src->sqe_flags_allowed;
|
||||
dst->sqe_flags_required = src->sqe_flags_required;
|
||||
dst->op_registered = src->op_registered;
|
||||
dst->reg_registered = src->reg_registered;
|
||||
|
||||
io_bpf_filter_clone(dst, src);
|
||||
}
|
||||
|
||||
static void io_ctx_restriction_clone(struct io_ring_ctx *ctx,
|
||||
struct io_restriction *src)
|
||||
{
|
||||
struct io_restriction *dst = &ctx->restrictions;
|
||||
|
||||
io_restriction_clone(dst, src);
|
||||
if (dst->bpf_filters)
|
||||
WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters);
|
||||
if (dst->op_registered)
|
||||
ctx->op_restricted = 1;
|
||||
if (dst->reg_registered)
|
||||
ctx->reg_restricted = 1;
|
||||
}
|
||||
|
||||
static __cold int io_uring_create(struct io_ctx_config *config)
|
||||
{
|
||||
struct io_uring_params *p = &config->p;
|
||||
|
|
@ -2945,6 +2986,13 @@ static __cold int io_uring_create(struct io_ctx_config *config)
|
|||
else
|
||||
ctx->notify_method = TWA_SIGNAL;
|
||||
|
||||
/*
|
||||
* If the current task has restrictions enabled, then copy them to
|
||||
* our newly created ring and mark it as registered.
|
||||
*/
|
||||
if (current->io_uring_restrict)
|
||||
io_ctx_restriction_clone(ctx, current->io_uring_restrict);
|
||||
|
||||
/*
|
||||
* This is just grabbed for accounting purposes. When a process exits,
|
||||
* the mm is exited and dropped before the files, hence we need to hang
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ void io_task_refs_refill(struct io_uring_task *tctx);
|
|||
bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
|
||||
|
||||
void io_activate_pollwq(struct io_ring_ctx *ctx);
|
||||
void io_restriction_clone(struct io_restriction *dst, struct io_restriction *src);
|
||||
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1703,6 +1703,15 @@ retry:
|
|||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
|
||||
{
|
||||
struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
|
||||
|
||||
bctx->socket.family = sock->domain;
|
||||
bctx->socket.type = sock->type;
|
||||
bctx->socket.protocol = sock->protocol;
|
||||
}
|
||||
|
||||
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/net.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <uapi/linux/io_uring/bpf_filter.h>
|
||||
|
||||
struct io_async_msghdr {
|
||||
#if defined(CONFIG_NET)
|
||||
|
|
@ -44,6 +45,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags);
|
|||
|
||||
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_socket(struct io_kiocb *req, unsigned int issue_flags);
|
||||
void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
|
||||
|
||||
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
|
@ -64,4 +66,8 @@ void io_netmsg_cache_free(const void *entry);
|
|||
static inline void io_netmsg_cache_free(const void *entry)
|
||||
{
|
||||
}
|
||||
static inline void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx,
|
||||
struct io_kiocb *req)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -82,6 +82,15 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
|
|||
return 0;
|
||||
}
|
||||
|
||||
void io_openat_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
|
||||
{
|
||||
struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
|
||||
|
||||
bctx->open.flags = open->how.flags;
|
||||
bctx->open.mode = open->how.mode;
|
||||
bctx->open.resolve = open->how.resolve;
|
||||
}
|
||||
|
||||
int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_open *open = io_kiocb_to_cmd(req, struct io_open);
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bpf_filter.h"
|
||||
|
||||
int __io_close_fixed(struct io_ring_ctx *ctx, unsigned int issue_flags,
|
||||
unsigned int offset);
|
||||
|
||||
int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_openat(struct io_kiocb *req, unsigned int issue_flags);
|
||||
void io_open_cleanup(struct io_kiocb *req);
|
||||
void io_openat_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
|
||||
|
||||
int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "memmap.h"
|
||||
#include "zcrx.h"
|
||||
#include "query.h"
|
||||
#include "bpf_filter.h"
|
||||
|
||||
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
|
||||
IORING_REGISTER_LAST + IORING_OP_LAST)
|
||||
|
|
@ -189,6 +190,82 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int io_register_restrictions_task(void __user *arg, unsigned int nr_args)
|
||||
{
|
||||
struct io_uring_task_restriction __user *ures = arg;
|
||||
struct io_uring_task_restriction tres;
|
||||
struct io_restriction *res;
|
||||
int ret;
|
||||
|
||||
/* Disallow if task already has registered restrictions */
|
||||
if (current->io_uring_restrict)
|
||||
return -EPERM;
|
||||
/*
|
||||
 * Similar to seccomp, installing a filter requires that the task either
|
||||
 * has no_new_privs set, or has CAP_SYS_ADMIN in its user namespace.
|
||||
 */
|
||||
if (!task_no_new_privs(current) &&
|
||||
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
if (nr_args != 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&tres, arg, sizeof(tres)))
|
||||
return -EFAULT;
|
||||
|
||||
if (tres.flags)
|
||||
return -EINVAL;
|
||||
if (!mem_is_zero(tres.resv, sizeof(tres.resv)))
|
||||
return -EINVAL;
|
||||
|
||||
res = kzalloc(sizeof(*res), GFP_KERNEL_ACCOUNT);
|
||||
if (!res)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = io_parse_restrictions(ures->restrictions, tres.nr_res, res);
|
||||
if (ret < 0) {
|
||||
kfree(res);
|
||||
return ret;
|
||||
}
|
||||
current->io_uring_restrict = res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Attach a BPF filter to the calling task's io_uring restriction set,
 * allocating a restriction set on the fly if the task has none yet.
 *
 * Mirrors seccomp policy: a filter may only be installed if the task has
 * no_new_privs set, or if it holds CAP_SYS_ADMIN in its user namespace.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args)
{
	struct io_restriction *restr;
	int ret;

	/*
	 * As with seccomp, require either no_new_privs or CAP_SYS_ADMIN
	 * before a filter may be installed.
	 */
	if (!task_no_new_privs(current) &&
	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return -EACCES;

	if (nr_args != 1)
		return -EINVAL;

	/* Reuse the existing task restriction set, or allocate a fresh one */
	restr = current->io_uring_restrict;
	if (!restr) {
		restr = kzalloc(sizeof(*restr), GFP_KERNEL_ACCOUNT);
		if (!restr)
			return -ENOMEM;
	}

	ret = io_register_bpf_filter(restr, arg);
	if (ret) {
		/* Only free a set this call allocated, never a pre-existing one */
		if (restr != current->io_uring_restrict)
			kfree(restr);
		return ret;
	}

	/* Publish the newly allocated set, if we created one above */
	if (!current->io_uring_restrict)
		current->io_uring_restrict = restr;
	return 0;
}
|
||||
|
||||
static int io_register_enable_rings(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
|
||||
|
|
@ -832,6 +909,16 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
|||
case IORING_REGISTER_ZCRX_CTRL:
|
||||
ret = io_zcrx_ctrl(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_BPF_FILTER:
|
||||
ret = -EINVAL;
|
||||
|
||||
if (nr_args != 1)
|
||||
break;
|
||||
ret = io_register_bpf_filter(&ctx->restrictions, arg);
|
||||
if (!ret)
|
||||
WRITE_ONCE(ctx->bpf_filters,
|
||||
ctx->restrictions.bpf_filters->filters);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
|
|
@ -903,6 +990,10 @@ static int io_uring_register_blind(unsigned int opcode, void __user *arg,
|
|||
return io_uring_register_send_msg_ring(arg, nr_args);
|
||||
case IORING_REGISTER_QUERY:
|
||||
return io_query(arg, nr_args);
|
||||
case IORING_REGISTER_RESTRICTIONS:
|
||||
return io_register_restrictions_task(arg, nr_args);
|
||||
case IORING_REGISTER_BPF_FILTER:
|
||||
return io_register_bpf_filter_task(arg, nr_args);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "io_uring.h"
|
||||
#include "tctx.h"
|
||||
#include "bpf_filter.h"
|
||||
|
||||
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
|
||||
struct task_struct *task)
|
||||
|
|
@ -54,16 +55,23 @@ void __io_uring_free(struct task_struct *tsk)
|
|||
* node is stored in the xarray. Until that gets sorted out, attempt
|
||||
* an iteration here and warn if any entries are found.
|
||||
*/
|
||||
xa_for_each(&tctx->xa, index, node) {
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
WARN_ON_ONCE(tctx->io_wq);
|
||||
WARN_ON_ONCE(tctx->cached_refs);
|
||||
if (tctx) {
|
||||
xa_for_each(&tctx->xa, index, node) {
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
WARN_ON_ONCE(tctx->io_wq);
|
||||
WARN_ON_ONCE(tctx->cached_refs);
|
||||
|
||||
percpu_counter_destroy(&tctx->inflight);
|
||||
kfree(tctx);
|
||||
tsk->io_uring = NULL;
|
||||
percpu_counter_destroy(&tctx->inflight);
|
||||
kfree(tctx);
|
||||
tsk->io_uring = NULL;
|
||||
}
|
||||
if (tsk->io_uring_restrict) {
|
||||
io_put_bpf_filters(tsk->io_uring_restrict);
|
||||
kfree(tsk->io_uring_restrict);
|
||||
tsk->io_uring_restrict = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
__cold int io_uring_alloc_task_context(struct task_struct *task,
|
||||
|
|
@ -362,3 +370,19 @@ int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
|
|||
|
||||
return i ? i : ret;
|
||||
}
|
||||
|
||||
int __io_uring_fork(struct task_struct *tsk)
|
||||
{
|
||||
struct io_restriction *res, *src = tsk->io_uring_restrict;
|
||||
|
||||
/* Don't leave it dangling on error */
|
||||
tsk->io_uring_restrict = NULL;
|
||||
|
||||
res = kzalloc(sizeof(*res), GFP_KERNEL_ACCOUNT);
|
||||
if (!res)
|
||||
return -ENOMEM;
|
||||
|
||||
tsk->io_uring_restrict = res;
|
||||
io_restriction_clone(res, src);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@
|
|||
#include <linux/kasan.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/stackprotector.h>
|
||||
#include <linux/user_events.h>
|
||||
|
|
@ -2126,6 +2127,10 @@ __latent_entropy struct task_struct *copy_process(
|
|||
|
||||
#ifdef CONFIG_IO_URING
|
||||
p->io_uring = NULL;
|
||||
retval = io_uring_fork(p);
|
||||
if (unlikely(retval))
|
||||
goto bad_fork_cleanup_delayacct;
|
||||
retval = -EAGAIN;
|
||||
#endif
|
||||
|
||||
p->default_timer_slack_ns = current->timer_slack_ns;
|
||||
|
|
@ -2522,6 +2527,7 @@ bad_fork_cleanup_policy:
|
|||
mpol_put(p->mempolicy);
|
||||
#endif
|
||||
bad_fork_cleanup_delayacct:
|
||||
io_uring_free(p);
|
||||
delayacct_tsk_free(p);
|
||||
bad_fork_cleanup_count:
|
||||
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue