linux/io_uring/fs.c
Al Viro 9fa3ec8458 allow incomplete imports of filenames
There are two filename-related problems in io_uring and its
interplay with audit.

Filenames are imported when request is submitted and used when
it is processed.  Unfortunately, the latter may very well
happen in a different thread.  In that case the reference to
filename is put into the wrong audit_context - that of submitting
thread, not the processing one.  Audit logics is called by
the latter, and it really wants to be able to find the names
in audit_context current (== processing) thread.

Another related problem is the headache with refcounts -
normally all references to given struct filename are visible
only to one thread (the one that uses that struct filename).
io_uring violates that - an extra reference is stashed in
audit_context of submitter.  It gets dropped when submitter
returns to userland, which can happen simultaneously with
processing thread deciding to drop the reference it got.

We paper over that by making refcount atomic, but that means
pointless headache for everyone.

Solution: the notion of partially imported filenames.  Namely,
already copied from userland, but *not* exposed to audit yet.

io_uring can create that in submitter thread, and complete the
import (obtaining the usual reference to struct filename) in
processing thread.

Object: struct delayed_filename.

Primitives for working with it:

delayed_getname(&delayed_filename, user_string) - copies the name from
userland, returning 0 and stashing the address of (still incomplete)
struct filename in delayed_filename on success and returning -E... on
error.

delayed_getname_uflags(&delayed_filename, user_string, atflags) -
similar, in the same relation to delayed_getname() as getname_uflags()
is to getname()

complete_getname(&delayed_filename) - completes the import of filename
stashed in delayed_filename and returns struct filename to caller,
emptying delayed_filename.

CLASS(filename_complete_delayed, name)(&delayed_filename) - variant of
CLASS(filename) with complete_getname() for constructor.

dismiss_delayed_filename(&delayed_filename) - destructor; drops whatever
might be stashed in delayed_filename, emptying it.

putname_to_delayed(&delayed_filename, name) - if name is shared, stashes
its copy into delayed_filename and drops the reference to name, otherwise
stashes the name itself in there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2026-01-13 15:18:07 -05:00

300 lines
7.1 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
#include "../fs/internal.h"
#include "io_uring.h"
#include "fs.h"
struct io_rename {
struct file *file;
int old_dfd;
int new_dfd;
struct delayed_filename oldpath;
struct delayed_filename newpath;
int flags;
};
struct io_unlink {
struct file *file;
int dfd;
int flags;
struct delayed_filename filename;
};
struct io_mkdir {
struct file *file;
int dfd;
umode_t mode;
struct delayed_filename filename;
};
struct io_link {
struct file *file;
int old_dfd;
int new_dfd;
struct delayed_filename oldpath;
struct delayed_filename newpath;
int flags;
};
int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename);
const char __user *oldf, *newf;
int err;
if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
ren->old_dfd = READ_ONCE(sqe->fd);
oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
ren->new_dfd = READ_ONCE(sqe->len);
ren->flags = READ_ONCE(sqe->rename_flags);
err = delayed_getname(&ren->oldpath, oldf);
if (unlikely(err))
return err;
err = delayed_getname(&ren->newpath, newf);
if (unlikely(err)) {
dismiss_delayed_filename(&ren->oldpath);
return err;
}
req->flags |= REQ_F_NEED_CLEANUP;
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
}
int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename);
int ret;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
ret = do_renameat2(ren->old_dfd, complete_getname(&ren->oldpath),
ren->new_dfd, complete_getname(&ren->newpath),
ren->flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
return IOU_COMPLETE;
}
void io_renameat_cleanup(struct io_kiocb *req)
{
struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename);
dismiss_delayed_filename(&ren->oldpath);
dismiss_delayed_filename(&ren->newpath);
}
int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink);
const char __user *fname;
int err;
if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
un->dfd = READ_ONCE(sqe->fd);
un->flags = READ_ONCE(sqe->unlink_flags);
if (un->flags & ~AT_REMOVEDIR)
return -EINVAL;
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
err = delayed_getname(&un->filename, fname);
if (unlikely(err))
return err;
req->flags |= REQ_F_NEED_CLEANUP;
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
}
int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink);
int ret;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
if (un->flags & AT_REMOVEDIR)
ret = do_rmdir(un->dfd, complete_getname(&un->filename));
else
ret = do_unlinkat(un->dfd, complete_getname(&un->filename));
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
return IOU_COMPLETE;
}
void io_unlinkat_cleanup(struct io_kiocb *req)
{
struct io_unlink *ul = io_kiocb_to_cmd(req, struct io_unlink);
dismiss_delayed_filename(&ul->filename);
}
int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir);
const char __user *fname;
int err;
if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
mkd->dfd = READ_ONCE(sqe->fd);
mkd->mode = READ_ONCE(sqe->len);
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
err = delayed_getname(&mkd->filename, fname);
if (unlikely(err))
return err;
req->flags |= REQ_F_NEED_CLEANUP;
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
}
int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir);
int ret;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
ret = do_mkdirat(mkd->dfd, complete_getname(&mkd->filename), mkd->mode);
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
return IOU_COMPLETE;
}
void io_mkdirat_cleanup(struct io_kiocb *req)
{
struct io_mkdir *md = io_kiocb_to_cmd(req, struct io_mkdir);
dismiss_delayed_filename(&md->filename);
}
int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_link *sl = io_kiocb_to_cmd(req, struct io_link);
const char __user *oldpath, *newpath;
int err;
if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
sl->new_dfd = READ_ONCE(sqe->fd);
oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
err = delayed_getname(&sl->oldpath, oldpath);
if (unlikely(err))
return err;
err = delayed_getname(&sl->newpath, newpath);
if (unlikely(err)) {
dismiss_delayed_filename(&sl->oldpath);
return err;
}
req->flags |= REQ_F_NEED_CLEANUP;
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
}
int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_link *sl = io_kiocb_to_cmd(req, struct io_link);
int ret;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
ret = do_symlinkat(complete_getname(&sl->oldpath), sl->new_dfd,
complete_getname(&sl->newpath));
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
return IOU_COMPLETE;
}
int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
const char __user *oldf, *newf;
int err;
if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
lnk->old_dfd = READ_ONCE(sqe->fd);
lnk->new_dfd = READ_ONCE(sqe->len);
oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
lnk->flags = READ_ONCE(sqe->hardlink_flags);
err = delayed_getname_uflags(&lnk->oldpath, oldf, lnk->flags);
if (unlikely(err))
return err;
err = delayed_getname(&lnk->newpath, newf);
if (unlikely(err)) {
dismiss_delayed_filename(&lnk->oldpath);
return err;
}
req->flags |= REQ_F_NEED_CLEANUP;
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
}
int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link);
int ret;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
ret = do_linkat(lnk->old_dfd, complete_getname(&lnk->oldpath),
lnk->new_dfd, complete_getname(&lnk->newpath), lnk->flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
io_req_set_res(req, ret, 0);
return IOU_COMPLETE;
}
void io_link_cleanup(struct io_kiocb *req)
{
struct io_link *sl = io_kiocb_to_cmd(req, struct io_link);
dismiss_delayed_filename(&sl->oldpath);
dismiss_delayed_filename(&sl->newpath);
}