mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
vfs-7.0-rc3.fixes
Please consider pulling these changes from the signed vfs-7.0-rc3.fixes tag.
Thanks!
Christian
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaaikgAAKCRCRxhvAZXjc
orflAP9Dfs/DCoHLi9xknIqHgMqxJKHpwVzcGAOX8eI0ZOLVjQEA2nnhtbBvVh3q
CAbQzwVHaujKVL2lGV/qwoaRFEvf1gI=
=aZoy
-----END PGP SIGNATURE-----
Merge tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:
- kthread: consolidate kthread exit paths to prevent use-after-free
- iomap:
   - don't mark folio uptodate if read IO has bytes pending
   - don't report direct-io retries to fserror
   - reject delalloc mappings during writeback
- ns: tighten visibility checks
- netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
  sequence
* tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
iomap: reject delalloc mappings during writeback
iomap: don't mark folio uptodate if read IO has bytes pending
selftests: fix mntns iteration selftests
nstree: tighten permission checks for listing
nsfs: tighten permission checks for handle opening
nsfs: tighten permission checks for ns iteration ioctls
netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence
kthread: consolidate kthread exit paths to prevent use-after-free
iomap: don't report direct-io retries to fserror
This commit is contained in:
commit
0b3bb20580
16 changed files with 326 additions and 160 deletions
|
|
@ -80,18 +80,27 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
|
|||
{
|
||||
struct iomap_folio_state *ifs = folio->private;
|
||||
unsigned long flags;
|
||||
bool uptodate = true;
|
||||
bool mark_uptodate = true;
|
||||
|
||||
if (folio_test_uptodate(folio))
|
||||
return;
|
||||
|
||||
if (ifs) {
|
||||
spin_lock_irqsave(&ifs->state_lock, flags);
|
||||
uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
|
||||
/*
|
||||
* If a read with bytes pending is in progress, we must not call
|
||||
* folio_mark_uptodate(). The read completion path
|
||||
* (iomap_read_end()) will call folio_end_read(), which uses XOR
|
||||
* semantics to set the uptodate bit. If we set it here, the XOR
|
||||
* in folio_end_read() will clear it, leaving the folio not
|
||||
* uptodate.
|
||||
*/
|
||||
mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) &&
|
||||
!ifs->read_bytes_pending;
|
||||
spin_unlock_irqrestore(&ifs->state_lock, flags);
|
||||
}
|
||||
|
||||
if (uptodate)
|
||||
if (mark_uptodate)
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -87,6 +87,19 @@ static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio)
|
|||
return FSERR_DIRECTIO_READ;
|
||||
}
|
||||
|
||||
static inline bool should_report_dio_fserror(const struct iomap_dio *dio)
|
||||
{
|
||||
switch (dio->error) {
|
||||
case 0:
|
||||
case -EAGAIN:
|
||||
case -ENOTBLK:
|
||||
/* don't send fsnotify for success or magic retry codes */
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||
{
|
||||
const struct iomap_dio_ops *dops = dio->dops;
|
||||
|
|
@ -96,7 +109,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
|||
|
||||
if (dops && dops->end_io)
|
||||
ret = dops->end_io(iocb, dio->size, ret, dio->flags);
|
||||
if (dio->error)
|
||||
if (should_report_dio_fserror(dio))
|
||||
fserror_report_io(file_inode(iocb->ki_filp),
|
||||
iomap_dio_err_type(dio), offset, dio->size,
|
||||
dio->error, GFP_NOFS);
|
||||
|
|
|
|||
|
|
@ -215,17 +215,18 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
|
|||
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
|
||||
|
||||
switch (wpc->iomap.type) {
|
||||
case IOMAP_INLINE:
|
||||
WARN_ON_ONCE(1);
|
||||
return -EIO;
|
||||
case IOMAP_UNWRITTEN:
|
||||
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
|
||||
break;
|
||||
case IOMAP_MAPPED:
|
||||
break;
|
||||
case IOMAP_HOLE:
|
||||
return map_len;
|
||||
default:
|
||||
break;
|
||||
WARN_ON_ONCE(1);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (wpc->iomap.type == IOMAP_UNWRITTEN)
|
||||
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
|
||||
if (wpc->iomap.flags & IOMAP_F_SHARED)
|
||||
ioend_flags |= IOMAP_IOEND_SHARED;
|
||||
if (folio_test_dropbehind(folio))
|
||||
|
|
|
|||
|
|
@ -9,6 +9,202 @@
|
|||
#include <linux/uio.h>
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* Perform the cleanup rituals after an unbuffered write is complete.
|
||||
*/
|
||||
static void netfs_unbuffered_write_done(struct netfs_io_request *wreq)
|
||||
{
|
||||
struct netfs_inode *ictx = netfs_inode(wreq->inode);
|
||||
|
||||
_enter("R=%x", wreq->debug_id);
|
||||
|
||||
/* Okay, declare that all I/O is complete. */
|
||||
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
|
||||
|
||||
if (!wreq->error)
|
||||
netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE &&
|
||||
wreq->mapping->nrpages) {
|
||||
/* mmap may have got underfoot and we may now have folios
|
||||
* locally covering the region we just wrote. Attempt to
|
||||
* discard the folios, but leave in place any modified locally.
|
||||
* ->write_iter() is prevented from interfering by the DIO
|
||||
* counter.
|
||||
*/
|
||||
pgoff_t first = wreq->start >> PAGE_SHIFT;
|
||||
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
|
||||
|
||||
invalidate_inode_pages2_range(wreq->mapping, first, last);
|
||||
}
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE)
|
||||
inode_dio_end(wreq->inode);
|
||||
|
||||
_debug("finished");
|
||||
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
|
||||
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
|
||||
|
||||
if (wreq->iocb) {
|
||||
size_t written = umin(wreq->transferred, wreq->len);
|
||||
|
||||
wreq->iocb->ki_pos += written;
|
||||
if (wreq->iocb->ki_complete) {
|
||||
trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
|
||||
wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written);
|
||||
}
|
||||
wreq->iocb = VFS_PTR_POISON;
|
||||
}
|
||||
|
||||
netfs_clear_subrequests(wreq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Collect the subrequest results of unbuffered write subrequests.
|
||||
*/
|
||||
static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
trace_netfs_collect_sreq(wreq, subreq);
|
||||
|
||||
spin_lock(&wreq->lock);
|
||||
list_del_init(&subreq->rreq_link);
|
||||
spin_unlock(&wreq->lock);
|
||||
|
||||
wreq->transferred += subreq->transferred;
|
||||
iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
|
||||
|
||||
stream->collected_to = subreq->start + subreq->transferred;
|
||||
wreq->collected_to = stream->collected_to;
|
||||
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
|
||||
|
||||
trace_netfs_collect_stream(wreq, stream);
|
||||
trace_netfs_collect_state(wreq, wreq->collected_to, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write data to the server without going through the pagecache and without
|
||||
* writing it to the local cache. We dispatch the subrequests serially and
|
||||
* wait for each to complete before dispatching the next, lest we leave a gap
|
||||
* in the data written due to a failure such as ENOSPC. We could, however
|
||||
* attempt to do preparation such as content encryption for the next subreq
|
||||
* whilst the current is in progress.
|
||||
*/
|
||||
static int netfs_unbuffered_write(struct netfs_io_request *wreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = NULL;
|
||||
struct netfs_io_stream *stream = &wreq->io_streams[0];
|
||||
int ret;
|
||||
|
||||
_enter("%llx", wreq->len);
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE)
|
||||
inode_dio_begin(wreq->inode);
|
||||
|
||||
stream->collected_to = wreq->start;
|
||||
|
||||
for (;;) {
|
||||
bool retry = false;
|
||||
|
||||
if (!subreq) {
|
||||
netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
|
||||
subreq = stream->construct;
|
||||
stream->construct = NULL;
|
||||
stream->front = NULL;
|
||||
}
|
||||
|
||||
/* Check if (re-)preparation failed. */
|
||||
if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) {
|
||||
netfs_write_subrequest_terminated(subreq, subreq->error);
|
||||
wreq->error = subreq->error;
|
||||
break;
|
||||
}
|
||||
|
||||
iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred);
|
||||
if (!iov_iter_count(&subreq->io_iter))
|
||||
break;
|
||||
|
||||
subreq->len = netfs_limit_iter(&subreq->io_iter, 0,
|
||||
stream->sreq_max_len,
|
||||
stream->sreq_max_segs);
|
||||
iov_iter_truncate(&subreq->io_iter, subreq->len);
|
||||
stream->submit_extendable_to = subreq->len;
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
|
||||
stream->issue_write(subreq);
|
||||
|
||||
/* Async, need to wait. */
|
||||
netfs_wait_for_in_progress_stream(wreq, stream);
|
||||
|
||||
if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
||||
retry = true;
|
||||
} else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
|
||||
ret = subreq->error;
|
||||
wreq->error = ret;
|
||||
netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed);
|
||||
subreq = NULL;
|
||||
break;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
if (!retry) {
|
||||
netfs_unbuffered_write_collect(wreq, stream, subreq);
|
||||
subreq = NULL;
|
||||
if (wreq->transferred >= wreq->len)
|
||||
break;
|
||||
if (!wreq->iocb && signal_pending(current)) {
|
||||
ret = wreq->transferred ? -EINTR : -ERESTARTSYS;
|
||||
trace_netfs_rreq(wreq, netfs_rreq_trace_intr);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We need to retry the last subrequest, so first reset the
|
||||
* iterator, taking into account what, if anything, we managed
|
||||
* to transfer.
|
||||
*/
|
||||
subreq->error = -EAGAIN;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
if (subreq->transferred > 0)
|
||||
iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
|
||||
|
||||
if (stream->source == NETFS_UPLOAD_TO_SERVER &&
|
||||
wreq->netfs_ops->retry_request)
|
||||
wreq->netfs_ops->retry_request(wreq, stream);
|
||||
|
||||
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
|
||||
__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
subreq->io_iter = wreq->buffer.iter;
|
||||
subreq->start = wreq->start + wreq->transferred;
|
||||
subreq->len = wreq->len - wreq->transferred;
|
||||
subreq->transferred = 0;
|
||||
subreq->retry_count += 1;
|
||||
stream->sreq_max_len = UINT_MAX;
|
||||
stream->sreq_max_segs = INT_MAX;
|
||||
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
stream->prepare_write(subreq);
|
||||
|
||||
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
netfs_stat(&netfs_n_wh_retry_write_subreq);
|
||||
}
|
||||
|
||||
netfs_unbuffered_write_done(wreq);
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void netfs_unbuffered_write_async(struct work_struct *work)
|
||||
{
|
||||
struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
|
||||
|
||||
netfs_unbuffered_write(wreq);
|
||||
netfs_put_request(wreq, netfs_rreq_trace_put_complete);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform an unbuffered write where we may have to do an RMW operation on an
|
||||
* encrypted file. This can also be used for direct I/O writes.
|
||||
|
|
@ -70,35 +266,35 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
|||
*/
|
||||
wreq->buffer.iter = *iter;
|
||||
}
|
||||
|
||||
wreq->len = iov_iter_count(&wreq->buffer.iter);
|
||||
}
|
||||
|
||||
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
|
||||
if (async)
|
||||
__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
|
||||
|
||||
/* Copy the data into the bounce buffer and encrypt it. */
|
||||
// TODO
|
||||
|
||||
/* Dispatch the write. */
|
||||
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
|
||||
if (async)
|
||||
|
||||
if (async) {
|
||||
INIT_WORK(&wreq->work, netfs_unbuffered_write_async);
|
||||
wreq->iocb = iocb;
|
||||
wreq->len = iov_iter_count(&wreq->buffer.iter);
|
||||
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
|
||||
if (ret < 0) {
|
||||
_debug("begin = %zd", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!async) {
|
||||
ret = netfs_wait_for_write(wreq);
|
||||
if (ret > 0)
|
||||
iocb->ki_pos += ret;
|
||||
} else {
|
||||
queue_work(system_dfl_wq, &wreq->work);
|
||||
ret = -EIOCBQUEUED;
|
||||
} else {
|
||||
ret = netfs_unbuffered_write(wreq);
|
||||
if (ret < 0) {
|
||||
_debug("begin = %zd", ret);
|
||||
} else {
|
||||
iocb->ki_pos += wreq->transferred;
|
||||
ret = wreq->transferred ?: wreq->error;
|
||||
}
|
||||
|
||||
netfs_put_request(wreq, netfs_rreq_trace_put_complete);
|
||||
}
|
||||
|
||||
out:
|
||||
netfs_put_request(wreq, netfs_rreq_trace_put_return);
|
||||
return ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -198,6 +198,9 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
|||
struct file *file,
|
||||
loff_t start,
|
||||
enum netfs_io_origin origin);
|
||||
void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start);
|
||||
void netfs_reissue_write(struct netfs_io_stream *stream,
|
||||
struct netfs_io_subrequest *subreq,
|
||||
struct iov_iter *source);
|
||||
|
|
@ -212,7 +215,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
|
|||
struct folio **writethrough_cache);
|
||||
ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
|
||||
struct folio *writethrough_cache);
|
||||
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
|
||||
|
||||
/*
|
||||
* write_retry.c
|
||||
|
|
|
|||
|
|
@ -399,27 +399,6 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
|
|||
ictx->ops->invalidate_cache(wreq);
|
||||
}
|
||||
|
||||
if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
|
||||
wreq->origin == NETFS_DIO_WRITE) &&
|
||||
!wreq->error)
|
||||
netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE &&
|
||||
wreq->mapping->nrpages) {
|
||||
/* mmap may have got underfoot and we may now have folios
|
||||
* locally covering the region we just wrote. Attempt to
|
||||
* discard the folios, but leave in place any modified locally.
|
||||
* ->write_iter() is prevented from interfering by the DIO
|
||||
* counter.
|
||||
*/
|
||||
pgoff_t first = wreq->start >> PAGE_SHIFT;
|
||||
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
|
||||
invalidate_inode_pages2_range(wreq->mapping, first, last);
|
||||
}
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE)
|
||||
inode_dio_end(wreq->inode);
|
||||
|
||||
_debug("finished");
|
||||
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
|
||||
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
|
||||
|
|
|
|||
|
|
@ -154,9 +154,9 @@ EXPORT_SYMBOL(netfs_prepare_write_failed);
|
|||
* Prepare a write subrequest. We need to allocate a new subrequest
|
||||
* if we don't have one.
|
||||
*/
|
||||
static void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start)
|
||||
void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct iov_iter *wreq_iter = &wreq->buffer.iter;
|
||||
|
|
@ -698,41 +698,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write data to the server without going through the pagecache and without
|
||||
* writing it to the local cache.
|
||||
*/
|
||||
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
|
||||
{
|
||||
struct netfs_io_stream *upload = &wreq->io_streams[0];
|
||||
ssize_t part;
|
||||
loff_t start = wreq->start;
|
||||
int error = 0;
|
||||
|
||||
_enter("%zx", len);
|
||||
|
||||
if (wreq->origin == NETFS_DIO_WRITE)
|
||||
inode_dio_begin(wreq->inode);
|
||||
|
||||
while (len) {
|
||||
// TODO: Prepare content encryption
|
||||
|
||||
_debug("unbuffered %zx", len);
|
||||
part = netfs_advance_write(wreq, upload, start, len, false);
|
||||
start += part;
|
||||
len -= part;
|
||||
rolling_buffer_advance(&wreq->buffer, part);
|
||||
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
|
||||
netfs_wait_for_paused_write(wreq);
|
||||
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
|
||||
break;
|
||||
}
|
||||
|
||||
netfs_end_issue_write(wreq);
|
||||
_leave(" = %d", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write some of a pending folio data back to the server and/or the cache.
|
||||
*/
|
||||
|
|
|
|||
15
fs/nsfs.c
15
fs/nsfs.c
|
|
@ -199,6 +199,17 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool may_use_nsfs_ioctl(unsigned int cmd)
|
||||
{
|
||||
switch (_IOC_NR(cmd)) {
|
||||
case _IOC_NR(NS_MNT_GET_NEXT):
|
||||
fallthrough;
|
||||
case _IOC_NR(NS_MNT_GET_PREV):
|
||||
return may_see_all_namespaces();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
||||
unsigned long arg)
|
||||
{
|
||||
|
|
@ -214,6 +225,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
|||
|
||||
if (!nsfs_ioctl_valid(ioctl))
|
||||
return -ENOIOCTLCMD;
|
||||
if (!may_use_nsfs_ioctl(ioctl))
|
||||
return -EPERM;
|
||||
|
||||
ns = get_proc_ns(file_inode(filp));
|
||||
switch (ioctl) {
|
||||
|
|
@ -614,7 +627,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
|||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
|
||||
if (owning_ns && !may_see_all_namespaces()) {
|
||||
ns->ops->put(ns);
|
||||
return ERR_PTR(-EPERM);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,24 @@
|
|||
|
||||
struct mm_struct;
|
||||
|
||||
/* opaque kthread data */
|
||||
struct kthread;
|
||||
|
||||
/*
|
||||
* When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
|
||||
* always remain a kthread. For kthreads p->worker_private always
|
||||
* points to a struct kthread. For tasks that are not kthreads
|
||||
* p->worker_private is used to point to other things.
|
||||
*
|
||||
* Return NULL for any task that is not a kthread.
|
||||
*/
|
||||
static inline struct kthread *tsk_is_kthread(struct task_struct *p)
|
||||
{
|
||||
if (p->flags & PF_KTHREAD)
|
||||
return p->worker_private;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__printf(4, 5)
|
||||
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
|
||||
void *data,
|
||||
|
|
@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k);
|
|||
int kthread_park(struct task_struct *k);
|
||||
void kthread_unpark(struct task_struct *k);
|
||||
void kthread_parkme(void);
|
||||
void kthread_exit(long result) __noreturn;
|
||||
#define kthread_exit(result) do_exit(result)
|
||||
void kthread_complete_and_exit(struct completion *, long) __noreturn;
|
||||
int kthreads_update_housekeeping(void);
|
||||
void kthread_do_exit(struct kthread *, long);
|
||||
|
||||
int kthreadd(void *unused);
|
||||
extern struct task_struct *kthreadd_task;
|
||||
|
|
|
|||
|
|
@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
|
|||
|
||||
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
|
||||
|
||||
bool may_see_all_namespaces(void);
|
||||
|
||||
static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
|
||||
{
|
||||
return atomic_read(&ns->__ns_ref_active);
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@
|
|||
EM(netfs_rreq_trace_done, "DONE ") \
|
||||
EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \
|
||||
EM(netfs_rreq_trace_free, "FREE ") \
|
||||
EM(netfs_rreq_trace_intr, "INTR ") \
|
||||
EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
|
||||
EM(netfs_rreq_trace_recollect, "RECLLCT") \
|
||||
EM(netfs_rreq_trace_redirty, "REDIRTY") \
|
||||
|
|
@ -169,7 +170,8 @@
|
|||
EM(netfs_sreq_trace_put_oom, "PUT OOM ") \
|
||||
EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
|
||||
EM(netfs_sreq_trace_put_work, "PUT WORK ") \
|
||||
E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
|
||||
EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \
|
||||
E_(netfs_sreq_trace_see_failed, "SEE FAILED ")
|
||||
|
||||
#define netfs_folio_traces \
|
||||
EM(netfs_folio_is_uptodate, "mod-uptodate") \
|
||||
|
|
|
|||
|
|
@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
|
|||
void __noreturn do_exit(long code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct kthread *kthread;
|
||||
int group_dead;
|
||||
|
||||
WARN_ON(irqs_disabled());
|
||||
WARN_ON(tsk->plug);
|
||||
|
||||
kthread = tsk_is_kthread(tsk);
|
||||
if (unlikely(kthread))
|
||||
kthread_do_exit(kthread, code);
|
||||
|
||||
kcov_task_exit(tsk);
|
||||
kmsan_task_exit(tsk);
|
||||
|
||||
|
|
@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code)
|
|||
lockdep_free_task(tsk);
|
||||
do_task_dead();
|
||||
}
|
||||
EXPORT_SYMBOL(do_exit);
|
||||
|
||||
void __noreturn make_task_dead(int signr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k)
|
|||
return k->worker_private;
|
||||
}
|
||||
|
||||
/*
|
||||
* Variant of to_kthread() that doesn't assume @p is a kthread.
|
||||
*
|
||||
* When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
|
||||
* always remain a kthread. For kthreads p->worker_private always
|
||||
* points to a struct kthread. For tasks that are not kthreads
|
||||
* p->worker_private is used to point to other things.
|
||||
*
|
||||
* Return NULL for any task that is not a kthread.
|
||||
*/
|
||||
static inline struct kthread *__to_kthread(struct task_struct *p)
|
||||
{
|
||||
void *kthread = p->worker_private;
|
||||
if (kthread && !(p->flags & PF_KTHREAD))
|
||||
kthread = NULL;
|
||||
return kthread;
|
||||
}
|
||||
|
||||
void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
|
||||
{
|
||||
struct kthread *kthread = to_kthread(tsk);
|
||||
|
|
@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park);
|
|||
|
||||
bool kthread_should_stop_or_park(void)
|
||||
{
|
||||
struct kthread *kthread = __to_kthread(current);
|
||||
struct kthread *kthread = tsk_is_kthread(current);
|
||||
|
||||
if (!kthread)
|
||||
return false;
|
||||
|
|
@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
|
|||
*/
|
||||
void *kthread_func(struct task_struct *task)
|
||||
{
|
||||
struct kthread *kthread = __to_kthread(task);
|
||||
struct kthread *kthread = tsk_is_kthread(task);
|
||||
if (kthread)
|
||||
return kthread->threadfn;
|
||||
return NULL;
|
||||
|
|
@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data);
|
|||
*/
|
||||
void *kthread_probe_data(struct task_struct *task)
|
||||
{
|
||||
struct kthread *kthread = __to_kthread(task);
|
||||
struct kthread *kthread = tsk_is_kthread(task);
|
||||
void *data = NULL;
|
||||
|
||||
if (kthread)
|
||||
|
|
@ -309,19 +291,8 @@ void kthread_parkme(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_parkme);
|
||||
|
||||
/**
|
||||
* kthread_exit - Cause the current kthread return @result to kthread_stop().
|
||||
* @result: The integer value to return to kthread_stop().
|
||||
*
|
||||
* While kthread_exit can be called directly, it exists so that
|
||||
* functions which do some additional work in non-modular code such as
|
||||
* module_put_and_kthread_exit can be implemented.
|
||||
*
|
||||
* Does not return.
|
||||
*/
|
||||
void __noreturn kthread_exit(long result)
|
||||
void kthread_do_exit(struct kthread *kthread, long result)
|
||||
{
|
||||
struct kthread *kthread = to_kthread(current);
|
||||
kthread->result = result;
|
||||
if (!list_empty(&kthread->affinity_node)) {
|
||||
mutex_lock(&kthread_affinity_lock);
|
||||
|
|
@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result)
|
|||
kthread->preferred_affinity = NULL;
|
||||
}
|
||||
}
|
||||
do_exit(0);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_exit);
|
||||
|
||||
/**
|
||||
* kthread_complete_and_exit - Exit the current kthread.
|
||||
|
|
@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
|
|||
|
||||
bool kthread_is_per_cpu(struct task_struct *p)
|
||||
{
|
||||
struct kthread *kthread = __to_kthread(p);
|
||||
struct kthread *kthread = tsk_is_kthread(p);
|
||||
if (!kthread)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -309,3 +309,9 @@ void __ns_ref_active_get(struct ns_common *ns)
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool may_see_all_namespaces(void)
|
||||
{
|
||||
return (task_active_pid_ns(current) == &init_pid_ns) &&
|
||||
ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -515,32 +515,11 @@ static inline bool __must_check ns_requested(const struct klistns *kls,
|
|||
static inline bool __must_check may_list_ns(const struct klistns *kls,
|
||||
struct ns_common *ns)
|
||||
{
|
||||
if (kls->user_ns) {
|
||||
if (kls->userns_capable)
|
||||
return true;
|
||||
} else {
|
||||
struct ns_common *owner;
|
||||
struct user_namespace *user_ns;
|
||||
|
||||
owner = ns_owner(ns);
|
||||
if (owner)
|
||||
user_ns = to_user_ns(owner);
|
||||
else
|
||||
user_ns = &init_user_ns;
|
||||
if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (kls->user_ns && kls->userns_capable)
|
||||
return true;
|
||||
if (is_current_namespace(ns))
|
||||
return true;
|
||||
|
||||
if (ns->ns_type != CLONE_NEWUSER)
|
||||
return false;
|
||||
|
||||
if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return may_see_all_namespaces();
|
||||
}
|
||||
|
||||
static inline void ns_put(struct ns_common *ns)
|
||||
|
|
@ -600,7 +579,7 @@ static ssize_t do_listns_userns(struct klistns *kls)
|
|||
|
||||
ret = 0;
|
||||
head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
|
||||
kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
|
||||
kls->userns_capable = may_see_all_namespaces();
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
|
|
|
|||
|
|
@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) {
|
|||
__u64 mnt_ns_id[MNT_NS_COUNT];
|
||||
};
|
||||
|
||||
/*
 * Return true if the mount namespace id reported in @info matches one of the
 * MNT_NS_COUNT ids stored in @mnt_ns_id, false otherwise.
 */
static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info)
{
	int idx = 0;

	while (idx < MNT_NS_COUNT) {
		if (mnt_ns_id[idx] == info->mnt_ns_id)
			return true;
		idx++;
	}

	return false;
}
|
||||
|
||||
FIXTURE_SETUP(iterate_mount_namespaces)
|
||||
{
|
||||
for (int i = 0; i < MNT_NS_COUNT; i++)
|
||||
self->fd_mnt_ns[i] = -EBADF;
|
||||
|
||||
/*
|
||||
* Creating a new user namespace lets us guarantee that we only see
|
||||
* mount namespaces that we did actually create.
|
||||
*/
|
||||
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
|
||||
|
||||
for (int i = 0; i < MNT_NS_COUNT; i++) {
|
||||
struct mnt_ns_info info = {};
|
||||
|
||||
|
|
@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward)
|
|||
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
|
||||
ASSERT_GE(fd_mnt_ns_cur, 0);
|
||||
|
||||
for (;; count++) {
|
||||
for (;;) {
|
||||
struct mnt_ns_info info = {};
|
||||
int fd_mnt_ns_next;
|
||||
|
||||
fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
|
||||
if (fd_mnt_ns_next < 0 && errno == ENOENT)
|
||||
break;
|
||||
if (mntns_in_list(self->mnt_ns_id, &info))
|
||||
count++;
|
||||
ASSERT_GE(fd_mnt_ns_next, 0);
|
||||
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
|
||||
fd_mnt_ns_cur = fd_mnt_ns_next;
|
||||
|
|
@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards)
|
|||
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
|
||||
ASSERT_GE(fd_mnt_ns_cur, 0);
|
||||
|
||||
for (;; count++) {
|
||||
for (;;) {
|
||||
struct mnt_ns_info info = {};
|
||||
int fd_mnt_ns_prev;
|
||||
|
||||
fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
|
||||
if (fd_mnt_ns_prev < 0 && errno == ENOENT)
|
||||
break;
|
||||
if (mntns_in_list(self->mnt_ns_id, &info))
|
||||
count++;
|
||||
ASSERT_GE(fd_mnt_ns_prev, 0);
|
||||
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
|
||||
fd_mnt_ns_cur = fd_mnt_ns_prev;
|
||||
|
|
@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward)
|
|||
ASSERT_GE(fd_mnt_ns_next, 0);
|
||||
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
|
||||
fd_mnt_ns_cur = fd_mnt_ns_next;
|
||||
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
|
|||
ASSERT_GE(fd_mnt_ns_prev, 0);
|
||||
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
|
||||
fd_mnt_ns_cur = fd_mnt_ns_prev;
|
||||
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue