vfs-7.0-rc3.fixes

Please consider pulling these changes from the signed vfs-7.0-rc3.fixes tag.
 
 Thanks!
 Christian
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaaikgAAKCRCRxhvAZXjc
 orflAP9Dfs/DCoHLi9xknIqHgMqxJKHpwVzcGAOX8eI0ZOLVjQEA2nnhtbBvVh3q
 CAbQzwVHaujKVL2lGV/qwoaRFEvf1gI=
 =aZoy
 -----END PGP SIGNATURE-----

Merge tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - kthread: consolidate kthread exit paths to prevent use-after-free

 - iomap:
    - don't mark folio uptodate if read IO has bytes pending
    - don't report direct-io retries to fserror
    - reject delalloc mappings during writeback

 - ns: tighten visibility checks

 - netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
   sequence

* tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: reject delalloc mappings during writeback
  iomap: don't mark folio uptodate if read IO has bytes pending
  selftests: fix mntns iteration selftests
  nstree: tighten permission checks for listing
  nsfs: tighten permission checks for handle opening
  nsfs: tighten permission checks for ns iteration ioctls
  netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence
  kthread: consolidate kthread exit paths to prevent use-after-free
  iomap: don't report direct-io retries to fserror
commit 0b3bb20580
Author: Linus Torvalds
Date:   2026-03-04 15:03:16 -08:00

16 files changed, 326 insertions(+), 160 deletions(-)

@@ -80,18 +80,27 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
{
struct iomap_folio_state *ifs = folio->private;
unsigned long flags;
bool uptodate = true;
bool mark_uptodate = true;
if (folio_test_uptodate(folio))
return;
if (ifs) {
spin_lock_irqsave(&ifs->state_lock, flags);
uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
/*
* If a read with bytes pending is in progress, we must not call
* folio_mark_uptodate(). The read completion path
* (iomap_read_end()) will call folio_end_read(), which uses XOR
* semantics to set the uptodate bit. If we set it here, the XOR
* in folio_end_read() will clear it, leaving the folio not
* uptodate.
*/
mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) &&
!ifs->read_bytes_pending;
spin_unlock_irqrestore(&ifs->state_lock, flags);
}
if (uptodate)
if (mark_uptodate)
folio_mark_uptodate(folio);
}
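
For context on the comment above: folio_end_read() publishes the result by XOR-toggling the uptodate bit together with the lock bit, so setting the bit early is destructive rather than merely redundant. A minimal userspace sketch of that interaction, using invented model_* helpers instead of the real folio API:

#include <stdio.h>

#define UPTODATE 0x1UL

static unsigned long folio_flags;

/* Plain set, as folio_mark_uptodate() would do. */
static void model_folio_mark_uptodate(void)
{
	folio_flags |= UPTODATE;
}

/* Completion path modelled as an XOR toggle rather than a set. */
static void model_folio_end_read(int success)
{
	if (success)
		folio_flags ^= UPTODATE;
}

int main(void)
{
	model_folio_mark_uptodate();	/* premature: read bytes still pending */
	model_folio_end_read(1);	/* completion toggles the bit back off  */
	printf("uptodate after completion: %lu\n", folio_flags & UPTODATE); /* prints 0 */
	return 0;
}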

@@ -87,6 +87,19 @@ static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio)
return FSERR_DIRECTIO_READ;
}
static inline bool should_report_dio_fserror(const struct iomap_dio *dio)
{
switch (dio->error) {
case 0:
case -EAGAIN:
case -ENOTBLK:
/* don't send fsnotify for success or magic retry codes */
return false;
default:
return true;
}
}
ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
const struct iomap_dio_ops *dops = dio->dops;
@@ -96,7 +109,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (dops && dops->end_io)
ret = dops->end_io(iocb, dio->size, ret, dio->flags);
if (dio->error)
if (should_report_dio_fserror(dio))
fserror_report_io(file_inode(iocb->ki_filp),
iomap_dio_err_type(dio), offset, dio->size,
dio->error, GFP_NOFS);
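
A caller-side view of why 0, -EAGAIN and -ENOTBLK are filtered out above: they signal success, a non-blocking retry, or a fall-back to buffered I/O rather than a user-visible failure. The sketch below is illustrative only; classify_dio_result() and the enum are invented names, not iomap API:

#include <errno.h>

enum dio_disposition {
	DIO_DONE,		/* success, nothing to report                */
	DIO_RETRY_LATER,	/* e.g. IOCB_NOWAIT could not make progress  */
	DIO_FALLBACK_BUFFERED,	/* redo the write through the page cache     */
	DIO_REPORT,		/* a real I/O error worth an fserror event   */
};

static enum dio_disposition classify_dio_result(long err)
{
	switch (err) {
	case 0:
		return DIO_DONE;
	case -EAGAIN:
		return DIO_RETRY_LATER;
	case -ENOTBLK:
		return DIO_FALLBACK_BUFFERED;
	default:
		return DIO_REPORT;
	}
}

int main(void)
{
	return classify_dio_result(-EAGAIN) == DIO_RETRY_LATER ? 0 : 1;
}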

@@ -215,17 +215,18 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio,
WARN_ON_ONCE(!folio->private && map_len < dirty_len);
switch (wpc->iomap.type) {
case IOMAP_INLINE:
WARN_ON_ONCE(1);
return -EIO;
case IOMAP_UNWRITTEN:
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
break;
case IOMAP_MAPPED:
break;
case IOMAP_HOLE:
return map_len;
default:
break;
WARN_ON_ONCE(1);
return -EIO;
}
if (wpc->iomap.type == IOMAP_UNWRITTEN)
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
if (folio_test_dropbehind(folio))

@@ -9,6 +9,202 @@
#include <linux/uio.h>
#include "internal.h"
/*
* Perform the cleanup rituals after an unbuffered write is complete.
*/
static void netfs_unbuffered_write_done(struct netfs_io_request *wreq)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
_enter("R=%x", wreq->debug_id);
/* Okay, declare that all I/O is complete. */
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (!wreq->error)
netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
/* mmap may have got underfoot and we may now have folios
* locally covering the region we just wrote. Attempt to
* discard the folios, but leave in place any modified locally.
* ->write_iter() is prevented from interfering by the DIO
* counter.
*/
pgoff_t first = wreq->start >> PAGE_SHIFT;
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
invalidate_inode_pages2_range(wreq->mapping, first, last);
}
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
_debug("finished");
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
if (wreq->iocb) {
size_t written = umin(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
if (wreq->iocb->ki_complete) {
trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written);
}
wreq->iocb = VFS_PTR_POISON;
}
netfs_clear_subrequests(wreq);
}
/*
* Collect the subrequest results of unbuffered write subrequests.
*/
static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq,
struct netfs_io_stream *stream,
struct netfs_io_subrequest *subreq)
{
trace_netfs_collect_sreq(wreq, subreq);
spin_lock(&wreq->lock);
list_del_init(&subreq->rreq_link);
spin_unlock(&wreq->lock);
wreq->transferred += subreq->transferred;
iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
stream->collected_to = subreq->start + subreq->transferred;
wreq->collected_to = stream->collected_to;
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
trace_netfs_collect_stream(wreq, stream);
trace_netfs_collect_state(wreq, wreq->collected_to, 0);
}
/*
* Write data to the server without going through the pagecache and without
* writing it to the local cache. We dispatch the subrequests serially and
* wait for each to complete before dispatching the next, lest we leave a gap
in the data written due to a failure such as ENOSPC. We could, however,
* attempt to do preparation such as content encryption for the next subreq
* whilst the current is in progress.
*/
static int netfs_unbuffered_write(struct netfs_io_request *wreq)
{
struct netfs_io_subrequest *subreq = NULL;
struct netfs_io_stream *stream = &wreq->io_streams[0];
int ret;
_enter("%llx", wreq->len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
stream->collected_to = wreq->start;
for (;;) {
bool retry = false;
if (!subreq) {
netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
subreq = stream->construct;
stream->construct = NULL;
stream->front = NULL;
}
/* Check if (re-)preparation failed. */
if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) {
netfs_write_subrequest_terminated(subreq, subreq->error);
wreq->error = subreq->error;
break;
}
iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred);
if (!iov_iter_count(&subreq->io_iter))
break;
subreq->len = netfs_limit_iter(&subreq->io_iter, 0,
stream->sreq_max_len,
stream->sreq_max_segs);
iov_iter_truncate(&subreq->io_iter, subreq->len);
stream->submit_extendable_to = subreq->len;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
stream->issue_write(subreq);
/* Async, need to wait. */
netfs_wait_for_in_progress_stream(wreq, stream);
if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
retry = true;
} else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
ret = subreq->error;
wreq->error = ret;
netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed);
subreq = NULL;
break;
}
ret = 0;
if (!retry) {
netfs_unbuffered_write_collect(wreq, stream, subreq);
subreq = NULL;
if (wreq->transferred >= wreq->len)
break;
if (!wreq->iocb && signal_pending(current)) {
ret = wreq->transferred ? -EINTR : -ERESTARTSYS;
trace_netfs_rreq(wreq, netfs_rreq_trace_intr);
break;
}
continue;
}
/* We need to retry the last subrequest, so first reset the
* iterator, taking into account what, if anything, we managed
* to transfer.
*/
subreq->error = -EAGAIN;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
if (subreq->transferred > 0)
iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
if (stream->source == NETFS_UPLOAD_TO_SERVER &&
wreq->netfs_ops->retry_request)
wreq->netfs_ops->retry_request(wreq, stream);
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
subreq->io_iter = wreq->buffer.iter;
subreq->start = wreq->start + wreq->transferred;
subreq->len = wreq->len - wreq->transferred;
subreq->transferred = 0;
subreq->retry_count += 1;
stream->sreq_max_len = UINT_MAX;
stream->sreq_max_segs = INT_MAX;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
stream->prepare_write(subreq);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_stat(&netfs_n_wh_retry_write_subreq);
}
netfs_unbuffered_write_done(wreq);
_leave(" = %d", ret);
return ret;
}
static void netfs_unbuffered_write_async(struct work_struct *work)
{
struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
netfs_unbuffered_write(wreq);
netfs_put_request(wreq, netfs_rreq_trace_put_complete);
}
/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
@@ -70,35 +266,35 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
wreq->buffer.iter = *iter;
}
wreq->len = iov_iter_count(&wreq->buffer.iter);
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
if (async)
__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
/* Dispatch the write. */
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (async)
if (async) {
INIT_WORK(&wreq->work, netfs_unbuffered_write_async);
wreq->iocb = iocb;
wreq->len = iov_iter_count(&wreq->buffer.iter);
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
_debug("begin = %zd", ret);
goto out;
}
if (!async) {
ret = netfs_wait_for_write(wreq);
if (ret > 0)
iocb->ki_pos += ret;
} else {
queue_work(system_dfl_wq, &wreq->work);
ret = -EIOCBQUEUED;
} else {
ret = netfs_unbuffered_write(wreq);
if (ret < 0) {
_debug("begin = %zd", ret);
} else {
iocb->ki_pos += wreq->transferred;
ret = wreq->transferred ?: wreq->error;
}
netfs_put_request(wreq, netfs_rreq_trace_put_complete);
}
out:
netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
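
As an aside on the strict-sequence requirement documented above netfs_unbuffered_write(): because each subrequest is issued only after the previous one is known to have completed, a mid-stream failure such as ENOSPC leaves a contiguous written prefix rather than a hole. A standalone sketch of that dispatch pattern, with a hypothetical send_chunk() backend standing in for ->issue_write():

#include <errno.h>
#include <stdio.h>
#include <sys/types.h>

/* Hypothetical backend: succeeds until the fake device runs out of space. */
static int send_chunk(size_t off, size_t len, size_t capacity)
{
	return off + len <= capacity ? 0 : -ENOSPC;
}

static ssize_t write_in_strict_sequence(size_t total, size_t chunk, size_t capacity)
{
	size_t done = 0;

	while (done < total) {
		size_t len = total - done < chunk ? total - done : chunk;
		int err = send_chunk(done, len, capacity);

		if (err)		/* everything before 'done' is intact: no gap */
			return done ? (ssize_t)done : err;
		done += len;		/* advance only after confirmed completion */
	}
	return done;
}

int main(void)
{
	ssize_t n = write_in_strict_sequence(1 << 20, 64 << 10, 256 << 10);

	printf("short write of %zd bytes, nothing missing behind it\n", n);
	return 0;
}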

@@ -198,6 +198,9 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
struct file *file,
loff_t start,
enum netfs_io_origin origin);
void netfs_prepare_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream,
loff_t start);
void netfs_reissue_write(struct netfs_io_stream *stream,
struct netfs_io_subrequest *subreq,
struct iov_iter *source);
@@ -212,7 +215,6 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio **writethrough_cache);
ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *writethrough_cache);
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
* write_retry.c

@@ -399,27 +399,6 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
wreq->origin == NETFS_DIO_WRITE) &&
!wreq->error)
netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
/* mmap may have got underfoot and we may now have folios
* locally covering the region we just wrote. Attempt to
* discard the folios, but leave in place any modified locally.
* ->write_iter() is prevented from interfering by the DIO
* counter.
*/
pgoff_t first = wreq->start >> PAGE_SHIFT;
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
invalidate_inode_pages2_range(wreq->mapping, first, last);
}
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
_debug("finished");
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

@@ -154,9 +154,9 @@ EXPORT_SYMBOL(netfs_prepare_write_failed);
* Prepare a write subrequest. We need to allocate a new subrequest
* if we don't have one.
*/
static void netfs_prepare_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream,
loff_t start)
void netfs_prepare_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream,
loff_t start)
{
struct netfs_io_subrequest *subreq;
struct iov_iter *wreq_iter = &wreq->buffer.iter;
@@ -698,41 +698,6 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
return ret;
}
/*
* Write data to the server without going through the pagecache and without
* writing it to the local cache.
*/
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
{
struct netfs_io_stream *upload = &wreq->io_streams[0];
ssize_t part;
loff_t start = wreq->start;
int error = 0;
_enter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
while (len) {
// TODO: Prepare content encryption
_debug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
rolling_buffer_advance(&wreq->buffer, part);
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
netfs_wait_for_paused_write(wreq);
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
}
netfs_end_issue_write(wreq);
_leave(" = %d", error);
return error;
}
/*
* Write some of a pending folio data back to the server and/or the cache.
*/

@@ -199,6 +199,17 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
return false;
}
static bool may_use_nsfs_ioctl(unsigned int cmd)
{
switch (_IOC_NR(cmd)) {
case _IOC_NR(NS_MNT_GET_NEXT):
fallthrough;
case _IOC_NR(NS_MNT_GET_PREV):
return may_see_all_namespaces();
}
return true;
}
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -214,6 +225,8 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
if (!nsfs_ioctl_valid(ioctl))
return -ENOIOCTLCMD;
if (!may_use_nsfs_ioctl(ioctl))
return -EPERM;
ns = get_proc_ns(file_inode(filp));
switch (ioctl) {
@@ -614,7 +627,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return ERR_PTR(-EOPNOTSUPP);
}
if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
if (owning_ns && !may_see_all_namespaces()) {
ns->ops->put(ns);
return ERR_PTR(-EPERM);
}
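
The effect of the tightened checks can be observed from userspace; a minimal sketch, assuming <linux/nsfs.h> provides NS_MNT_GET_NEXT and struct mnt_ns_info, and assuming an unprivileged caller is now expected to get EPERM instead of an fd for the next mount namespace:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nsfs.h>

int main(void)
{
	struct mnt_ns_info info = {};
	int fd, next;

	fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		perror("open /proc/self/ns/mnt");
		return 1;
	}

	next = ioctl(fd, NS_MNT_GET_NEXT, &info);
	if (next < 0)
		printf("NS_MNT_GET_NEXT: %s\n", strerror(errno));	/* EPERM when unprivileged */
	else
		printf("next mnt_ns_id: %llu\n", (unsigned long long)info.mnt_ns_id);

	if (next >= 0)
		close(next);
	close(fd);
	return 0;
}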

@@ -7,6 +7,24 @@
struct mm_struct;
/* opaque kthread data */
struct kthread;
/*
* When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
* always remain a kthread. For kthreads p->worker_private always
* points to a struct kthread. For tasks that are not kthreads
* p->worker_private is used to point to other things.
*
* Return NULL for any task that is not a kthread.
*/
static inline struct kthread *tsk_is_kthread(struct task_struct *p)
{
if (p->flags & PF_KTHREAD)
return p->worker_private;
return NULL;
}
__printf(4, 5)
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
void *data,
@@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
void kthread_exit(long result) __noreturn;
#define kthread_exit(result) do_exit(result)
void kthread_complete_and_exit(struct completion *, long) __noreturn;
int kthreads_update_housekeeping(void);
void kthread_do_exit(struct kthread *, long);
int kthreadd(void *unused);
extern struct task_struct *kthreadd_task;
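
The tsk_is_kthread() helper added above gates the meaning of ->worker_private on PF_KTHREAD; a small userspace analogue of that accessor pattern (all names here are invented for illustration, this is not kernel code):

#include <stdio.h>

#define PF_KTHREAD_LIKE 0x1u

struct kthread_like { long result; };

struct task_like {
	unsigned int flags;
	void *worker_private;	/* a struct kthread_like * only when the flag is set */
};

/* NULL means "not a kthread": the field is owned by somebody else. */
static struct kthread_like *task_kthread_or_null(struct task_like *p)
{
	if (p->flags & PF_KTHREAD_LIKE)
		return p->worker_private;
	return NULL;
}

int main(void)
{
	struct kthread_like kt = { .result = 0 };
	struct task_like kthreadish = { .flags = PF_KTHREAD_LIKE, .worker_private = &kt };
	struct task_like other = { .flags = 0, .worker_private = NULL };

	printf("%d %d\n", task_kthread_or_null(&kthreadish) != NULL,
	       task_kthread_or_null(&other) != NULL);	/* prints: 1 0 */
	return 0;
}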

@@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
bool may_see_all_namespaces(void);
static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
{
return atomic_read(&ns->__ns_ref_active);

@@ -57,6 +57,7 @@
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \
EM(netfs_rreq_trace_free, "FREE ") \
EM(netfs_rreq_trace_intr, "INTR ") \
EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
EM(netfs_rreq_trace_recollect, "RECLLCT") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
@@ -169,7 +170,8 @@
EM(netfs_sreq_trace_put_oom, "PUT OOM ") \
EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
EM(netfs_sreq_trace_put_work, "PUT WORK ") \
E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \
E_(netfs_sreq_trace_see_failed, "SEE FAILED ")
#define netfs_folio_traces \
EM(netfs_folio_is_uptodate, "mod-uptodate") \

@@ -896,11 +896,16 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
void __noreturn do_exit(long code)
{
struct task_struct *tsk = current;
struct kthread *kthread;
int group_dead;
WARN_ON(irqs_disabled());
WARN_ON(tsk->plug);
kthread = tsk_is_kthread(tsk);
if (unlikely(kthread))
kthread_do_exit(kthread, code);
kcov_task_exit(tsk);
kmsan_task_exit(tsk);
@@ -1013,6 +1018,7 @@ void __noreturn do_exit(long code)
lockdep_free_task(tsk);
do_task_dead();
}
EXPORT_SYMBOL(do_exit);
void __noreturn make_task_dead(int signr)
{

@@ -85,24 +85,6 @@ static inline struct kthread *to_kthread(struct task_struct *k)
return k->worker_private;
}
/*
* Variant of to_kthread() that doesn't assume @p is a kthread.
*
* When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
* always remain a kthread. For kthreads p->worker_private always
* points to a struct kthread. For tasks that are not kthreads
* p->worker_private is used to point to other things.
*
* Return NULL for any task that is not a kthread.
*/
static inline struct kthread *__to_kthread(struct task_struct *p)
{
void *kthread = p->worker_private;
if (kthread && !(p->flags & PF_KTHREAD))
kthread = NULL;
return kthread;
}
void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
struct kthread *kthread = to_kthread(tsk);
@@ -193,7 +175,7 @@ EXPORT_SYMBOL_GPL(kthread_should_park);
bool kthread_should_stop_or_park(void)
{
struct kthread *kthread = __to_kthread(current);
struct kthread *kthread = tsk_is_kthread(current);
if (!kthread)
return false;
@@ -234,7 +216,7 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
*/
void *kthread_func(struct task_struct *task)
{
struct kthread *kthread = __to_kthread(task);
struct kthread *kthread = tsk_is_kthread(task);
if (kthread)
return kthread->threadfn;
return NULL;
@@ -266,7 +248,7 @@ EXPORT_SYMBOL_GPL(kthread_data);
*/
void *kthread_probe_data(struct task_struct *task)
{
struct kthread *kthread = __to_kthread(task);
struct kthread *kthread = tsk_is_kthread(task);
void *data = NULL;
if (kthread)
@@ -309,19 +291,8 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
/**
* kthread_exit - Cause the current kthread return @result to kthread_stop().
* @result: The integer value to return to kthread_stop().
*
* While kthread_exit can be called directly, it exists so that
* functions which do some additional work in non-modular code such as
* module_put_and_kthread_exit can be implemented.
*
* Does not return.
*/
void __noreturn kthread_exit(long result)
void kthread_do_exit(struct kthread *kthread, long result)
{
struct kthread *kthread = to_kthread(current);
kthread->result = result;
if (!list_empty(&kthread->affinity_node)) {
mutex_lock(&kthread_affinity_lock);
@@ -333,9 +304,7 @@ void __noreturn kthread_exit(long result)
kthread->preferred_affinity = NULL;
}
}
do_exit(0);
}
EXPORT_SYMBOL(kthread_exit);
/**
* kthread_complete_and_exit - Exit the current kthread.
@@ -683,7 +652,7 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
bool kthread_is_per_cpu(struct task_struct *p)
{
struct kthread *kthread = __to_kthread(p);
struct kthread *kthread = tsk_is_kthread(p);
if (!kthread)
return false;

@@ -309,3 +309,9 @@ void __ns_ref_active_get(struct ns_common *ns)
return;
}
}
bool may_see_all_namespaces(void)
{
return (task_active_pid_ns(current) == &init_pid_ns) &&
ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN);
}

@@ -515,32 +515,11 @@ static inline bool __must_check ns_requested(const struct klistns *kls,
static inline bool __must_check may_list_ns(const struct klistns *kls,
struct ns_common *ns)
{
if (kls->user_ns) {
if (kls->userns_capable)
return true;
} else {
struct ns_common *owner;
struct user_namespace *user_ns;
owner = ns_owner(ns);
if (owner)
user_ns = to_user_ns(owner);
else
user_ns = &init_user_ns;
if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
return true;
}
if (kls->user_ns && kls->userns_capable)
return true;
if (is_current_namespace(ns))
return true;
if (ns->ns_type != CLONE_NEWUSER)
return false;
if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
return true;
return false;
return may_see_all_namespaces();
}
static inline void ns_put(struct ns_common *ns)
@@ -600,7 +579,7 @@ static ssize_t do_listns_userns(struct klistns *kls)
ret = 0;
head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
kls->userns_capable = may_see_all_namespaces();
rcu_read_lock();

@@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) {
__u64 mnt_ns_id[MNT_NS_COUNT];
};
static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info)
{
for (int i = 0; i < MNT_NS_COUNT; i++) {
if (mnt_ns_id[i] == info->mnt_ns_id)
return true;
}
return false;
}
FIXTURE_SETUP(iterate_mount_namespaces)
{
for (int i = 0; i < MNT_NS_COUNT; i++)
self->fd_mnt_ns[i] = -EBADF;
/*
* Creating a new user namespace lets us guarantee that we only see
* mount namespaces that we did actually create.
*/
ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
for (int i = 0; i < MNT_NS_COUNT; i++) {
struct mnt_ns_info info = {};
@@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward)
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
ASSERT_GE(fd_mnt_ns_cur, 0);
for (;; count++) {
for (;;) {
struct mnt_ns_info info = {};
int fd_mnt_ns_next;
fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
if (fd_mnt_ns_next < 0 && errno == ENOENT)
break;
if (mntns_in_list(self->mnt_ns_id, &info))
count++;
ASSERT_GE(fd_mnt_ns_next, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_next;
@@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards)
fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
ASSERT_GE(fd_mnt_ns_cur, 0);
for (;; count++) {
for (;;) {
struct mnt_ns_info info = {};
int fd_mnt_ns_prev;
fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
if (fd_mnt_ns_prev < 0 && errno == ENOENT)
break;
if (mntns_in_list(self->mnt_ns_id, &info))
count++;
ASSERT_GE(fd_mnt_ns_prev, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_prev;
@@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward)
ASSERT_GE(fd_mnt_ns_next, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_next;
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}
@@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
ASSERT_GE(fd_mnt_ns_prev, 0);
ASSERT_EQ(close(fd_mnt_ns_cur), 0);
fd_mnt_ns_cur = fd_mnt_ns_prev;
ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
}
}