vfs-7.0-rc1.namespace

Please consider pulling these changes from the signed vfs-7.0-rc1.namespace tag.
 
 Thanks!
 Christian
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaYX49gAKCRCRxhvAZXjc
 ovzgAP9BpqMQhMy2VCurru8/T5VAd6eJdgXzEfXqMksL5BNm8gEAsLx666KJNKgm
 Sh/yVA2KBjf51gvcLZ4gHOISaMU8bAI=
 =RGLf
 -----END PGP SIGNATURE-----

Merge tag 'vfs-7.0-rc1.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs mount updates from Christian Brauner:

 - statmount: accept fd as a parameter

   Extend struct mnt_id_req with a file descriptor field and a new
   STATMOUNT_BY_FD flag. When set, statmount() returns mount information
   for the mount the fd resides on — including detached mounts
   (unmounted via umount2(MNT_DETACH)).

   For detached mounts the STATMOUNT_MNT_POINT and STATMOUNT_MNT_NS_ID
   mask bits are cleared since neither is meaningful. The capability
   check is skipped for STATMOUNT_BY_FD since holding an fd already
   implies prior access to the mount and equivalent information is
   available through fstatfs() and /proc/pid/mountinfo without
   privilege. Includes comprehensive selftests covering both attached
   and detached mount cases.

 - fs: Remove internal old mount API code (1 patch)

   Now that every in-tree filesystem has been converted to the new
   mount API, remove all the legacy shim code in fs_context.c that
   handled unconverted filesystems. This deletes ~280 lines including
   legacy_init_fs_context(), the legacy_fs_context struct, and
   associated wrappers. The mount(2) syscall path for userspace remains
   untouched. Documentation references to the legacy callbacks are
   cleaned up.

 - mount: add OPEN_TREE_NAMESPACE to open_tree()

   Container runtimes currently use CLONE_NEWNS to copy the caller's
   entire mount namespace — only to then pivot_root() and recursively
   unmount everything they just copied. With large mount tables and
   thousands of parallel container launches this creates significant
   contention on the namespace semaphore.

   OPEN_TREE_NAMESPACE copies only the specified mount tree (like
   OPEN_TREE_CLONE) but returns a mount namespace fd instead of a
   detached mount fd. The new namespace contains the copied tree mounted
   on top of a clone of the real rootfs.

   This functions as a combined unshare(CLONE_NEWNS) + pivot_root() in a
   single syscall. Works with user namespaces: an unshare(CLONE_NEWUSER)
   followed by OPEN_TREE_NAMESPACE creates a mount namespace owned by
   the new user namespace. Mount namespace file mounts are excluded from
   the copy to prevent cycles. Includes ~1000 lines of selftests.

* tag 'vfs-7.0-rc1.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  selftests/open_tree: add OPEN_TREE_NAMESPACE tests
  mount: add OPEN_TREE_NAMESPACE
  fs: Remove internal old mount API code
  selftests: statmount: tests for STATMOUNT_BY_FD
  statmount: accept fd as a parameter
  statmount: permission check should return EPERM
This commit is contained in:
Linus Torvalds 2026-02-09 14:43:47 -08:00
commit 157d3d6efd
20 changed files with 1669 additions and 365 deletions

View file

@ -180,7 +180,6 @@ prototypes::
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
@ -204,7 +203,6 @@ sync_fs: read
freeze_fs: write
unfreeze_fs: write
statfs: maybe(read) (see below)
remount_fs: write
umount_begin: no
show_options: no (namespace_sem)
quota_read: no (see below)
@ -229,8 +227,6 @@ file_system_type
prototypes::
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
void (*kill_sb) (struct super_block *);
locking rules:
@ -238,13 +234,9 @@ locking rules:
======= =========
ops may block
======= =========
mount yes
kill_sb yes
======= =========
->mount() returns ERR_PTR or the root dentry; its superblock should be locked
on return.
->kill_sb() takes a write-locked superblock, does all shutdown work on it,
unlocks and drops the reference.

View file

@ -299,8 +299,6 @@ manage the filesystem context. They are as follows:
On success it should return 0. In the case of an error, it should return
a negative error code.
.. Note:: reconfigure is intended as a replacement for remount_fs.
Filesystem context Security
===========================

View file

@ -448,11 +448,8 @@ a file off.
**mandatory**
->get_sb() is gone. Switch to use of ->mount(). Typically it's just
a matter of switching from calling ``get_sb_``... to ``mount_``... and changing
the function type. If you were doing it manually, just switch from setting
->mnt_root to some pointer to returning that pointer. On errors return
ERR_PTR(...).
->get_sb() and ->mount() are gone. Switch to using the new mount API. See
Documentation/filesystems/mount_api.rst for more details.
---

View file

@ -94,11 +94,9 @@ functions:
The passed struct file_system_type describes your filesystem. When a
request is made to mount a filesystem onto a directory in your
namespace, the VFS will call the appropriate mount() method for the
specific filesystem. New vfsmount referring to the tree returned by
->mount() will be attached to the mountpoint, so that when pathname
resolution reaches the mountpoint it will jump into the root of that
vfsmount.
namespace, the VFS will call the appropriate get_tree() method for the
specific filesystem. See Documentation/filesystems/mount_api.rst
for more details.
You can see all filesystems that are registered to the kernel in the
file /proc/filesystems.
@ -117,8 +115,6 @@ members are defined:
int fs_flags;
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;
@ -151,10 +147,6 @@ members are defined:
'struct fs_parameter_spec'.
More info in Documentation/filesystems/mount_api.rst.
``mount``
the method to call when a new instance of this filesystem should
be mounted
``kill_sb``
the method to call when an instance of this filesystem should be
shut down
@ -173,45 +165,6 @@ members are defined:
s_lock_key, s_umount_key, s_vfs_rename_key, s_writers_key,
i_lock_key, i_mutex_key, invalidate_lock_key, i_mutex_dir_key: lockdep-specific
The mount() method has the following arguments:
``struct file_system_type *fs_type``
describes the filesystem, partly initialized by the specific
filesystem code
``int flags``
mount flags
``const char *dev_name``
the device name we are mounting.
``void *data``
arbitrary mount options, usually comes as an ASCII string (see
"Mount Options" section)
The mount() method must return the root dentry of the tree requested by
caller. An active reference to its superblock must be grabbed and the
superblock must be locked. On failure it should return ERR_PTR(error).
The arguments match those of mount(2) and their interpretation depends
on filesystem type. E.g. for block filesystems, dev_name is interpreted
as block device name, that device is opened and if it contains a
suitable filesystem image the method creates and initializes struct
super_block accordingly, returning its root dentry to caller.
->mount() may choose to return a subtree of existing filesystem - it
doesn't have to create a new one. The main result from the caller's
point of view is a reference to dentry at the root of (sub)tree to be
attached; creation of new superblock is a common side effect.
The most interesting member of the superblock structure that the mount()
method fills in is the "s_op" field. This is a pointer to a "struct
super_operations" which describes the next level of the filesystem
implementation.
For more information on mounting (and the new mount API), see
Documentation/filesystems/mount_api.rst.
The Superblock Object
=====================
@ -244,7 +197,6 @@ filesystem. The following members are defined:
enum freeze_wholder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
@ -351,10 +303,6 @@ or bottom half).
``statfs``
called when the VFS needs to get filesystem statistics.
``remount_fs``
called when the filesystem is remounted. This is called with
the kernel lock held
``umount_begin``
called when the VFS is unmounting a filesystem.

View file

@ -24,20 +24,6 @@
#include "mount.h"
#include "internal.h"
enum legacy_fs_param {
LEGACY_FS_UNSET_PARAMS,
LEGACY_FS_MONOLITHIC_PARAMS,
LEGACY_FS_INDIVIDUAL_PARAMS,
};
struct legacy_fs_context {
char *legacy_data; /* Data page for legacy filesystems */
size_t data_size;
enum legacy_fs_param param_type;
};
static int legacy_init_fs_context(struct fs_context *fc);
static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
@ -275,7 +261,6 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
unsigned int sb_flags_mask,
enum fs_context_purpose purpose)
{
int (*init_fs_context)(struct fs_context *);
struct fs_context *fc;
int ret = -ENOMEM;
@ -307,12 +292,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
break;
}
/* TODO: Make all filesystems support this unconditionally */
init_fs_context = fc->fs_type->init_fs_context;
if (!init_fs_context)
init_fs_context = legacy_init_fs_context;
ret = init_fs_context(fc);
ret = fc->fs_type->init_fs_context(fc);
if (ret < 0)
goto err_fc;
fc->need_free = true;
@ -376,8 +356,6 @@ void fc_drop_locked(struct fs_context *fc)
deactivate_locked_super(sb);
}
static void legacy_fs_context_free(struct fs_context *fc);
/**
* vfs_dup_fs_context - Duplicate a filesystem context.
* @src_fc: The context to copy.
@ -531,184 +509,6 @@ void put_fs_context(struct fs_context *fc)
}
EXPORT_SYMBOL(put_fs_context);
/*
* Free the config for a filesystem that doesn't support fs_context.
*/
static void legacy_fs_context_free(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
if (ctx) {
if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS)
kfree(ctx->legacy_data);
kfree(ctx);
}
}
/*
* Duplicate a legacy config.
*/
static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
{
struct legacy_fs_context *ctx;
struct legacy_fs_context *src_ctx = src_fc->fs_private;
ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) {
ctx->legacy_data = kmemdup(src_ctx->legacy_data,
src_ctx->data_size, GFP_KERNEL);
if (!ctx->legacy_data) {
kfree(ctx);
return -ENOMEM;
}
}
fc->fs_private = ctx;
return 0;
}
/*
* Add a parameter to a legacy config. We build up a comma-separated list of
* options.
*/
static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct legacy_fs_context *ctx = fc->fs_private;
unsigned int size = ctx->data_size;
size_t len = 0;
int ret;
ret = vfs_parse_fs_param_source(fc, param);
if (ret != -ENOPARAM)
return ret;
if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
switch (param->type) {
case fs_value_is_string:
len = 1 + param->size;
fallthrough;
case fs_value_is_flag:
len += strlen(param->key);
break;
default:
return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
param->key);
}
if (size + len + 2 > PAGE_SIZE)
return invalf(fc, "VFS: Legacy: Cumulative options too large");
if (strchr(param->key, ',') ||
(param->type == fs_value_is_string &&
memchr(param->string, ',', param->size)))
return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
param->key);
if (!ctx->legacy_data) {
ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!ctx->legacy_data)
return -ENOMEM;
}
if (size)
ctx->legacy_data[size++] = ',';
len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len);
size += len;
if (param->type == fs_value_is_string) {
ctx->legacy_data[size++] = '=';
memcpy(ctx->legacy_data + size, param->string, param->size);
size += param->size;
}
ctx->legacy_data[size] = '\0';
ctx->data_size = size;
ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
return 0;
}
/*
* Add monolithic mount data.
*/
static int legacy_parse_monolithic(struct fs_context *fc, void *data)
{
struct legacy_fs_context *ctx = fc->fs_private;
if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
pr_warn("VFS: Can't mix monolithic and individual options\n");
return -EINVAL;
}
ctx->legacy_data = data;
ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
if (!ctx->legacy_data)
return 0;
if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
return 0;
return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
}
/*
* Get a mountable root with the legacy mount command.
*/
static int legacy_get_tree(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
struct super_block *sb;
struct dentry *root;
root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
fc->source, ctx->legacy_data);
if (IS_ERR(root))
return PTR_ERR(root);
sb = root->d_sb;
BUG_ON(!sb);
fc->root = root;
return 0;
}
/*
* Handle remount.
*/
static int legacy_reconfigure(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
struct super_block *sb = fc->root->d_sb;
if (!sb->s_op->remount_fs)
return 0;
return sb->s_op->remount_fs(sb, &fc->sb_flags,
ctx ? ctx->legacy_data : NULL);
}
const struct fs_context_operations legacy_fs_context_ops = {
.free = legacy_fs_context_free,
.dup = legacy_fs_context_dup,
.parse_param = legacy_parse_param,
.parse_monolithic = legacy_parse_monolithic,
.get_tree = legacy_get_tree,
.reconfigure = legacy_reconfigure,
};
/*
* Initialise a legacy context for a filesystem that doesn't support
* fs_context.
*/
static int legacy_init_fs_context(struct fs_context *fc)
{
fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
if (!fc->fs_private)
return -ENOMEM;
fc->ops = &legacy_fs_context_ops;
return 0;
}
int parse_monolithic_mount_data(struct fs_context *fc, void *data)
{
int (*monolithic_mount_data)(struct fs_context *, void *);
@ -757,10 +557,8 @@ int finish_clean_context(struct fs_context *fc)
if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
return 0;
if (fc->fs_type->init_fs_context)
error = fc->fs_type->init_fs_context(fc);
else
error = legacy_init_fs_context(fc);
error = fc->fs_type->init_fs_context(fc);
if (unlikely(error)) {
fc->phase = FS_CONTEXT_FAILED;
return error;

View file

@ -404,16 +404,6 @@ SYSCALL_DEFINE5(fsconfig,
return -EINVAL;
fc = fd_file(f)->private_data;
if (fc->ops == &legacy_fs_context_ops) {
switch (cmd) {
case FSCONFIG_SET_BINARY:
case FSCONFIG_SET_PATH:
case FSCONFIG_SET_PATH_EMPTY:
case FSCONFIG_SET_FD:
case FSCONFIG_CMD_CREATE_EXCL:
return -EOPNOTSUPP;
}
}
if (_key) {
param.key = strndup_user(_key, 256);

View file

@ -44,7 +44,6 @@ extern void __init chrdev_init(void);
/*
* fs_context.c
*/
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);
@ -249,6 +248,7 @@ extern void mnt_pin_kill(struct mount *m);
*/
extern const struct dentry_operations ns_dentry_operations;
int open_namespace(struct ns_common *ns);
struct file *open_namespace_file(struct ns_common *ns);
/*
* fs/stat.c:

View file

@ -2796,6 +2796,9 @@ static inline void unlock_mount(struct pinned_mountpoint *m)
__unlock_mount(m);
}
static void lock_mount_exact(const struct path *path,
struct pinned_mountpoint *mp);
#define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \
struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
do_lock_mount((path), &mp, (beneath))
@ -2946,10 +2949,11 @@ static inline bool may_copy_tree(const struct path *path)
return check_anonymous_mnt(mnt);
}
static struct mount *__do_loopback(const struct path *old_path, int recurse)
static struct mount *__do_loopback(const struct path *old_path,
unsigned int flags, unsigned int copy_flags)
{
struct mount *old = real_mount(old_path->mnt);
bool recurse = flags & AT_RECURSIVE;
if (IS_MNT_UNBINDABLE(old))
return ERR_PTR(-EINVAL);
@ -2960,10 +2964,22 @@ static struct mount *__do_loopback(const struct path *old_path, int recurse)
if (!recurse && __has_locked_children(old, old_path->dentry))
return ERR_PTR(-EINVAL);
/*
* When creating a new mount namespace we don't want to copy over
* mounts of mount namespaces to avoid the risk of cycles and also to
* minimize the default complex interdependencies between mount
* namespaces.
*
* We could ofc just check whether all mount namespace files aren't
* creating cycles but really let's keep this simple.
*/
if (!(flags & OPEN_TREE_NAMESPACE))
copy_flags |= CL_COPY_MNT_NS_FILE;
if (recurse)
return copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
else
return clone_mnt(old, old_path->dentry, 0);
return copy_tree(old, old_path->dentry, copy_flags);
return clone_mnt(old, old_path->dentry, copy_flags);
}
/*
@ -2974,7 +2990,9 @@ static int do_loopback(const struct path *path, const char *old_name,
{
struct path old_path __free(path_put) = {};
struct mount *mnt = NULL;
unsigned int flags = recurse ? AT_RECURSIVE : 0;
int err;
if (!old_name || !*old_name)
return -EINVAL;
err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
@ -2991,7 +3009,7 @@ static int do_loopback(const struct path *path, const char *old_name,
if (!check_mnt(mp.parent))
return -EINVAL;
mnt = __do_loopback(&old_path, recurse);
mnt = __do_loopback(&old_path, flags, 0);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
@ -3004,7 +3022,7 @@ static int do_loopback(const struct path *path, const char *old_name,
return err;
}
static struct mnt_namespace *get_detached_copy(const struct path *path, bool recursive)
static struct mnt_namespace *get_detached_copy(const struct path *path, unsigned int flags)
{
struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns;
struct user_namespace *user_ns = mnt_ns->user_ns;
@ -3029,7 +3047,7 @@ static struct mnt_namespace *get_detached_copy(const struct path *path, bool rec
ns->seq_origin = src_mnt_ns->ns.ns_id;
}
mnt = __do_loopback(path, recursive);
mnt = __do_loopback(path, flags, 0);
if (IS_ERR(mnt)) {
emptied_ns = ns;
return ERR_CAST(mnt);
@ -3043,9 +3061,9 @@ static struct mnt_namespace *get_detached_copy(const struct path *path, bool rec
return ns;
}
static struct file *open_detached_copy(struct path *path, bool recursive)
static struct file *open_detached_copy(struct path *path, unsigned int flags)
{
struct mnt_namespace *ns = get_detached_copy(path, recursive);
struct mnt_namespace *ns = get_detached_copy(path, flags);
struct file *file;
if (IS_ERR(ns))
@ -3061,21 +3079,122 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
return file;
}
DEFINE_FREE(put_empty_mnt_ns, struct mnt_namespace *,
if (!IS_ERR_OR_NULL(_T)) free_mnt_ns(_T))
/*
* create_new_namespace - build a new mount namespace around a copy of @path
* @path: tree to copy; handed to __do_loopback()
* @flags: open_tree() flags; forwarded unchanged to __do_loopback()
*
* Allocates a mount namespace for the caller's current user namespace,
* clones the root mount of the caller's mount namespace, mounts a copy of
* @path on top of that clone and then adds every mount reachable from the
* cloned root to the new namespace.
*
* Returns the new namespace on success or an ERR_PTR() on failure. On the
* early-return paths the __free() annotations on new_ns and to_path perform
* the cleanup.
*/
static struct mnt_namespace *create_new_namespace(struct path *path, unsigned int flags)
{
struct mnt_namespace *new_ns __free(put_empty_mnt_ns) = NULL;
struct path to_path __free(path_put) = {};
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct user_namespace *user_ns = current_user_ns();
struct mount *new_ns_root;
struct mount *mnt;
unsigned int copy_flags = 0;
bool locked = false;
if (user_ns != ns->user_ns)
copy_flags |= CL_SLAVE;
new_ns = alloc_mnt_ns(user_ns, false);
if (IS_ERR(new_ns))
return ERR_CAST(new_ns);
scoped_guard(namespace_excl) {
new_ns_root = clone_mnt(ns->root, ns->root->mnt.mnt_root, copy_flags);
if (IS_ERR(new_ns_root))
return ERR_CAST(new_ns_root);
/*
* If the real rootfs had a locked mount on top of it somewhere
* in the stack, lock the new mount tree as well so it can't be
* exposed.
*/
mnt = ns->root;
while (mnt->overmount) {
mnt = mnt->overmount;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
locked = true;
}
}
/*
* We dropped the namespace semaphore so we can actually lock
* the copy for mounting. The copied mount isn't attached to any
* mount namespace and it is thus excluded from any propagation.
* So realistically we're isolated and the mount can't be
* overmounted.
*/
/* Borrow the reference from clone_mnt(). */
to_path.mnt = &new_ns_root->mnt;
to_path.dentry = dget(new_ns_root->mnt.mnt_root);
/* Now lock for actual mounting. */
LOCK_MOUNT_EXACT(mp, &to_path);
if (unlikely(IS_ERR(mp.parent)))
return ERR_CAST(mp.parent);
/*
* We don't emulate unshare()ing a mount namespace. We stick to the
* restrictions of creating detached bind-mounts. It has a lot
* saner and simpler semantics.
*/
mnt = __do_loopback(path, flags, copy_flags);
if (IS_ERR(mnt))
return ERR_CAST(mnt);
scoped_guard(mount_writer) {
if (locked)
mnt->mnt.mnt_flags |= MNT_LOCKED;
/*
* Now mount the detached tree on top of the copy of the
* real rootfs we created.
*/
attach_mnt(mnt, new_ns_root, mp.mp);
if (user_ns != ns->user_ns)
lock_mnt_tree(new_ns_root);
}
/* Add all mounts to the new namespace. */
for (struct mount *p = new_ns_root; p; p = next_mnt(p, new_ns_root)) {
mnt_add_to_ns(new_ns, p);
new_ns->nr_mounts++;
}
/*
* Success: the mount reference borrowed into to_path now belongs to
* new_ns->root, and new_ns itself is handed to the caller; both are
* taken out of the scope-based cleanup via no_free_ptr().
*/
new_ns->root = real_mount(no_free_ptr(to_path.mnt));
ns_tree_add_raw(new_ns);
return no_free_ptr(new_ns);
}
/*
* open_new_namespace - create a mount namespace from @path and open it
* @path: tree to copy into the new namespace
* @flags: open_tree() flags, forwarded to create_new_namespace()
*
* Returns the file produced by open_namespace_file() for the new namespace,
* or an ERR_PTR() carrying the error from create_new_namespace().
*/
static struct file *open_new_namespace(struct path *path, unsigned int flags)
{
struct mnt_namespace *new_ns;
new_ns = create_new_namespace(path, flags);
if (IS_ERR(new_ns))
return ERR_CAST(new_ns);
return open_namespace_file(to_ns_common(new_ns));
}
static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags)
{
int ret;
struct path path __free(path_put) = {};
int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
bool detached = flags & OPEN_TREE_CLONE;
BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC))
OPEN_TREE_CLOEXEC | OPEN_TREE_NAMESPACE))
return ERR_PTR(-EINVAL);
if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE | OPEN_TREE_NAMESPACE)) ==
AT_RECURSIVE)
return ERR_PTR(-EINVAL);
if (hweight32(flags & (OPEN_TREE_CLONE | OPEN_TREE_NAMESPACE)) > 1)
return ERR_PTR(-EINVAL);
if (flags & AT_NO_AUTOMOUNT)
@ -3085,15 +3204,27 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
if (detached && !may_mount())
/*
* If we create a new mount namespace with the cloned mount tree we
* just care about being privileged over our current user namespace.
* The new mount namespace will be owned by it.
*/
if ((flags & OPEN_TREE_NAMESPACE) &&
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if ((flags & OPEN_TREE_CLONE) && !may_mount())
return ERR_PTR(-EPERM);
ret = user_path_at(dfd, filename, lookup_flags, &path);
if (unlikely(ret))
return ERR_PTR(ret);
if (detached)
return open_detached_copy(&path, flags & AT_RECURSIVE);
if (flags & OPEN_TREE_NAMESPACE)
return open_new_namespace(&path, flags);
if (flags & OPEN_TREE_CLONE)
return open_detached_copy(&path, flags);
return dentry_open(&path, O_PATH, current_cred());
}
@ -5554,31 +5685,49 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
/* locks: namespace_shared */
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
struct file *mnt_file, struct mnt_namespace *ns)
{
struct mount *m;
int err;
/* Has the namespace already been emptied? */
if (mnt_ns_id && mnt_ns_empty(ns))
return -ENOENT;
if (mnt_file) {
WARN_ON_ONCE(ns != NULL);
s->mnt = lookup_mnt_in_ns(mnt_id, ns);
if (!s->mnt)
return -ENOENT;
s->mnt = mnt_file->f_path.mnt;
ns = real_mount(s->mnt)->mnt_ns;
if (!ns)
/*
* We can't set mount point and mnt_ns_id since we don't have a
* ns for the mount. This can happen if the mount is unmounted
* with MNT_DETACH.
*/
s->mask &= ~(STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID);
} else {
/* Has the namespace already been emptied? */
if (mnt_ns_id && mnt_ns_empty(ns))
return -ENOENT;
err = grab_requested_root(ns, &s->root);
if (err)
return err;
s->mnt = lookup_mnt_in_ns(mnt_id, ns);
if (!s->mnt)
return -ENOENT;
}
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
m = real_mount(s->mnt);
if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (ns) {
err = grab_requested_root(ns, &s->root);
if (err)
return err;
if (!mnt_file) {
struct mount *m;
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
m = real_mount(s->mnt);
if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
}
}
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
@ -5700,7 +5849,7 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
}
static int copy_mnt_id_req(const struct mnt_id_req __user *req,
struct mnt_id_req *kreq)
struct mnt_id_req *kreq, unsigned int flags)
{
int ret;
size_t usize;
@ -5718,11 +5867,17 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
if (ret)
return ret;
if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
return -EINVAL;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
return -EINVAL;
if (flags & STATMOUNT_BY_FD) {
if (kreq->mnt_id || kreq->mnt_ns_id)
return -EINVAL;
} else {
if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
return -EINVAL;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
return -EINVAL;
}
return 0;
}
@ -5769,25 +5924,33 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
{
struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct kstatmount *ks __free(kfree) = NULL;
struct file *mnt_file __free(fput) = NULL;
struct mnt_id_req kreq;
/* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX;
int ret;
if (flags)
if (flags & ~STATMOUNT_BY_FD)
return -EINVAL;
ret = copy_mnt_id_req(req, &kreq);
ret = copy_mnt_id_req(req, &kreq, flags);
if (ret)
return ret;
ns = grab_requested_mnt_ns(&kreq);
if (IS_ERR(ns))
return PTR_ERR(ns);
if (flags & STATMOUNT_BY_FD) {
mnt_file = fget_raw(kreq.mnt_fd);
if (!mnt_file)
return -EBADF;
/* do_statmount sets ns in case of STATMOUNT_BY_FD */
} else {
ns = grab_requested_mnt_ns(&kreq);
if (IS_ERR(ns))
return PTR_ERR(ns);
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
}
ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
if (!ks)
@ -5799,7 +5962,7 @@ retry:
return ret;
scoped_guard(namespace_shared)
ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, mnt_file, ns);
if (!ret)
ret = copy_statmount_to_user(ks);
@ -5939,7 +6102,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT;
ret = copy_mnt_id_req(req, &kreq);
ret = copy_mnt_id_req(req, &kreq, 0);
if (ret)
return ret;

View file

@ -99,6 +99,19 @@ int ns_get_path(struct path *path, struct task_struct *task,
return ns_get_path_cb(path, ns_get_path_task, &args);
}
/*
* open_namespace_file - open a read-only file referring to a namespace
* @ns: the namespace to open
*
* Resolves @ns to a path on nsfs_mnt via path_from_stashed() and opens that
* path with O_RDONLY using the caller's credentials. The local path is
* released on scope exit through __free(path_put).
*
* Returns the opened file, or an ERR_PTR() if the path lookup fails or
* dentry_open() returns an error.
*
* NOTE(review): per the inline comment, path_from_stashed() consumes a
* reference on @ns, so callers presumably must hold one - confirm.
*/
struct file *open_namespace_file(struct ns_common *ns)
{
struct path path __free(path_put) = {};
int err;
/* call first to consume reference */
err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
if (err < 0)
return ERR_PTR(err);
return dentry_open(&path, O_RDONLY, current_cred());
}
/**
* open_namespace - open a namespace
* @ns: the namespace to open

View file

@ -2282,8 +2282,6 @@ struct file_system_type {
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;

View file

@ -97,7 +97,6 @@ struct super_operations {
const void *owner);
int (*unfreeze_fs)(struct super_block *sb);
int (*statfs)(struct dentry *dentry, struct kstatfs *kstatfs);
int (*remount_fs) (struct super_block *, int *, char *);
void (*umount_begin)(struct super_block *sb);
int (*show_options)(struct seq_file *seq, struct dentry *dentry);

View file

@ -61,7 +61,8 @@
/*
* open_tree() flags.
*/
#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */
#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */
#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
/*
@ -197,7 +198,10 @@ struct statmount {
*/
struct mnt_id_req {
__u32 size;
__u32 mnt_ns_fd;
union {
__u32 mnt_ns_fd;
__u32 mnt_fd;
};
__u64 mnt_id;
__u64 param;
__u64 mnt_ns_id;
@ -232,4 +236,9 @@ struct mnt_id_req {
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
/*
* @flag bits for statmount(2)
*/
#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */
#endif /* _UAPI_LINUX_MOUNT_H */

View file

@ -0,0 +1 @@
open_tree_ns_test

View file

@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := open_tree_ns_test
CFLAGS := -Wall -Werror -g $(KHDR_INCLUDES)
LDLIBS := -lcap
include ../../lib.mk
$(OUTPUT)/open_tree_ns_test: open_tree_ns_test.c ../utils.c
$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)

File diff suppressed because it is too large Load diff

View file

@ -43,19 +43,24 @@
#endif
#endif
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint32_t fd,
uint64_t mask, struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
if (mnt_ns_id) {
if (flags & STATMOUNT_BY_FD) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
req.mnt_fd = fd;
} else {
req.mnt_id = mnt_id;
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);

View file

@ -33,15 +33,24 @@ static const char *const known_fs[] = {
"sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf",
"vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL };
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
static struct statmount *statmount_alloc(uint64_t mnt_id, int fd, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
struct statmount *buf = NULL, *tmp = alloca(bufsize);
struct statmount *buf = NULL, *tmp = NULL;
int tofree = 0;
int ret;
if (flags & STATMOUNT_BY_FD && fd < 0)
return NULL;
tmp = alloca(bufsize);
for (;;) {
ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (flags & STATMOUNT_BY_FD)
ret = statmount(0, 0, (uint32_t) fd, mask, tmp, bufsize, flags);
else
ret = statmount(mnt_id, 0, 0, mask, tmp, bufsize, flags);
if (ret != -1)
break;
if (tofree)
@ -237,7 +246,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
ret = statmount(root_id, 0, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@ -263,7 +272,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@ -323,7 +332,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@ -375,7 +384,7 @@ static void test_statmount_mnt_point(void)
{
struct statmount *sm;
sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0);
sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_POINT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount point: %s\n",
strerror(errno));
@ -405,7 +414,7 @@ static void test_statmount_mnt_root(void)
assert(last_dir);
last_dir++;
sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0);
sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT, 0);
if (!sm) {
ksft_test_result_fail("statmount mount root: %s\n",
strerror(errno));
@ -438,7 +447,7 @@ static void test_statmount_fs_type(void)
const char *fs_type;
const char *const *s;
sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0);
sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
if (!sm) {
ksft_test_result_fail("statmount fs type: %s\n",
strerror(errno));
@ -467,7 +476,7 @@ static void test_statmount_mnt_opts(void)
char *line = NULL;
size_t len = 0;
sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
0);
if (!sm) {
ksft_test_result_fail("statmount mnt opts: %s\n",
@ -557,7 +566,7 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
uint32_t start, i;
int ret;
sm = statmount_alloc(root_id, mask, 0);
sm = statmount_alloc(root_id, 0, mask, 0);
if (!sm) {
ksft_test_result_fail("statmount %s: %s\n", name,
strerror(errno));
@ -586,14 +595,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
ret = statmount(root_id, 0, mask, sm, exactsize, 0);
ret = statmount(root_id, 0, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
ret = statmount(root_id, 0, mask, sm, shortsize, 0);
ret = statmount(root_id, 0, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@ -658,6 +667,226 @@ static void test_listmount_tree(void)
ksft_test_result_pass("listmount tree\n");
}
/*
 * Verify statmount() with STATMOUNT_BY_FD on a mount that is still attached.
 *
 * A tmpfs is mounted on a fresh temporary directory and its "test"
 * subdirectory is bind-mounted onto itself. An O_PATH fd on that bind mount
 * is then queried twice:
 *
 *  1. after chroot()ing into the sibling "chroot" directory, so the bind
 *     mount path lies outside the current root: STATMOUNT_MNT_POINT must be
 *     cleared, STATMOUNT_MNT_ROOT must be set and mnt_root must be "/test";
 *  2. after chroot("."): both bits must be set, mnt_point must equal the
 *     bind mount path and mnt_root must still be "/test".
 *
 * Exactly one ksft result is emitted on every path, including setup and
 * teardown failures, so the number of reported results always matches the
 * plan announced by main().
 */
static void test_statmount_by_fd(void)
{
	struct statmount *sm = NULL;
	char tmpdir[] = "/statmount.fd.XXXXXX";
	const char root[] = "/test";
	char subdir[PATH_MAX], tmproot[PATH_MAX];
	int fd;

	if (!mkdtemp(tmpdir)) {
		ksft_test_result_fail("statmount by fd: mkdtemp: %s\n",
				      strerror(errno));
		return;
	}

	if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
		ksft_test_result_fail("statmount by fd: mount: %s\n",
				      strerror(errno));
		rmdir(tmpdir);
		return;
	}

	/* root already starts with '/', tmproot needs an explicit separator. */
	snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);
	snprintf(tmproot, PATH_MAX, "%s/%s", tmpdir, "chroot");

	if (mkdir(subdir, 0755)) {
		ksft_test_result_fail("statmount by fd: mkdir: %s\n",
				      strerror(errno));
		goto err_tmpdir;
	}

	if (mount(subdir, subdir, NULL, MS_BIND, NULL)) {
		ksft_test_result_fail("statmount by fd: bind mount: %s\n",
				      strerror(errno));
		goto err_subdir;
	}

	if (mkdir(tmproot, 0755)) {
		ksft_test_result_fail("statmount by fd: mkdir: %s\n",
				      strerror(errno));
		goto err_subdir;
	}

	fd = open(subdir, O_PATH);
	if (fd < 0) {
		ksft_test_result_fail("statmount by fd: open: %s\n",
				      strerror(errno));
		goto err_tmproot;
	}

	/* Phase 1: query the fd while the mount lies outside our root. */
	if (chroot(tmproot)) {
		ksft_test_result_fail("statmount by fd: chroot: %s\n",
				      strerror(errno));
		goto err_fd;
	}

	sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD);
	if (!sm) {
		ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
		goto err_chroot;
	}

	if (sm->size < sizeof(*sm)) {
		ksft_test_result_fail("unexpected size: %u < %u\n",
				      sm->size, (uint32_t) sizeof(*sm));
		goto err_chroot;
	}

	if (sm->mask & STATMOUNT_MNT_POINT) {
		ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in statmount\n");
		goto err_chroot;
	}

	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
		goto err_chroot;
	}

	if (strcmp(root, sm->str + sm->mnt_root) != 0) {
		ksft_test_result_fail("statmount returned incorrect mnt_root, "
				      "statmount mnt_root: %s != %s\n",
				      sm->str + sm->mnt_root, root);
		goto err_chroot;
	}

	/*
	 * Phase 2: leave the temporary root again.
	 * NOTE(review): this presumably relies on the working directory still
	 * being the original root - confirm against setup_namespace().
	 */
	if (chroot(".")) {
		ksft_test_result_fail("statmount by fd: chroot: %s\n",
				      strerror(errno));
		goto out;
	}

	free(sm);
	sm = statmount_alloc(0, fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, STATMOUNT_BY_FD);
	if (!sm) {
		ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno));
		goto err_fd;
	}

	if (sm->size < sizeof(*sm)) {
		ksft_test_result_fail("unexpected size: %u < %u\n",
				      sm->size, (uint32_t) sizeof(*sm));
		goto out;
	}

	if (!(sm->mask & STATMOUNT_MNT_POINT)) {
		ksft_test_result_fail("STATMOUNT_MNT_POINT not set in statmount\n");
		goto out;
	}

	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n");
		goto out;
	}

	if (strcmp(subdir, sm->str + sm->mnt_point) != 0) {
		ksft_test_result_fail("statmount returned incorrect mnt_point, "
				      "statmount mnt_point: %s != %s\n",
				      sm->str + sm->mnt_point, subdir);
		goto out;
	}

	if (strcmp(root, sm->str + sm->mnt_root) != 0) {
		ksft_test_result_fail("statmount returned incorrect mnt_root, "
				      "statmount mnt_root: %s != %s\n",
				      sm->str + sm->mnt_root, root);
		goto out;
	}

	ksft_test_result_pass("statmount by fd\n");
	goto out;

err_chroot:
	/* Best effort: a result has already been reported on this path. */
	chroot(".");
out:
	free(sm);
err_fd:
	close(fd);
err_tmproot:
	rmdir(tmproot);
err_subdir:
	umount2(subdir, MNT_DETACH);
	rmdir(subdir);
err_tmpdir:
	umount2(tmpdir, MNT_DETACH);
	rmdir(tmpdir);
}
/*
 * Verify statmount() with STATMOUNT_BY_FD on a mount that has been detached.
 *
 * A tmpfs is mounted on a fresh temporary directory, its "test.unmounted"
 * subdirectory is bind-mounted onto itself and opened with O_PATH, and the
 * tmpfs is then lazily unmounted with umount2(MNT_DETACH). Querying the fd
 * afterwards must still succeed, with STATMOUNT_MNT_POINT cleared,
 * STATMOUNT_MNT_ROOT set and mnt_root equal to "/test.unmounted".
 *
 * Exactly one ksft result is emitted on every path, including setup
 * failures, so the number of reported results always matches the plan
 * announced by main().
 */
static void test_statmount_by_fd_unmounted(void)
{
	const char root[] = "/test.unmounted";
	char tmpdir[] = "/statmount.fd.XXXXXX";
	char subdir[PATH_MAX];
	int fd;
	struct statmount *sm = NULL;

	if (!mkdtemp(tmpdir)) {
		ksft_test_result_fail("statmount by fd unmounted: mkdtemp: %s\n",
				      strerror(errno));
		return;
	}

	if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) {
		ksft_test_result_fail("statmount by fd unmounted: mount: %s\n",
				      strerror(errno));
		rmdir(tmpdir);
		return;
	}

	/* root already starts with '/'. */
	snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root);

	if (mkdir(subdir, 0755)) {
		ksft_test_result_fail("statmount by fd unmounted: mkdir: %s\n",
				      strerror(errno));
		goto err_tmpdir;
	}

	if (mount(subdir, subdir, NULL, MS_BIND, NULL)) {
		ksft_test_result_fail("statmount by fd unmounted: bind mount: %s\n",
				      strerror(errno));
		goto err_subdir;
	}

	fd = open(subdir, O_PATH);
	if (fd < 0) {
		ksft_test_result_fail("statmount by fd unmounted: open: %s\n",
				      strerror(errno));
		goto err_subdir;
	}

	/* Detach the tmpfs while the fd on the bind mount is still open. */
	if (umount2(tmpdir, MNT_DETACH)) {
		ksft_test_result_fail("statmount by fd unmounted: umount2: %s\n",
				      strerror(errno));
		goto err_fd;
	}

	sm = statmount_alloc(0, fd, STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT, STATMOUNT_BY_FD);
	if (!sm) {
		ksft_test_result_fail("statmount by fd unmounted: %s\n",
				      strerror(errno));
		goto err_sm;
	}

	if (sm->size < sizeof(*sm)) {
		ksft_test_result_fail("unexpected size: %u < %u\n",
				      sm->size, (uint32_t) sizeof(*sm));
		goto err_sm;
	}

	if (sm->mask & STATMOUNT_MNT_POINT) {
		ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in mask\n");
		goto err_sm;
	}

	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
		ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in mask\n");
		goto err_sm;
	}

	if (strcmp(sm->str + sm->mnt_root, root) != 0) {
		ksft_test_result_fail("statmount returned incorrect mnt_root, "
				      "statmount mnt_root: %s != %s\n",
				      sm->str + sm->mnt_root, root);
		goto err_sm;
	}

	ksft_test_result_pass("statmount by fd on unmounted mount\n");

	/*
	 * The cleanup chain is shared with the error paths. Once the tmpfs has
	 * been detached above, the umount2()/rmdir(subdir) calls below are
	 * expected to fail harmlessly; their results are deliberately ignored.
	 */
err_sm:
	free(sm);
err_fd:
	close(fd);
err_subdir:
	umount2(subdir, MNT_DETACH);
	rmdir(subdir);
err_tmpdir:
	umount2(tmpdir, MNT_DETACH);
	rmdir(tmpdir);
}
#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t))
int main(void)
@ -669,14 +898,14 @@ int main(void)
ksft_print_header();
ret = statmount(0, 0, 0, NULL, 0, 0);
ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
ksft_set_plan(15);
ksft_set_plan(17);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@ -693,6 +922,8 @@ int main(void)
test_statmount_string(all_mask, str_off(fs_type), "fs type & all");
test_listmount_tree();
test_statmount_by_fd_unmounted();
test_statmount_by_fd();
if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)

View file

@ -102,7 +102,7 @@ static int _test_statmount_mnt_ns_id(void)
if (!root_id)
return NSID_ERROR;
ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
ret = statmount(root_id, 0, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
return NSID_ERROR;
@ -128,6 +128,98 @@ static int _test_statmount_mnt_ns_id(void)
return NSID_PASS;
}
/*
 * Check STATMOUNT_MNT_NS_ID reporting when the mount is addressed by fd.
 *
 * A fresh directory is bind-mounted onto itself and opened with O_PATH.
 * While the mount is attached, statmount(STATMOUNT_BY_FD) must report the
 * mount namespace ID read from /proc/self/ns/mnt. After umount2(MNT_DETACH)
 * the same fd must still be queryable, but STATMOUNT_MNT_NS_ID must no longer
 * be present in the returned mask.
 *
 * Returns NSID_PASS on success, NSID_FAIL on a wrong result, NSID_SKIP when
 * the kernel does not report the namespace ID, NSID_ERROR on setup or syscall
 * failure, or whatever non-pass value get_mnt_ns_id() returned.
 */
static int _test_statmount_mnt_ns_id_by_fd(void)
{
	struct statmount sm;
	uint64_t mnt_ns_id;
	int ret, fd, mounted = 0, status = NSID_ERROR;
	char mnt[] = "/statmount.fd.XXXXXX";

	ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
	if (ret != NSID_PASS)
		return ret;

	if (!mkdtemp(mnt)) {
		ksft_print_msg("statmount by fd mnt ns id mkdtemp: %s\n", strerror(errno));
		return NSID_ERROR;
	}

	if (mount(mnt, mnt, NULL, MS_BIND, NULL)) {
		ksft_print_msg("statmount by fd mnt ns id mount: %s\n", strerror(errno));
		goto err;
	}
	mounted = 1;

	fd = open(mnt, O_PATH);
	if (fd < 0) {
		ksft_print_msg("statmount by fd mnt ns id open: %s\n", strerror(errno));
		goto err_mount;
	}

	/* Attached mount: the namespace ID must be reported and must match. */
	ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
	if (ret == -1) {
		ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
		status = NSID_ERROR;
		goto out;
	}

	if (sm.size != sizeof(sm)) {
		ksft_print_msg("unexpected size: %u != %u\n", sm.size,
			       (uint32_t)sizeof(sm));
		status = NSID_FAIL;
		goto out;
	}

	if (sm.mask != STATMOUNT_MNT_NS_ID) {
		ksft_print_msg("statmount mnt ns id unavailable\n");
		status = NSID_SKIP;
		goto out;
	}

	if (sm.mnt_ns_id != mnt_ns_id) {
		ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
			       (unsigned long long)sm.mnt_ns_id,
			       (unsigned long long)mnt_ns_id);
		status = NSID_FAIL;
		goto out;
	}

	if (umount2(mnt, MNT_DETACH)) {
		ksft_print_msg("statmount by fd mnt ns id umount2: %s\n", strerror(errno));
		status = NSID_ERROR;
		goto out;
	}
	/* Only now is the mount really gone; skip the cleanup unmount. */
	mounted = 0;

	/* Detached mount: the query must work but carry no namespace ID. */
	ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD);
	if (ret == -1) {
		ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno));
		status = NSID_ERROR;
		goto out;
	}

	if (sm.size != sizeof(sm)) {
		ksft_print_msg("unexpected size: %u != %u\n", sm.size,
			       (uint32_t)sizeof(sm));
		status = NSID_FAIL;
		goto out;
	}

	if (sm.mask & STATMOUNT_MNT_NS_ID) {
		ksft_print_msg("unexpected STATMOUNT_MNT_NS_ID in mask\n");
		status = NSID_FAIL;
		goto out;
	}

	status = NSID_PASS;

out:
	close(fd);
err_mount:
	if (mounted)
		umount2(mnt, MNT_DETACH);
err:
	rmdir(mnt);
	return status;
}
static void test_statmount_mnt_ns_id(void)
{
pid_t pid;
@ -148,6 +240,9 @@ static void test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
exit(ret);
ret = _test_statmount_mnt_ns_id();
if (ret != NSID_PASS)
exit(ret);
ret = _test_statmount_mnt_ns_id_by_fd();
exit(ret);
}
@ -179,7 +274,7 @@ static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
for (int i = 0; i < nr_mounts; i++) {
struct statmount sm;
ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
ret = statmount(list[i], mnt_ns_id, 0, STATMOUNT_MNT_NS_ID, &sm,
sizeof(sm), 0);
if (ret < 0) {
ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
@ -275,7 +370,7 @@ int main(void)
int ret;
ksft_print_header();
ret = statmount(0, 0, 0, NULL, 0, 0);
ret = statmount(0, 0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");

View file

@ -515,6 +515,32 @@ int setup_userns(void)
return 0;
}
/*
 * enter_userns - move the caller into a new user namespace as ID 0
 *
 * Samples the caller's uid and gid, unshares CLONE_NEWUSER and then writes
 * single-entry uid_map and gid_map files that map ID 0 inside the new
 * namespace to the sampled uid/gid. "deny" is written to
 * /proc/self/setgroups before gid_map; see user_namespaces(7) for why an
 * unprivileged writer has to do this first.
 *
 * Returns 0 on success, otherwise the non-zero value returned by the failing
 * unshare() or write_file() call.
 */
int enter_userns(void)
{
	int ret;
	char buf[32];
	uid_t uid = getuid();
	gid_t gid = getgid();

	ret = unshare(CLONE_NEWUSER);
	if (ret)
		return ret;

	/* uid_t/gid_t are unsigned: print them as such and bound the write. */
	snprintf(buf, sizeof(buf), "0 %u 1", (unsigned int)uid);
	ret = write_file("/proc/self/uid_map", buf);
	if (ret)
		return ret;

	ret = write_file("/proc/self/setgroups", "deny");
	if (ret)
		return ret;

	snprintf(buf, sizeof(buf), "0 %u 1", (unsigned int)gid);
	ret = write_file("/proc/self/gid_map", buf);
	if (ret)
		return ret;

	return 0;
}
/* caps_down - lower all effective caps */
int caps_down(void)
{

View file

@ -28,6 +28,7 @@ extern int cap_down(cap_value_t down);
extern bool switch_ids(uid_t uid, gid_t gid);
extern int setup_userns(void);
extern int enter_userns(void);
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{