From fc94368bcee555b2cc44c2f4e7c4fcbb50404cd3 Mon Sep 17 00:00:00 2001 From: Qiliang Yuan Date: Fri, 23 Jan 2026 03:12:21 -0500 Subject: [PATCH 01/13] fs/file: optimize close_range() complexity from O(N) to O(Sparse) In close_range(), the kernel traditionally performs a linear scan over the [fd, max_fd] range, resulting in O(N) complexity where N is the range size. For processes with sparse FD tables, this is inefficient as it checks many unallocated slots. This patch optimizes __range_close() by using find_next_bit() on the open_fds bitmap to skip holes. This shifts the algorithmic complexity from O(Range Size) to O(Active FDs), providing a significant performance boost for large-range close operations on sparse file descriptor tables. Signed-off-by: Qiliang Yuan Signed-off-by: Qiliang Yuan Link: https://patch.msgid.link/20260123081221.659125-1-realwujing@gmail.com Signed-off-by: Christian Brauner --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index 0a4f3bdb2dec..51ddcff0081a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -777,23 +777,29 @@ static inline void __range_close(struct files_struct *files, unsigned int fd, unsigned int max_fd) { struct file *file; + struct fdtable *fdt; unsigned n; spin_lock(&files->file_lock); - n = last_fd(files_fdtable(files)); + fdt = files_fdtable(files); + n = last_fd(fdt); max_fd = min(max_fd, n); - for (; fd <= max_fd; fd++) { + for (fd = find_next_bit(fdt->open_fds, max_fd + 1, fd); + fd <= max_fd; + fd = find_next_bit(fdt->open_fds, max_fd + 1, fd + 1)) { file = file_close_fd_locked(files, fd); if (file) { spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); spin_lock(&files->file_lock); + fdt = files_fdtable(files); } else if (need_resched()) { spin_unlock(&files->file_lock); cond_resched(); spin_lock(&files->file_lock); + fdt = files_fdtable(files); } } spin_unlock(&files->file_lock); From 9396bfdacb5aa2bcb3d2242b0de527e7d4f8a3cd Mon Sep 17 
00:00:00 2001 From: Andrey Albershteyn Date: Wed, 21 Jan 2026 20:36:43 +0100 Subject: [PATCH 02/13] fs: reset read-only fsflags together with xflags While setting file attributes, the read-only flags are reset for ->xflags, but not for ->flags if flag is shared between both. This is fine for now as all read-only xflags don't overlap with flags. However, for any read-only shared flag this will create inconsistency between xflags and flags. The non-shared flag will be reset in vfs_fileattr_set() to the current value, but the shared one is passed further to ->fileattr_set. Reported-by: Eric Biggers Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260121193645.3611716-1-aalbersh@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/file_attr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/file_attr.c b/fs/file_attr.c index 4c4916632f11..f3704881c126 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -141,8 +141,7 @@ static int file_attr_to_fileattr(const struct file_attr *fattr, if (fattr->fa_xflags & ~mask) return -EINVAL; - fileattr_fill_xflags(fa, fattr->fa_xflags); - fa->fsx_xflags &= ~FS_XFLAG_RDONLY_MASK; + fileattr_fill_xflags(fa, fattr->fa_xflags & ~FS_XFLAG_RDONLY_MASK); fa->fsx_extsize = fattr->fa_extsize; fa->fsx_projid = fattr->fa_projid; fa->fsx_cowextsize = fattr->fa_cowextsize; @@ -162,8 +161,7 @@ static int copy_fsxattr_from_user(struct file_kattr *fa, if (xfa.fsx_xflags & ~mask) return -EOPNOTSUPP; - fileattr_fill_xflags(fa, xfa.fsx_xflags); - fa->fsx_xflags &= ~FS_XFLAG_RDONLY_MASK; + fileattr_fill_xflags(fa, xfa.fsx_xflags & ~FS_XFLAG_RDONLY_MASK); fa->fsx_extsize = xfa.fsx_extsize; fa->fsx_nextents = xfa.fsx_nextents; fa->fsx_projid = xfa.fsx_projid; From 1992330d90dd766fcf1730fd7bf2d6af65370ac4 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Wed, 28 Jan 2026 14:24:04 +0100 Subject: [PATCH 03/13] ovl: Fix uninit-value in ovl_fill_real Syzbot reported a KMSAN uninit-value issue in 
ovl_fill_real. This issue's call chain is: __do_sys_getdents64() -> iterate_dir() ... -> ext4_readdir() -> fscrypt_fname_alloc_buffer() // alloc -> fscrypt_fname_disk_to_usr // write without tail '\0' -> dir_emit() -> ovl_fill_real() // read by strcmp() The string is used to store the decrypted directory entry name for an encrypted inode. As shown in the call chain, fscrypt_fname_disk_to_usr() writes it without null-terminating it. However, ovl_fill_real() uses strcmp() to compare the name against "..", which assumes a null-terminated string and may trigger a KMSAN uninit-value warning when the buffer tail contains uninit data. Reported-by: syzbot+d130f98b2c265fae5297@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d130f98b2c265fae5297 Fixes: 4edb83bb1041 ("ovl: constant d_ino for non-merge dirs") Signed-off-by: Qing Wang Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260128132406.23768-2-amir73il@gmail.com Acked-by: Miklos Szeredi Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner --- fs/overlayfs/readdir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 160960bb0ad0..724ec9d93fc8 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -755,7 +755,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, struct dir_context *orig_ctx = rdt->orig_ctx; bool res; - if (rdt->parent_ino && strcmp(name, "..") == 0) { + if (rdt->parent_ino && namelen == 2 && !strncmp(name, "..", 2)) { ino = rdt->parent_ino; } else if (rdt->cache) { struct ovl_cache_entry *p; From 55fb177d3a0346106974749374ae2191ba250825 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 28 Jan 2026 14:24:05 +0100 Subject: [PATCH 04/13] fs: add helpers name_is_dot{,dot,_dotdot} Rename the helper is_dot_dotdot() into the name_ namespace and add complementary helpers to check for dot and dotdot names individually. 
Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260128132406.23768-3-amir73il@gmail.com Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner --- fs/crypto/fname.c | 2 +- fs/ecryptfs/crypto.c | 2 +- fs/exportfs/expfs.c | 3 ++- fs/f2fs/dir.c | 2 +- fs/f2fs/hash.c | 2 +- fs/namei.c | 2 +- fs/overlayfs/readdir.c | 3 ++- fs/smb/server/vfs.c | 2 +- include/linux/fs.h | 14 ++++++++++++-- 9 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index a9a4432d12ba..629eb0d72e86 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -76,7 +76,7 @@ struct fscrypt_nokey_name { static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { - return is_dot_dotdot(str->name, str->len); + return name_is_dot_dotdot(str->name, str->len); } /** diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 260f8a4938b0..3c89f06c7453 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1904,7 +1904,7 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)) { - if (is_dot_dotdot(name, name_size)) { + if (name_is_dot_dotdot(name, name_size)) { rc = ecryptfs_copy_filename(plaintext_name, plaintext_name_size, name, name_size); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index d3e55de4a2a2..6c9be60a3e48 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -253,7 +253,8 @@ static bool filldir_one(struct dir_context *ctx, const char *name, int len, container_of(ctx, struct getdents_callback, ctx); buf->sequence++; - if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { + if (buf->ino == ino && len <= NAME_MAX && + !name_is_dot_dotdot(name, len)) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 48f4f98afb01..29412e6e078d 100644 --- a/fs/f2fs/dir.c +++ 
b/fs/f2fs/dir.c @@ -67,7 +67,7 @@ int f2fs_init_casefolded_name(const struct inode *dir, int len; if (IS_CASEFOLDED(dir) && - !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { + !name_is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, GFP_NOFS, false, F2FS_SB(sb)); if (!buf) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 049ce50cec9b..14082fe5e6b2 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -100,7 +100,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) WARN_ON_ONCE(!name); - if (is_dot_dotdot(name, len)) { + if (name_is_dot_dotdot(name, len)) { fname->hash = 0; return; } diff --git a/fs/namei.c b/fs/namei.c index 4e3a5fd370a8..aa8fbca81686 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3042,7 +3042,7 @@ int lookup_noperm_common(struct qstr *qname, struct dentry *base) if (!len) return -EACCES; - if (is_dot_dotdot(name, len)) + if (name_is_dot_dotdot(name, len)) return -EACCES; while (len--) { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 724ec9d93fc8..9f6b36f3d4cf 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -76,7 +76,8 @@ static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len, char *cf_name; int cf_len; - if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len)) + if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || + name_is_dot_dotdot(str, len)) return 0; cf_name = kmalloc(NAME_MAX, GFP_KERNEL); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 98b0eb966d91..e73e968f664d 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1052,7 +1052,7 @@ static bool __dir_empty(struct dir_context *ctx, const char *name, int namlen, struct ksmbd_readdir_data *buf; buf = container_of(ctx, struct ksmbd_readdir_data, ctx); - if (!is_dot_dotdot(name, namlen)) + if (!name_is_dot_dotdot(name, namlen)) buf->dirent_count++; return !buf->dirent_count; diff --git 
a/include/linux/fs.h b/include/linux/fs.h index 094b0adcb035..95bb9a15e109 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2844,12 +2844,22 @@ u64 vfsmount_to_propagation_flags(struct vfsmount *mnt); extern char *file_path(struct file *, char *, int); +static inline bool name_is_dot(const char *name, size_t len) +{ + return unlikely(len == 1 && name[0] == '.'); +} + +static inline bool name_is_dotdot(const char *name, size_t len) +{ + return unlikely(len == 2 && name[0] == '.' && name[1] == '.'); +} + /** - * is_dot_dotdot - returns true only if @name is "." or ".." + * name_is_dot_dotdot - returns true only if @name is "." or ".." * @name: file name to check * @len: length of file name, in bytes */ -static inline bool is_dot_dotdot(const char *name, size_t len) +static inline bool name_is_dot_dotdot(const char *name, size_t len) { return len && unlikely(name[0] == '.') && (len == 1 || (len == 2 && name[1] == '.')); From 9cf8ddb12a728a5b9d814dc2c12dfc0959539f9c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 28 Jan 2026 14:24:06 +0100 Subject: [PATCH 05/13] ovl: use name_is_dot* helpers in readdir code Use the helpers in place of all the different open coded variants. This makes the code more readable and robust. 
Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260128132406.23768-4-amir73il@gmail.com Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner --- fs/overlayfs/readdir.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 9f6b36f3d4cf..c665ff3546f7 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -154,7 +154,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, return true; /* Always recalc d_ino for parent */ - if (strcmp(p->name, "..") == 0) + if (name_is_dotdot(p->name, p->len)) return true; /* If this is lower, then native d_ino will do */ @@ -165,7 +165,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, * Recalc d_ino for '.' and for all entries if dir is impure (contains * copied up entries) */ - if ((p->name[0] == '.' && p->len == 1) || + if (name_is_dot(p->name, p->len) || ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry))) return true; @@ -561,12 +561,12 @@ static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, if (!ovl_same_dev(ofs) && !p->check_xwhiteout) goto out; - if (p->name[0] == '.') { + if (name_is_dot_dotdot(p->name, p->len)) { if (p->len == 1) { this = dget(dir); goto get; } - if (p->len == 2 && p->name[1] == '.') { + if (p->len == 2) { /* we shall not be moved */ this = dget(dir->d_parent); goto get; @@ -666,8 +666,7 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list, return err; list_for_each_entry_safe(p, n, list, l_node) { - if (strcmp(p->name, ".") != 0 && - strcmp(p->name, "..") != 0) { + if (!name_is_dot_dotdot(p->name, p->len)) { err = ovl_cache_update(path, p, true); if (err) return err; @@ -756,7 +755,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name, struct dir_context *orig_ctx = rdt->orig_ctx; bool res; - if (rdt->parent_ino && namelen == 2 && !strncmp(name, "..", 2)) { + if (rdt->parent_ino && 
name_is_dotdot(name, namelen)) { ino = rdt->parent_ino; } else if (rdt->cache) { struct ovl_cache_entry *p; @@ -1097,12 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) goto del_entry; } - if (p->name[0] == '.') { - if (p->len == 1) - goto del_entry; - if (p->len == 2 && p->name[1] == '.') - goto del_entry; - } + if (name_is_dot_dotdot(p->name, p->len)) + goto del_entry; err = -ENOTEMPTY; break; @@ -1146,7 +1141,7 @@ static bool ovl_check_d_type(struct dir_context *ctx, const char *name, container_of(ctx, struct ovl_readdir_data, ctx); /* Even if d_type is not supported, DT_DIR is returned for . and .. */ - if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) + if (name_is_dot_dotdot(name, namelen)) return true; if (d_type != DT_UNKNOWN) @@ -1209,11 +1204,8 @@ static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *pa list_for_each_entry(p, &list, l_node) { struct dentry *dentry; - if (p->name[0] == '.') { - if (p->len == 1) - continue; - if (p->len == 2 && p->name[1] == '.') - continue; + if (name_is_dot_dotdot(p->name, p->len)) { + continue; } else if (incompat) { pr_err("overlay with incompat feature '%s' cannot be mounted\n", p->name); @@ -1278,12 +1270,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs) goto out; list_for_each_entry(p, &list, l_node) { - if (p->name[0] == '.') { - if (p->len == 1) - continue; - if (p->len == 2 && p->name[1] == '.') - continue; - } + if (name_is_dot_dotdot(p->name, p->len)) + continue; index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); From fd5d8b65cfe76db197dce5fde184fd310fb4a26d Mon Sep 17 00:00:00 2001 From: Chelsy Ratnawat Date: Wed, 28 Jan 2026 06:31:50 -0800 Subject: [PATCH 06/13] fs: dcache: fix typo in enum d_walk_ret comment Fix minor spelling and indentation errors in the documentation comments. 
Signed-off-by: Chelsy Ratnawat Link: https://patch.msgid.link/20260128143150.3674284-1-chelsyratnawat2001@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/dcache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index ec275f4fd81c..55c622b28ddd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1288,8 +1288,8 @@ void shrink_dcache_sb(struct super_block *sb) EXPORT_SYMBOL(shrink_dcache_sb); /** - * enum d_walk_ret - action to talke during tree walk - * @D_WALK_CONTINUE: contrinue walk + * enum d_walk_ret - action to take during tree walk + * @D_WALK_CONTINUE: continue walk * @D_WALK_QUIT: quit walk * @D_WALK_NORETRY: quit when retry is needed * @D_WALK_SKIP: skip this dentry and its children @@ -1712,7 +1712,7 @@ void d_invalidate(struct dentry *dentry) EXPORT_SYMBOL(d_invalidate); /** - * __d_alloc - allocate a dcache entry + * __d_alloc - allocate a dcache entry * @sb: filesystem it will belong to * @name: qstr of the name * @@ -1796,7 +1796,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } /** - * d_alloc - allocate a dcache entry + * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate * @name: qstr of the name * @@ -2536,7 +2536,7 @@ static void __d_rehash(struct dentry *entry) } /** - * d_rehash - add an entry back to the hash + * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash * * Adds a dentry to the hash according to its name. From 40210c2b11a873ff64a812c2d2600f529f01a83e Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 22 Dec 2025 13:18:57 +0100 Subject: [PATCH 07/13] rust: seq_file: replace `kernel::c_str!` with C-Strings C-String literals were added in Rust 1.77. Replace instances of `kernel::c_str!` with C-String literals where possible. 
Acked-by: Greg Kroah-Hartman Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Signed-off-by: Tamir Duberstein Link: https://patch.msgid.link/20251222-cstr-vfs-v1-1-18e3d327cbd7@gmail.com Acked-by: Danilo Krummrich Reviewed-by: Daniel Almeida Signed-off-by: Christian Brauner --- rust/kernel/seq_file.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs index 855e533813a6..518265558d66 100644 --- a/rust/kernel/seq_file.rs +++ b/rust/kernel/seq_file.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/seq_file.h`](srctree/include/linux/seq_file.h) -use crate::{bindings, c_str, fmt, str::CStrExt as _, types::NotThreadSafe, types::Opaque}; +use crate::{bindings, fmt, str::CStrExt as _, types::NotThreadSafe, types::Opaque}; /// A utility for generating the contents of a seq file. #[repr(transparent)] @@ -36,7 +36,7 @@ impl SeqFile { unsafe { bindings::seq_printf( self.inner.get(), - c_str!("%pA").as_char_ptr(), + c"%pA".as_char_ptr(), core::ptr::from_ref(&args).cast::(), ); } From 0e6b7eae1fded85f94a357d6132f07d64c614cfa Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 26 Jan 2026 12:56:57 +0100 Subject: [PATCH 08/13] fs: add FS_XFLAG_VERITY for fs-verity files fs-verity introduced inode flag for inodes with enabled fs-verity on them. This patch adds FS_XFLAG_VERITY file attribute which can be retrieved with FS_IOC_FSGETXATTR ioctl() and file_getattr() syscall. This flag is read-only and can not be set with corresponding set ioctl() and file_setattr(). The FS_IOC_SETFLAGS requires file to be opened for writing which is not allowed for verity files. The FS_IOC_FSSETXATTR and file_setattr() clears this flag from the user input. As this is now common flag for both flag interfaces (flags/xflags) add it to overlapping flags list to exclude it from overwrite. Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260126115658.27656-2-aalbersh@kernel.org Reviewed-by: Darrick J. 
Wong Signed-off-by: Christian Brauner --- Documentation/filesystems/fsverity.rst | 16 ++++++++++++++++ fs/file_attr.c | 4 ++++ include/linux/fileattr.h | 6 +++--- include/uapi/linux/fs.h | 1 + 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst index 412cf11e3298..22b49b295d1f 100644 --- a/Documentation/filesystems/fsverity.rst +++ b/Documentation/filesystems/fsverity.rst @@ -341,6 +341,22 @@ the file has fs-verity enabled. This can perform better than FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require opening the file, and opening verity files can be expensive. +FS_IOC_FSGETXATTR +----------------- + +Since Linux v7.0, the FS_IOC_FSGETXATTR ioctl sets FS_XFLAG_VERITY (0x00020000) +in the returned flags when the file has verity enabled. Note that this attribute +cannot be set with FS_IOC_FSSETXATTR as enabling verity requires input +parameters. See FS_IOC_ENABLE_VERITY. + +file_getattr +------------ + +Since Linux v7.0, the file_getattr() syscall sets FS_XFLAG_VERITY (0x00020000) +in the returned flags when the file has verity enabled. Note that this attribute +cannot be set with file_setattr() as enabling verity requires input parameters. +See FS_IOC_ENABLE_VERITY. + .. 
_accessing_verity_files: Accessing verity files diff --git a/fs/file_attr.c b/fs/file_attr.c index f3704881c126..dfde87401817 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -36,6 +36,8 @@ void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags) fa->flags |= FS_DAX_FL; if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) fa->flags |= FS_PROJINHERIT_FL; + if (fa->fsx_xflags & FS_XFLAG_VERITY) + fa->flags |= FS_VERITY_FL; } EXPORT_SYMBOL(fileattr_fill_xflags); @@ -66,6 +68,8 @@ void fileattr_fill_flags(struct file_kattr *fa, u32 flags) fa->fsx_xflags |= FS_XFLAG_DAX; if (fa->flags & FS_PROJINHERIT_FL) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + if (fa->flags & FS_VERITY_FL) + fa->fsx_xflags |= FS_XFLAG_VERITY; } EXPORT_SYMBOL(fileattr_fill_flags); diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index f89dcfad3f8f..3780904a63a6 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -7,16 +7,16 @@ #define FS_COMMON_FL \ (FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \ - FS_PROJINHERIT_FL) + FS_PROJINHERIT_FL | FS_VERITY_FL) #define FS_XFLAG_COMMON \ (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \ FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \ - FS_XFLAG_PROJINHERIT) + FS_XFLAG_PROJINHERIT | FS_XFLAG_VERITY) /* Read-only inode flags */ #define FS_XFLAG_RDONLY_MASK \ - (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR) + (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY) /* Flags to indicate valid value of fsx_ fields */ #define FS_XFLAG_VALUES_MASK \ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 66ca526cf786..70b2b661f42c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -253,6 +253,7 @@ struct file_attr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_VERITY 0x00020000 /* 
fs-verity enabled */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is From fa19d42cc7915226db416999866171a456dac657 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 26 Jan 2026 12:56:58 +0100 Subject: [PATCH 09/13] fsverity: add tracepoints fs-verity previously had debug printk but it was removed. This patch adds trace points to similar places, as a better alternative. Signed-off-by: Andrey Albershteyn Reviewed-by: Darrick J. Wong [djwong: fix formatting] Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/20260126115658.27656-3-aalbersh@kernel.org Signed-off-by: Christian Brauner --- MAINTAINERS | 1 + fs/verity/enable.c | 4 + fs/verity/fsverity_private.h | 2 + fs/verity/init.c | 1 + fs/verity/verify.c | 9 ++ include/trace/events/fsverity.h | 146 ++++++++++++++++++++++++++++++++ 6 files changed, 163 insertions(+) create mode 100644 include/trace/events/fsverity.h diff --git a/MAINTAINERS b/MAINTAINERS index 5b11839cba9d..9761650d2ead 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10311,6 +10311,7 @@ T: git https://git.kernel.org/pub/scm/fs/fsverity/linux.git F: Documentation/filesystems/fsverity.rst F: fs/verity/ F: include/linux/fsverity.h +F: include/trace/events/fsverity.h F: include/uapi/linux/fsverity.h FT260 FTDI USB-HID TO I2C BRIDGE DRIVER diff --git a/fs/verity/enable.c b/fs/verity/enable.c index 95ec42b84797..8718d943b428 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -222,6 +222,8 @@ static int enable_verity(struct file *filp, if (err) goto out; + trace_fsverity_enable(inode, &params); + /* * Start enabling verity on this file, serialized by the inode lock. * Fail if verity is already enabled or is already being enabled. @@ -264,6 +266,8 @@ static int enable_verity(struct file *filp, goto rollback; } + trace_fsverity_tree_done(inode, vi, &params); + /* * Tell the filesystem to finish enabling verity on the file. 
* Serialized with ->begin_enable_verity() by the inode lock. diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index dd20b138d452..4b7ae1748f4e 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -161,4 +161,6 @@ static inline void fsverity_init_signature(void) void __init fsverity_init_workqueue(void); +#include + #endif /* _FSVERITY_PRIVATE_H */ diff --git a/fs/verity/init.c b/fs/verity/init.c index 6e8d33b50240..d65206608583 100644 --- a/fs/verity/init.c +++ b/fs/verity/init.c @@ -5,6 +5,7 @@ * Copyright 2019 Google LLC */ +#define CREATE_TRACE_POINTS #include "fsverity_private.h" #include diff --git a/fs/verity/verify.c b/fs/verity/verify.c index 86067c8b40cf..940b8b956d7e 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -135,6 +135,9 @@ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, /* Byte offset of the wanted hash relative to @addr */ unsigned int hoffset; } hblocks[FS_VERITY_MAX_LEVELS]; + + trace_fsverity_verify_data_block(inode, params, data_pos); + /* * The index of the previous level's block within that level; also the * index of that block's hash within the current level. 
@@ -214,6 +217,9 @@ static bool verify_data_block(struct inode *inode, struct fsverity_info *vi, want_hash = _want_hash; kunmap_local(haddr); put_page(hpage); + trace_fsverity_merkle_hit(inode, data_pos, hblock_idx, + level, + hoffset >> params->log_digestsize); goto descend; } hblocks[level].page = hpage; @@ -232,6 +238,9 @@ descend: unsigned long hblock_idx = hblocks[level - 1].index; unsigned int hoffset = hblocks[level - 1].hoffset; + trace_fsverity_verify_merkle_block(inode, hblock_idx, + level, hoffset >> params->log_digestsize); + fsverity_hash_block(params, haddr, real_hash); if (memcmp(want_hash, real_hash, hsize) != 0) goto corrupted; diff --git a/include/trace/events/fsverity.h b/include/trace/events/fsverity.h new file mode 100644 index 000000000000..a8c52f21cbd5 --- /dev/null +++ b/include/trace/events/fsverity.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fsverity + +#if !defined(_TRACE_FSVERITY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FSVERITY_H + +#include + +struct fsverity_descriptor; +struct merkle_tree_params; +struct fsverity_info; + +TRACE_EVENT(fsverity_enable, + TP_PROTO(const struct inode *inode, + const struct merkle_tree_params *params), + TP_ARGS(inode, params), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(u64, data_size) + __field(u64, tree_size) + __field(unsigned int, merkle_block) + __field(unsigned int, num_levels) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->data_size = i_size_read(inode); + __entry->tree_size = params->tree_size; + __entry->merkle_block = params->block_size; + __entry->num_levels = params->num_levels; + ), + TP_printk("ino %lu data_size %llu tree_size %llu merkle_block %u levels %u", + (unsigned long) __entry->ino, + __entry->data_size, + __entry->tree_size, + __entry->merkle_block, + __entry->num_levels) +); + +TRACE_EVENT(fsverity_tree_done, + TP_PROTO(const struct inode *inode, const struct fsverity_info *vi, + 
const struct merkle_tree_params *params), + TP_ARGS(inode, vi, params), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(u64, data_size) + __field(u64, tree_size) + __field(unsigned int, merkle_block) + __field(unsigned int, levels) + __dynamic_array(u8, root_hash, params->digest_size) + __dynamic_array(u8, file_digest, params->digest_size) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->data_size = i_size_read(inode); + __entry->tree_size = params->tree_size; + __entry->merkle_block = params->block_size; + __entry->levels = params->num_levels; + memcpy(__get_dynamic_array(root_hash), vi->root_hash, __get_dynamic_array_len(root_hash)); + memcpy(__get_dynamic_array(file_digest), vi->file_digest, __get_dynamic_array_len(file_digest)); + ), + TP_printk("ino %lu data_size %llu tree_size %lld merkle_block %u levels %u root_hash %s digest %s", + (unsigned long) __entry->ino, + __entry->data_size, + __entry->tree_size, + __entry->merkle_block, + __entry->levels, + __print_hex_str(__get_dynamic_array(root_hash), __get_dynamic_array_len(root_hash)), + __print_hex_str(__get_dynamic_array(file_digest), __get_dynamic_array_len(file_digest))) +); + +TRACE_EVENT(fsverity_verify_data_block, + TP_PROTO(const struct inode *inode, + const struct merkle_tree_params *params, + u64 data_pos), + TP_ARGS(inode, params, data_pos), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(u64, data_pos) + __field(unsigned int, merkle_block) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->data_pos = data_pos; + __entry->merkle_block = params->block_size; + ), + TP_printk("ino %lu data_pos %llu merkle_block %u", + (unsigned long) __entry->ino, + __entry->data_pos, + __entry->merkle_block) +); + +TRACE_EVENT(fsverity_merkle_hit, + TP_PROTO(const struct inode *inode, u64 data_pos, + unsigned long hblock_idx, unsigned int level, + unsigned int hidx), + TP_ARGS(inode, data_pos, hblock_idx, level, hidx), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(u64, 
data_pos) + __field(unsigned long, hblock_idx) + __field(unsigned int, level) + __field(unsigned int, hidx) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->data_pos = data_pos; + __entry->hblock_idx = hblock_idx; + __entry->level = level; + __entry->hidx = hidx; + ), + TP_printk("ino %lu data_pos %llu hblock_idx %lu level %u hidx %u", + (unsigned long) __entry->ino, + __entry->data_pos, + __entry->hblock_idx, + __entry->level, + __entry->hidx) +); + +TRACE_EVENT(fsverity_verify_merkle_block, + TP_PROTO(const struct inode *inode, unsigned long hblock_idx, + unsigned int level, unsigned int hidx), + TP_ARGS(inode, hblock_idx, level, hidx), + TP_STRUCT__entry( + __field(ino_t, ino) + __field(unsigned long, hblock_idx) + __field(unsigned int, level) + __field(unsigned int, hidx) + ), + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->hblock_idx = hblock_idx; + __entry->level = level; + __entry->hidx = hidx; + ), + TP_printk("ino %lu hblock_idx %lu level %u hidx %u", + (unsigned long) __entry->ino, + __entry->hblock_idx, + __entry->level, + __entry->hidx) +); + +#endif /* _TRACE_FSVERITY_H */ + +/* This part must be outside protection */ +#include From a39162f77f49b618df5a721a1e48d8b903280fbd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 29 Jan 2026 11:02:11 +0100 Subject: [PATCH 10/13] exportfs: clarify the documentation of open()/permission() exportfs ops pidfs and nsfs recently gained support for encode/decode of file handles via name_to_handle_at(2)/open_by_handle_at(2). These special kernel filesystems have custom ->open() and ->permission() export methods, which nfsd does not respect and it was never meant to be used for exporting those filesystems by nfsd. Update kernel-doc comments to express the fact that those methods are for the open_by_handle_at(2) syscall only and are not compatible with nfsd.
Reviewed-by: Jeff Layton Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260129100212.49727-2-amir73il@gmail.com Reviewed-by: Chuck Lever Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 262e24d83313..0660953c3fb7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -200,6 +200,10 @@ struct handle_to_path_ctx { * @get_parent: find the parent of a given directory * @commit_metadata: commit metadata changes to stable storage * + * Methods for the open_by_handle_at(2) syscall with special kernel file systems: + * @permission: custom permission for opening a file by handle + * @open: custom open routine for opening a file by handle + * * See Documentation/filesystems/nfs/exporting.rst for details on how to use * this interface correctly and the definition of the flags. * @@ -244,10 +248,14 @@ struct handle_to_path_ctx { * space cannot be allocated, a %ERR_PTR should be returned. * * @permission: - * Allow filesystems to specify a custom permission function. + * Allow filesystems to specify a custom permission function for the + * open_by_handle_at(2) syscall instead of the default permission check. + * This custom permission function is not respected by nfsd. * * @open: - * Allow filesystems to specify a custom open function. + * Allow filesystems to specify a custom open function for the + * open_by_handle_at(2) syscall instead of the default file_open_root(). + * This custom open function is not respected by nfsd. * * @commit_metadata: * @commit_metadata should commit metadata changes to stable storage. 
From b3c78bc53630d14a5770451ede3a30e7052f3b8b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 29 Jan 2026 11:02:12 +0100 Subject: [PATCH 11/13] nfsd: do not allow exporting of special kernel filesystems pidfs and nsfs recently gained support for encode/decode of file handles via name_to_handle_at(2)/open_by_handle_at(2). These special kernel filesystems have custom ->open() and ->permission() export methods, which nfsd does not respect and it was never meant to be used for exporting those filesystems by nfsd. Therefore, do not allow nfsd to export filesystems with custom ->open() or ->permission() methods. Fixes: b3caba8f7a34a ("pidfs: implement file handle support") Fixes: 5222470b2fbb3 ("nsfs: support file handles") Reviewed-by: Jeff Layton Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/20260129100212.49727-3-amir73il@gmail.com Reviewed-by: Chuck Lever Signed-off-by: Christian Brauner --- fs/nfsd/export.c | 8 +++++--- include/linux/exportfs.h | 9 +++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9d55512d0cc9..baaa18b878d7 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -427,7 +427,8 @@ static int check_export(const struct path *path, int *flags, unsigned char *uuid * either a device number (so FS_REQUIRES_DEV needed) * or an FSID number (so NFSEXP_FSID or ->uuid is needed). * 2: We must be able to find an inode from a filehandle. - * This means that s_export_op must be set. + * This means that s_export_op must be set and comply with + * the requirements for remote filesystem export. * 3: We must not currently be on an idmapped mount. 
*/ if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && @@ -437,8 +438,9 @@ static int check_export(const struct path *path, int *flags, unsigned char *uuid return -EINVAL; } - if (!exportfs_can_decode_fh(inode->i_sb->s_export_op)) { - dprintk("exp_export: export of invalid fs type.\n"); + if (!exportfs_may_export(inode->i_sb->s_export_op)) { + dprintk("exp_export: export of invalid fs type (%s).\n", + inode->i_sb->s_type->name); return -EINVAL; } diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 0660953c3fb7..8bcdba28b406 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -338,6 +338,15 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) return nop && nop->fh_to_dentry; } +static inline bool exportfs_may_export(const struct export_operations *nop) +{ + /* + * Do not allow nfs export for filesystems with custom ->open() or + * ->permission() ops, which nfsd does not respect (e.g. pidfs, nsfs). + */ + return exportfs_can_decode_fh(nop) && !nop->open && !nop->permission; +} + static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { From ab89060fbc92edd6e852bf0f533f29140afabe0e Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 27 Jan 2026 22:51:37 +0000 Subject: [PATCH 12/13] pidfs: return -EREMOTE when PIDFD_GET_INFO is called on another ns Currently it is not possible to distinguish between the case where a process has already exited and the case where a process is in a different namespace, as both return -ESRCH. glibc's pidfd_getpid() procfs-based implementation returns -EREMOTE in the latter, so that distinguishing the two is possible, as the fdinfo in procfs will list '0' as the PID in that case: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/pidfd_getpid.c;h=860829cf07da2267484299ccb02861822c0d07b4;hb=HEAD#l121 Change the error code so that the kernel also returns -EREMOTE in that case. 
Fixes: 7477d7dce48a ("pidfs: allow to retrieve exit information") Signed-off-by: Luca Boccassi Link: https://patch.msgid.link/20260127225209.2293342-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index dba703d4ce4a..8e66d3993dd8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -329,7 +329,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) * namespace hierarchy. */ if (!pid_in_current_pidns(pid)) - return -ESRCH; + return -EREMOTE; attr = READ_ONCE(pid->attr); if (mask & PIDFD_INFO_EXIT) { From dedfae78f00960d703badc500422d10e1f12b2bc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 3 Feb 2026 14:00:31 +0100 Subject: [PATCH 13/13] fs: add porting notes about readlink_copy() Calling convention has changed in ea382199071931d1 ("vfs: support caching symlink lengths in inodes") Signed-off-by: Mateusz Guzik Link: https://patch.msgid.link/20260203130032.315177-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3397937ed838..bd4128ccbb67 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1334,3 +1334,13 @@ end_creating() and the parent will be unlocked precisely when necessary. kill_litter_super() is gone; convert to DCACHE_PERSISTENT use (as all in-tree filesystems have done). + +--- + +**mandatory** + +readlink_copy() now requires link length as the 4th argument. Said length needs +to match what strlen() would return if it were run on the string. + +However, if the string is freely accessible for the duration of the inode's +lifetime, consider using inode_set_cached_link() instead.