From a48373e7d35a89f6f9b39f0d0da9bf158af054ee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:45 -0800 Subject: [PATCH 01/67] xfs: start creating infrastructure for health monitoring Start creating helper functions and infrastructure to pass filesystem health events to a health monitoring file. Since this is an administrative interface, we only support a single health monitor process per filesystem, so we don't need to use anything fancy such as notifier chains (== tons of indirect calls). Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_fs.h | 7 ++ fs/xfs/xfs_health.c | 1 + fs/xfs/xfs_healthmon.c | 262 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_healthmon.h | 36 ++++++ fs/xfs/xfs_ioctl.c | 4 + fs/xfs/xfs_mount.c | 2 + fs/xfs/xfs_mount.h | 4 + 8 files changed, 317 insertions(+) create mode 100644 fs/xfs/xfs_healthmon.c create mode 100644 fs/xfs/xfs_healthmon.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5bf501cf8271..1b7385e23b34 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \ xfs_globals.o \ xfs_handle.o \ xfs_health.o \ + xfs_healthmon.o \ xfs_icache.o \ xfs_ioctl.o \ xfs_iomap.o \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 12463ba766da..c58e55b3df40 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1003,6 +1003,12 @@ struct xfs_rtgroup_geometry { #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ +struct xfs_health_monitor { + __u64 flags; /* flags */ + __u8 format; /* output format */ + __u8 pad[23]; /* zeroes */ +}; + /* * ioctl commands that are used by Linux filesystems */ @@ -1042,6 +1048,7 @@ struct xfs_rtgroup_geometry { #define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) #define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) 
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry) +#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index fbb8886c72fe..3d50397f8f7c 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -19,6 +19,7 @@ #include "xfs_da_btree.h" #include "xfs_quota_defs.h" #include "xfs_rtgroup.h" +#include "xfs_healthmon.h" #include diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c new file mode 100644 index 000000000000..b7095ea55897 --- /dev/null +++ b/fs/xfs/xfs_healthmon.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2024-2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trace.h" +#include "xfs_ag.h" +#include "xfs_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_quota_defs.h" +#include "xfs_rtgroup.h" +#include "xfs_healthmon.h" + +#include +#include +#include + +/* + * Live Health Monitoring + * ====================== + * + * Autonomous self-healing of XFS filesystems requires a means for the kernel + * to send filesystem health events to a monitoring daemon in userspace. To + * accomplish this, we establish a thread_with_file kthread object to handle + * translating internal events about filesystem health into a format that can + * be parsed easily by userspace. When those internal events occur, the core + * filesystem code calls this health monitor to convey the events to userspace. + * Userspace reads events from the file descriptor returned by the ioctl. 
+ * + * The healthmon abstraction has a weak reference to the host filesystem mount + * so that the queueing and processing of the events do not pin the mount and + * cannot slow down the main filesystem. The healthmon object can exist past + * the end of the filesystem mount. + */ + +/* sign of a detached health monitor */ +#define DETACHED_MOUNT_COOKIE ((uintptr_t)0) + +/* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */ +static DEFINE_SPINLOCK(xfs_healthmon_lock); + +/* Grab a reference to the healthmon object for a given mount, if any. */ +static struct xfs_healthmon * +xfs_healthmon_get( + struct xfs_mount *mp) +{ + struct xfs_healthmon *hm; + + rcu_read_lock(); + hm = mp->m_healthmon; + if (hm && !refcount_inc_not_zero(&hm->ref)) + hm = NULL; + rcu_read_unlock(); + + return hm; +} + +/* + * Release the reference to a healthmon object. If there are no more holders, + * free the health monitor after an RCU grace period to eliminate possibility + * of races with xfs_healthmon_get. + */ +static void +xfs_healthmon_put( + struct xfs_healthmon *hm) +{ + if (refcount_dec_and_test(&hm->ref)) + kfree_rcu_mightsleep(hm); +} + +/* Attach a health monitor to an xfs_mount. Only one allowed at a time. */ +STATIC int +xfs_healthmon_attach( + struct xfs_mount *mp, + struct xfs_healthmon *hm) +{ + spin_lock(&xfs_healthmon_lock); + if (mp->m_healthmon != NULL) { + spin_unlock(&xfs_healthmon_lock); + return -EEXIST; + } + + refcount_inc(&hm->ref); + mp->m_healthmon = hm; + hm->mount_cookie = (uintptr_t)mp->m_super; + spin_unlock(&xfs_healthmon_lock); + + return 0; +} + +/* Detach a xfs mount from a specific healthmon instance. 
*/ +STATIC void +xfs_healthmon_detach( + struct xfs_healthmon *hm) +{ + spin_lock(&xfs_healthmon_lock); + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) { + spin_unlock(&xfs_healthmon_lock); + return; + } + + XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL; + hm->mount_cookie = DETACHED_MOUNT_COOKIE; + spin_unlock(&xfs_healthmon_lock); + + xfs_healthmon_put(hm); +} + +/* Detach the xfs mount from this healthmon instance. */ +void +xfs_healthmon_unmount( + struct xfs_mount *mp) +{ + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + xfs_healthmon_detach(hm); + xfs_healthmon_put(hm); +} + +STATIC ssize_t +xfs_healthmon_read_iter( + struct kiocb *iocb, + struct iov_iter *to) +{ + return -EIO; +} + +/* Free the health monitoring information. */ +STATIC int +xfs_healthmon_release( + struct inode *inode, + struct file *file) +{ + struct xfs_healthmon *hm = file->private_data; + + /* + * We might be closing the healthmon file before the filesystem + * unmounts, because userspace processes can terminate at any time and + * for any reason. Null out xfs_mount::m_healthmon so that another + * process can create another health monitor file. + */ + xfs_healthmon_detach(hm); + + xfs_healthmon_put(hm); + return 0; +} + +/* Validate ioctl parameters. */ +static inline bool +xfs_healthmon_validate( + const struct xfs_health_monitor *hmo) +{ + if (hmo->flags) + return false; + if (hmo->format) + return false; + if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad))) + return false; + return true; +} + +/* Emit some data about the health monitoring fd. */ +static void +xfs_healthmon_show_fdinfo( + struct seq_file *m, + struct file *file) +{ + struct xfs_healthmon *hm = file->private_data; + + seq_printf(m, "state:\t%s\ndev:\t%d:%d\n", + hm->mount_cookie == DETACHED_MOUNT_COOKIE ? 
+ "dead" : "alive", + MAJOR(hm->dev), MINOR(hm->dev)); +} + +static const struct file_operations xfs_healthmon_fops = { + .owner = THIS_MODULE, + .show_fdinfo = xfs_healthmon_show_fdinfo, + .read_iter = xfs_healthmon_read_iter, + .release = xfs_healthmon_release, +}; + +/* + * Create a health monitoring file. Returns an index to the fd table or a + * negative errno. + */ +long +xfs_ioc_health_monitor( + struct file *file, + struct xfs_health_monitor __user *arg) +{ + struct xfs_health_monitor hmo; + struct xfs_healthmon *hm; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + int ret; + + /* + * The only intended user of the health monitoring system should be the + * xfs_healer daemon running on behalf of the whole filesystem in the + * initial user namespace. IOWs, we don't allow unprivileged userspace + * (they can use fsnotify) nor do we allow containers. + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (ip->i_ino != mp->m_sb.sb_rootino) + return -EPERM; + if (current_user_ns() != &init_user_ns) + return -EPERM; + + if (copy_from_user(&hmo, arg, sizeof(hmo))) + return -EFAULT; + + if (!xfs_healthmon_validate(&hmo)) + return -EINVAL; + + hm = kzalloc(sizeof(*hm), GFP_KERNEL); + if (!hm) + return -ENOMEM; + hm->dev = mp->m_super->s_dev; + refcount_set(&hm->ref, 1); + + /* + * Try to attach this health monitor to the xfs_mount. The monitor is + * considered live and will receive events if this succeeds. + */ + ret = xfs_healthmon_attach(mp, hm); + if (ret) + goto out_hm; + + /* + * Create the anonymous file and install a fd for it. If it succeeds, + * the file owns hm and can go away at any time, so we must not access + * it again. This must go last because we can't undo a fd table + * installation. 
+ */ + ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm, + O_CLOEXEC | O_RDONLY); + if (ret < 0) + goto out_mp; + + return ret; + +out_mp: + xfs_healthmon_detach(hm); +out_hm: + ASSERT(refcount_read(&hm->ref) == 1); + xfs_healthmon_put(hm); + return ret; +} diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h new file mode 100644 index 000000000000..218d5aac87b0 --- /dev/null +++ b/fs/xfs/xfs_healthmon.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2024-2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_HEALTHMON_H__ +#define __XFS_HEALTHMON_H__ + +struct xfs_healthmon { + /* + * Weak reference to the xfs filesystem that is being monitored. It + * will be set to zero when the filesystem detaches from the monitor. + * Do not dereference this pointer. + */ + uintptr_t mount_cookie; + + /* + * Device number of the filesystem being monitored. This is for + * consistent tracing even after unmount. + */ + dev_t dev; + + /* + * Reference count of this structure. The open healthmon fd holds one + * ref, the xfs_mount holds another ref if it points to this object, + * and running event handlers hold their own refs. 
+ */ + refcount_t ref; +}; + +void xfs_healthmon_unmount(struct xfs_mount *mp); + +long xfs_ioc_health_monitor(struct file *file, + struct xfs_health_monitor __user *arg); + +#endif /* __XFS_HEALTHMON_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 59eaad774371..c04c41ca924e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -41,6 +41,7 @@ #include "xfs_exchrange.h" #include "xfs_handle.h" #include "xfs_rtgroup.h" +#include "xfs_healthmon.h" #include #include @@ -1419,6 +1420,9 @@ xfs_file_ioctl( case XFS_IOC_COMMIT_RANGE: return xfs_ioc_commit_range(filp, arg); + case XFS_IOC_HEALTH_MONITOR: + return xfs_ioc_health_monitor(filp, arg); + default: return -ENOTTY; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0953f6ae94ab..ab67c9191538 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -41,6 +41,7 @@ #include "xfs_rtrefcount_btree.h" #include "scrub/stats.h" #include "xfs_zone_alloc.h" +#include "xfs_healthmon.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; @@ -625,6 +626,7 @@ xfs_unmount_flush_inodes( cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp); xfs_health_unmount(mp); + xfs_healthmon_unmount(mp); } static void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b871dfde372b..61c71128d171 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -13,6 +13,7 @@ struct xfs_ail; struct xfs_quotainfo; struct xfs_da_geometry; struct xfs_perag; +struct xfs_healthmon; /* dynamic preallocation free space thresholds, 5% down to 1% */ enum { @@ -342,6 +343,9 @@ typedef struct xfs_mount { /* Hook to feed dirent updates to an active online repair. */ struct xfs_hooks m_dir_update_hooks; + + /* Private data referring to a health monitor object. */ + struct xfs_healthmon *m_healthmon; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) From b3a289a2a9397b2e731f334d7d36623a0f9192c5 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 20 Jan 2026 18:06:46 -0800 Subject: [PATCH 02/67] xfs: create event queuing, formatting, and discovery infrastructure Create the basic infrastructure that we need to report health events to userspace. We need a compact form for recording critical information about events and queueing them; a means to notice that we've lost some events; and a means to format the events into something that userspace can handle. Make the kernel export C structures via read(). In a previous iteration of this new subsystem, I wanted to explore data exchange formats that are more flexible and easier for humans to read than C structures. The thought was that when we want to rev (or worse, enlarge) the event format, it ought to be trivially easy to do that in a way that doesn't break old userspace. I looked at formats such as protobufs and capnproto. These look really nice in that extending the wire format is fairly easy, you can give it a data schema and it generates the serialization code for you, handles endianness problems, etc. The huge downside is that neither supports C all that well. Too hard, and I didn't want to port either of those huge sprawling libraries first to the kernel and then again to xfsprogs. Then I thought, how about JSON? Javascript objects are human readable, the kernel can emit json without much fuss (it's all just strings!) and there are plenty of interpreters for python/rust/c/etc. There's a proposed schema format for json, which means that xfs can publish a description of the events that the kernel will emit. Userspace consumers (e.g. xfsprogs/xfs_healer) can embed the same schema document and use it to validate the incoming events from the kernel, which means they can discard events that they don't understand, or garbage being emitted due to bugs. However, json has a huge catch -- javascript is well known for its vague definition of what a number is. 
This makes expressing a large number rather fraught, because the runtime is free to represent a number in nearly any way it wants. Stupider ones will truncate values to word size, others will roll out doubles for uint52_t (yes, fifty-two) with the resulting loss of precision. Not good when you're dealing with discrete units. It just so happens that python's json library is smart enough to see a sequence of digits and put them in a u64 (at least on x86_64/aarch64) but an actual javascript interpreter (pasting into Firefox) isn't necessarily so clever. It turns out that none of the proposed json schemas were ever ratified even in an open-consensus way, so json blobs are still just loosely structured blobs. The parsing in userspace was also noticeably slow and memory-consumptive. Hence only the C interface survives. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 47 ++++ fs/xfs/xfs_healthmon.c | 495 ++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_healthmon.h | 59 +++++ fs/xfs/xfs_trace.c | 2 + fs/xfs/xfs_trace.h | 171 ++++++++++++++ 5 files changed, 768 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index c58e55b3df40..22b86bc888de 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1003,12 +1003,59 @@ struct xfs_rtgroup_geometry { #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ +/* Health monitor event domains */ + +/* affects the whole fs */ +#define XFS_HEALTH_MONITOR_DOMAIN_MOUNT (0) + +/* Health monitor event types */ + +/* status of the monitor itself */ +#define XFS_HEALTH_MONITOR_TYPE_RUNNING (0) +#define XFS_HEALTH_MONITOR_TYPE_LOST (1) + +/* lost events */ +struct xfs_health_monitor_lost { + __u64 count; +}; + +struct xfs_health_monitor_event { + /* XFS_HEALTH_MONITOR_DOMAIN_* */ + __u32 domain; + + /* XFS_HEALTH_MONITOR_TYPE_* */ + __u32 type; + + /* 
Timestamp of the event, in nanoseconds since the Unix epoch */ + __u64 time_ns; + + /* + * Details of the event. The primary clients are written in python + * and rust, so break this up because bindgen hates anonymous structs + * and unions. + */ + union { + struct xfs_health_monitor_lost lost; + } e; + + /* zeroes */ + __u64 pad[2]; +}; + struct xfs_health_monitor { __u64 flags; /* flags */ __u8 format; /* output format */ __u8 pad[23]; /* zeroes */ }; +/* Return all health status events, not just deltas */ +#define XFS_HEALTH_MONITOR_VERBOSE (1ULL << 0) + +#define XFS_HEALTH_MONITOR_ALL (XFS_HEALTH_MONITOR_VERBOSE) + +/* Initial return format version */ +#define XFS_HEALTH_MONITOR_FMT_V0 (0) + /* * ioctl commands that are used by Linux filesystems */ diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index b7095ea55897..f1c6782f5e39 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -45,6 +45,13 @@ /* sign of a detached health monitor */ #define DETACHED_MOUNT_COOKIE ((uintptr_t)0) +/* Constrain the number of event objects that can build up in memory. */ +#define XFS_HEALTHMON_MAX_EVENTS (SZ_32K / \ + sizeof(struct xfs_healthmon_event)) + +/* Constrain the size of the output buffer for read_iter. */ +#define XFS_HEALTHMON_MAX_OUTBUF SZ_64K + /* spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers */ static DEFINE_SPINLOCK(xfs_healthmon_lock); @@ -73,8 +80,20 @@ static void xfs_healthmon_put( struct xfs_healthmon *hm) { - if (refcount_dec_and_test(&hm->ref)) + if (refcount_dec_and_test(&hm->ref)) { + struct xfs_healthmon_event *event; + struct xfs_healthmon_event *next = hm->first_event; + + while ((event = next) != NULL) { + trace_xfs_healthmon_drop(hm, event); + next = event->next; + kfree(event); + } + + kfree(hm->buffer); + mutex_destroy(&hm->lock); kfree_rcu_mightsleep(hm); + } } /* Attach a health monitor to an xfs_mount. Only one allowed at a time. 
*/ @@ -112,9 +131,182 @@ xfs_healthmon_detach( hm->mount_cookie = DETACHED_MOUNT_COOKIE; spin_unlock(&xfs_healthmon_lock); + trace_xfs_healthmon_detach(hm); xfs_healthmon_put(hm); } +static inline void xfs_healthmon_bump_events(struct xfs_healthmon *hm) +{ + hm->events++; + hm->total_events++; +} + +static inline void xfs_healthmon_bump_lost(struct xfs_healthmon *hm) +{ + hm->lost_prev_event++; + hm->total_lost++; +} + +/* + * If possible, merge a new event into an existing event. Returns whether or + * not it merged anything. + */ +static bool +xfs_healthmon_merge_events( + struct xfs_healthmon_event *existing, + const struct xfs_healthmon_event *new) +{ + if (!existing) + return false; + + /* type and domain must match to merge events */ + if (existing->type != new->type || + existing->domain != new->domain) + return false; + + switch (existing->type) { + case XFS_HEALTHMON_RUNNING: + /* should only ever be one of these events anyway */ + return false; + + case XFS_HEALTHMON_LOST: + existing->lostcount += new->lostcount; + return true; + } + + return false; +} + +/* Insert an event onto the start of the queue. */ +static inline void +__xfs_healthmon_insert( + struct xfs_healthmon *hm, + struct xfs_healthmon_event *event) +{ + struct timespec64 now; + + ktime_get_coarse_real_ts64(&now); + event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; + + event->next = hm->first_event; + if (!hm->first_event) + hm->first_event = event; + if (!hm->last_event) + hm->last_event = event; + xfs_healthmon_bump_events(hm); + wake_up(&hm->wait); + + trace_xfs_healthmon_insert(hm, event); +} + +/* Push an event onto the end of the queue. 
*/ +static inline void +__xfs_healthmon_push( + struct xfs_healthmon *hm, + struct xfs_healthmon_event *event) +{ + struct timespec64 now; + + ktime_get_coarse_real_ts64(&now); + event->time_ns = (now.tv_sec * NSEC_PER_SEC) + now.tv_nsec; + + if (!hm->first_event) + hm->first_event = event; + if (hm->last_event) + hm->last_event->next = event; + hm->last_event = event; + event->next = NULL; + xfs_healthmon_bump_events(hm); + wake_up(&hm->wait); + + trace_xfs_healthmon_push(hm, event); +} + +/* Deal with any previously lost events */ +static int +xfs_healthmon_clear_lost_prev( + struct xfs_healthmon *hm) +{ + struct xfs_healthmon_event lost_event = { + .type = XFS_HEALTHMON_LOST, + .domain = XFS_HEALTHMON_MOUNT, + .lostcount = hm->lost_prev_event, + }; + struct xfs_healthmon_event *event = NULL; + + if (xfs_healthmon_merge_events(hm->last_event, &lost_event)) { + trace_xfs_healthmon_merge(hm, hm->last_event); + wake_up(&hm->wait); + goto cleared; + } + + if (hm->events < XFS_HEALTHMON_MAX_EVENTS) + event = kmemdup(&lost_event, sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!event) + return -ENOMEM; + + __xfs_healthmon_push(hm, event); +cleared: + hm->lost_prev_event = 0; + return 0; +} + +/* + * Push an event onto the end of the list after dealing with lost events and + * possibly full queues. + */ +STATIC int +xfs_healthmon_push( + struct xfs_healthmon *hm, + const struct xfs_healthmon_event *template) +{ + struct xfs_healthmon_event *event = NULL; + int error = 0; + + /* + * Locklessly check if the health monitor has already detached from the + * mount. If so, ignore the event. If we race with deactivation, + * we'll queue the event but never send it. 
+ */ + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) + return -ESHUTDOWN; + + mutex_lock(&hm->lock); + + /* Report previously lost events before we do anything else */ + if (hm->lost_prev_event) { + error = xfs_healthmon_clear_lost_prev(hm); + if (error) + goto out_unlock; + } + + /* Try to merge with the newest event */ + if (xfs_healthmon_merge_events(hm->last_event, template)) { + trace_xfs_healthmon_merge(hm, hm->last_event); + wake_up(&hm->wait); + goto out_unlock; + } + + /* Only create a heap event object if we're not already at capacity. */ + if (hm->events < XFS_HEALTHMON_MAX_EVENTS) + event = kmemdup(template, sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!event) { + /* No memory means we lose the event */ + trace_xfs_healthmon_lost_event(hm); + xfs_healthmon_bump_lost(hm); + error = -ENOMEM; + goto out_unlock; + } + + __xfs_healthmon_push(hm, event); + +out_unlock: + mutex_unlock(&hm->lock); + return error; +} + /* Detach the xfs mount from this healthmon instance. */ void xfs_healthmon_unmount( @@ -129,12 +321,271 @@ xfs_healthmon_unmount( xfs_healthmon_put(hm); } +static inline void +xfs_healthmon_reset_outbuf( + struct xfs_healthmon *hm) +{ + hm->buftail = 0; + hm->bufhead = 0; +} + +static const unsigned int domain_map[] = { + [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT, +}; + +static const unsigned int type_map[] = { + [XFS_HEALTHMON_RUNNING] = XFS_HEALTH_MONITOR_TYPE_RUNNING, + [XFS_HEALTHMON_LOST] = XFS_HEALTH_MONITOR_TYPE_LOST, +}; + +/* Render event as a V0 structure */ +STATIC int +xfs_healthmon_format_v0( + struct xfs_healthmon *hm, + const struct xfs_healthmon_event *event) +{ + struct xfs_health_monitor_event hme = { + .time_ns = event->time_ns, + }; + + trace_xfs_healthmon_format(hm, event); + + if (event->domain < 0 || event->domain >= ARRAY_SIZE(domain_map) || + event->type < 0 || event->type >= ARRAY_SIZE(type_map)) + return -EFSCORRUPTED; + + hme.domain = domain_map[event->domain]; + hme.type = 
type_map[event->type]; + + /* fill in the event-specific details */ + switch (event->domain) { + case XFS_HEALTHMON_MOUNT: + switch (event->type) { + case XFS_HEALTHMON_LOST: + hme.e.lost.count = event->lostcount; + break; + default: + break; + } + break; + default: + break; + } + + ASSERT(hm->bufhead + sizeof(hme) <= hm->bufsize); + + /* copy formatted object to the outbuf */ + if (hm->bufhead + sizeof(hme) <= hm->bufsize) { + memcpy(hm->buffer + hm->bufhead, &hme, sizeof(hme)); + hm->bufhead += sizeof(hme); + } + + return 0; +} + +/* How many bytes are waiting in the outbuf to be copied? */ +static inline size_t +xfs_healthmon_outbuf_bytes( + struct xfs_healthmon *hm) +{ + if (hm->bufhead > hm->buftail) + return hm->bufhead - hm->buftail; + return 0; +} + +/* + * Do we have something for userspace to read? This can mean unmount events, + * events pending in the queue, or pending bytes in the outbuf. + */ +static inline bool +xfs_healthmon_has_eventdata( + struct xfs_healthmon *hm) +{ + /* + * If the health monitor is already detached from the xfs_mount, we + * want reads to return 0 bytes even if there are no events, because + * userspace interprets that as EOF. If we race with deactivation, + * read_iter will take the necessary locks to discover that there are + * no events to send. + */ + if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) + return true; + + /* + * Either there are events waiting to be formatted into the buffer, or + * there's unread bytes in the buffer. + */ + return hm->events > 0 || xfs_healthmon_outbuf_bytes(hm) > 0; +} + +/* Try to copy the rest of the outbuf to the iov iter. 
*/ +STATIC ssize_t +xfs_healthmon_copybuf( + struct xfs_healthmon *hm, + struct iov_iter *to) +{ + size_t to_copy; + size_t w = 0; + + trace_xfs_healthmon_copybuf(hm, to); + + to_copy = xfs_healthmon_outbuf_bytes(hm); + if (to_copy) { + w = copy_to_iter(hm->buffer + hm->buftail, to_copy, to); + if (!w) + return -EFAULT; + + hm->buftail += w; + } + + /* + * Nothing left to copy? Reset the output buffer cursors to the start + * since there's no live data in the buffer. + */ + if (xfs_healthmon_outbuf_bytes(hm) == 0) + xfs_healthmon_reset_outbuf(hm); + return w; +} + +/* + * Return a health monitoring event for formatting into the output buffer if + * there's enough space in the outbuf and an event waiting for us. Caller + * must hold i_rwsem on the healthmon file. + */ +static inline struct xfs_healthmon_event * +xfs_healthmon_format_pop( + struct xfs_healthmon *hm) +{ + struct xfs_healthmon_event *event; + + if (hm->bufhead + sizeof(*event) > hm->bufsize) + return NULL; + + mutex_lock(&hm->lock); + event = hm->first_event; + if (event) { + if (hm->last_event == event) + hm->last_event = NULL; + hm->first_event = event->next; + hm->events--; + + trace_xfs_healthmon_pop(hm, event); + } + mutex_unlock(&hm->lock); + return event; +} + +/* Allocate formatting buffer */ +STATIC int +xfs_healthmon_alloc_outbuf( + struct xfs_healthmon *hm, + size_t user_bufsize) +{ + void *outbuf; + size_t bufsize = + min(XFS_HEALTHMON_MAX_OUTBUF, max(PAGE_SIZE, user_bufsize)); + + outbuf = kzalloc(bufsize, GFP_KERNEL); + if (!outbuf) { + if (bufsize == PAGE_SIZE) + return -ENOMEM; + + bufsize = PAGE_SIZE; + outbuf = kzalloc(bufsize, GFP_KERNEL); + if (!outbuf) + return -ENOMEM; + } + + hm->buffer = outbuf; + hm->bufsize = bufsize; + hm->bufhead = 0; + hm->buftail = 0; + + return 0; +} + +/* + * Convey queued event data to userspace. First copy any remaining bytes in + * the outbuf, then format the oldest event into the outbuf and copy that too. 
+ */ STATIC ssize_t xfs_healthmon_read_iter( struct kiocb *iocb, struct iov_iter *to) { - return -EIO; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct xfs_healthmon *hm = file->private_data; + struct xfs_healthmon_event *event; + size_t copied = 0; + ssize_t ret = 0; + + if (file->f_flags & O_NONBLOCK) { + if (!xfs_healthmon_has_eventdata(hm) || !inode_trylock(inode)) + return -EAGAIN; + } else { + ret = wait_event_interruptible(hm->wait, + xfs_healthmon_has_eventdata(hm)); + if (ret) + return ret; + + inode_lock(inode); + } + + if (hm->bufsize == 0) { + ret = xfs_healthmon_alloc_outbuf(hm, iov_iter_count(to)); + if (ret) + goto out_unlock; + } + + trace_xfs_healthmon_read_start(hm); + + /* + * If there's anything left in the output buffer, copy that before + * formatting more events. + */ + ret = xfs_healthmon_copybuf(hm, to); + if (ret < 0) + goto out_unlock; + copied += ret; + + while (iov_iter_count(to) > 0) { + /* Format the next events into the outbuf until it's full. */ + while ((event = xfs_healthmon_format_pop(hm)) != NULL) { + ret = xfs_healthmon_format_v0(hm, event); + kfree(event); + if (ret) + goto out_unlock; + } + + /* Copy anything formatted into outbuf to userspace */ + ret = xfs_healthmon_copybuf(hm, to); + if (ret <= 0) + break; + + copied += ret; + } + +out_unlock: + trace_xfs_healthmon_read_finish(hm); + inode_unlock(inode); + return copied ?: ret; +} + +/* Poll for available events. */ +STATIC __poll_t +xfs_healthmon_poll( + struct file *file, + struct poll_table_struct *wait) +{ + struct xfs_healthmon *hm = file->private_data; + __poll_t mask = 0; + + poll_wait(file, &hm->wait, wait); + + if (xfs_healthmon_has_eventdata(hm)) + mask |= EPOLLIN; + return mask; } /* Free the health monitoring information. 
*/ @@ -145,6 +596,8 @@ xfs_healthmon_release( { struct xfs_healthmon *hm = file->private_data; + trace_xfs_healthmon_release(hm); + /* * We might be closing the healthmon file before the filesystem * unmounts, because userspace processes can terminate at any time and @@ -153,6 +606,12 @@ xfs_healthmon_release( */ xfs_healthmon_detach(hm); + /* + * Wake up any readers that might be left. There shouldn't be any + * because the only users of the waiter are read and poll. + */ + wake_up_all(&hm->wait); + xfs_healthmon_put(hm); return 0; } @@ -162,9 +621,9 @@ static inline bool xfs_healthmon_validate( const struct xfs_health_monitor *hmo) { - if (hmo->flags) + if (hmo->flags & ~XFS_HEALTH_MONITOR_ALL) return false; - if (hmo->format) + if (hmo->format != XFS_HEALTH_MONITOR_FMT_V0) return false; if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad))) return false; @@ -179,16 +638,21 @@ xfs_healthmon_show_fdinfo( { struct xfs_healthmon *hm = file->private_data; - seq_printf(m, "state:\t%s\ndev:\t%d:%d\n", + mutex_lock(&hm->lock); + seq_printf(m, "state:\t%s\ndev:\t%d:%d\nformat:\tv0\nevents:\t%llu\nlost:\t%llu\n", hm->mount_cookie == DETACHED_MOUNT_COOKIE ? 
"dead" : "alive", - MAJOR(hm->dev), MINOR(hm->dev)); + MAJOR(hm->dev), MINOR(hm->dev), + hm->total_events, + hm->total_lost); + mutex_unlock(&hm->lock); } static const struct file_operations xfs_healthmon_fops = { .owner = THIS_MODULE, .show_fdinfo = xfs_healthmon_show_fdinfo, .read_iter = xfs_healthmon_read_iter, + .poll = xfs_healthmon_poll, .release = xfs_healthmon_release, }; @@ -202,6 +666,7 @@ xfs_ioc_health_monitor( struct xfs_health_monitor __user *arg) { struct xfs_health_monitor hmo; + struct xfs_healthmon_event *running_event; struct xfs_healthmon *hm; struct xfs_inode *ip = XFS_I(file_inode(file)); struct xfs_mount *mp = ip->i_mount; @@ -232,6 +697,22 @@ xfs_ioc_health_monitor( hm->dev = mp->m_super->s_dev; refcount_set(&hm->ref, 1); + mutex_init(&hm->lock); + init_waitqueue_head(&hm->wait); + + if (hmo.flags & XFS_HEALTH_MONITOR_VERBOSE) + hm->verbose = true; + + /* Queue up the first event that lets the client know we're running. */ + running_event = kzalloc(sizeof(struct xfs_healthmon_event), GFP_NOFS); + if (!running_event) { + ret = -ENOMEM; + goto out_hm; + } + running_event->type = XFS_HEALTHMON_RUNNING; + running_event->domain = XFS_HEALTHMON_MOUNT; + __xfs_healthmon_insert(hm, running_event); + /* * Try to attach this health monitor to the xfs_mount. The monitor is * considered live and will receive events if this succeeds. @@ -251,6 +732,8 @@ xfs_ioc_health_monitor( if (ret < 0) goto out_mp; + trace_xfs_healthmon_create(mp->m_super->s_dev, hmo.flags, hmo.format); + return ret; out_mp: diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 218d5aac87b0..554ec6212544 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -26,10 +26,69 @@ struct xfs_healthmon { * and running event handlers hold their own refs. 
*/ refcount_t ref; + + /* lock for event list and event counters */ + struct mutex lock; + + /* list of event objects */ + struct xfs_healthmon_event *first_event; + struct xfs_healthmon_event *last_event; + + /* number of events in the list */ + unsigned int events; + + /* do we want all events? */ + bool verbose:1; + + /* waiter so read/poll can sleep until the arrival of events */ + struct wait_queue_head wait; + + /* + * Buffer for formatting events for a read_iter call. Events are + * formatted into the buffer at bufhead, and buftail determines where + * to start a copy_iter to get those events to userspace. All buffer + * fields are protected by inode_lock. + */ + char *buffer; + size_t bufsize; + size_t bufhead; + size_t buftail; + + /* did we lose previous events? */ + unsigned long long lost_prev_event; + + /* total counts of events observed and lost events */ + unsigned long long total_events; + unsigned long long total_lost; }; void xfs_healthmon_unmount(struct xfs_mount *mp); +enum xfs_healthmon_type { + XFS_HEALTHMON_RUNNING, /* monitor running */ + XFS_HEALTHMON_LOST, /* message lost */ +}; + +enum xfs_healthmon_domain { + XFS_HEALTHMON_MOUNT, /* affects the whole fs */ +}; + +struct xfs_healthmon_event { + struct xfs_healthmon_event *next; + + enum xfs_healthmon_type type; + enum xfs_healthmon_domain domain; + + uint64_t time_ns; + + union { + /* lost events */ + struct { + uint64_t lostcount; + }; + }; +}; + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index a60556dbd172..d42b864a3837 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -51,6 +51,8 @@ #include "xfs_rtgroup.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" +#include "xfs_health.h" +#include "xfs_healthmon.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f70afbf3cb19..04727470b3b4 100644 
--- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -103,6 +103,8 @@ struct xfs_refcount_intent; struct xfs_metadir_update; struct xfs_rtgroup; struct xfs_open_zone; +struct xfs_healthmon_event; +struct xfs_healthmon; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -5906,6 +5908,175 @@ DEFINE_EVENT(xfs_freeblocks_resv_class, name, \ DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_reserved); DEFINE_FREEBLOCKS_RESV_EVENT(xfs_freecounter_enospc); +TRACE_EVENT(xfs_healthmon_lost_event, + TP_PROTO(const struct xfs_healthmon *hm), + TP_ARGS(hm), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, lost_prev) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->lost_prev = hm->lost_prev_event; + ), + TP_printk("dev %d:%d lost_prev %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->lost_prev) +); + +#define XFS_HEALTHMON_FLAGS_STRINGS \ + { XFS_HEALTH_MONITOR_VERBOSE, "verbose" } +#define XFS_HEALTHMON_FMT_STRINGS \ + { XFS_HEALTH_MONITOR_FMT_V0, "v0" } + +TRACE_EVENT(xfs_healthmon_create, + TP_PROTO(dev_t dev, u64 flags, u8 format), + TP_ARGS(dev, flags, format), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, flags) + __field(u8, format) + ), + TP_fast_assign( + __entry->dev = dev; + __entry->flags = flags; + __entry->format = format; + ), + TP_printk("dev %d:%d flags %s format %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_flags(__entry->flags, "|", XFS_HEALTHMON_FLAGS_STRINGS), + __print_symbolic(__entry->format, XFS_HEALTHMON_FMT_STRINGS)) +); + +TRACE_EVENT(xfs_healthmon_copybuf, + TP_PROTO(const struct xfs_healthmon *hm, const struct iov_iter *iov), + TP_ARGS(hm, iov), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(size_t, bufsize) + __field(size_t, inpos) + __field(size_t, outpos) + __field(size_t, to_copy) + __field(size_t, iter_count) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->bufsize = hm->bufsize; + __entry->inpos = hm->bufhead; + __entry->outpos = hm->buftail; + if 
(hm->bufhead > hm->buftail) + __entry->to_copy = hm->bufhead - hm->buftail; + else + __entry->to_copy = 0; + __entry->iter_count = iov_iter_count(iov); + ), + TP_printk("dev %d:%d bufsize %zu in_pos %zu out_pos %zu to_copy %zu iter_count %zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->bufsize, + __entry->inpos, + __entry->outpos, + __entry->to_copy, + __entry->iter_count) +); + +DECLARE_EVENT_CLASS(xfs_healthmon_class, + TP_PROTO(const struct xfs_healthmon *hm), + TP_ARGS(hm), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, events) + __field(unsigned long long, lost_prev) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->events = hm->events; + __entry->lost_prev = hm->lost_prev_event; + ), + TP_printk("dev %d:%d events %u lost_prev? %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->events, + __entry->lost_prev) +); +#define DEFINE_HEALTHMON_EVENT(name) \ +DEFINE_EVENT(xfs_healthmon_class, name, \ + TP_PROTO(const struct xfs_healthmon *hm), \ + TP_ARGS(hm)) +DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_start); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_finish); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_release); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_detach); + +#define XFS_HEALTHMON_TYPE_STRINGS \ + { XFS_HEALTHMON_LOST, "lost" } + +#define XFS_HEALTHMON_DOMAIN_STRINGS \ + { XFS_HEALTHMON_MOUNT, "mount" } + +TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_MOUNT); + +DECLARE_EVENT_CLASS(xfs_healthmon_event_class, + TP_PROTO(const struct xfs_healthmon *hm, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, mask) + __field(unsigned long long, ino) + __field(unsigned int, gen) + __field(unsigned int, group) + __field(unsigned long long, offset) + __field(unsigned long long, length) + __field(unsigned long long, lostcount) + ), + TP_fast_assign( + __entry->dev = 
hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->mask = 0; + __entry->group = 0; + __entry->ino = 0; + __entry->gen = 0; + __entry->offset = 0; + __entry->length = 0; + __entry->lostcount = 0; + switch (__entry->domain) { + case XFS_HEALTHMON_MOUNT: + switch (__entry->type) { + case XFS_HEALTHMON_LOST: + __entry->lostcount = event->lostcount; + break; + } + break; + } + ), + TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->mask, + __entry->ino, + __entry->gen, + __entry->offset, + __entry->length, + __entry->group, + __entry->lostcount) +); +#define DEFINE_HEALTHMONEVENT_EVENT(name) \ +DEFINE_EVENT(xfs_healthmon_event_class, name, \ + TP_PROTO(const struct xfs_healthmon *hm, \ + const struct xfs_healthmon_event *event), \ + TP_ARGS(hm, event)) +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_insert); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_push); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_pop); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format_overflow); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_drop); +DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_merge); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From 25ca57fa3624cae9c6b5c6d3fc7f38318ca1402e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:47 -0800 Subject: [PATCH 03/67] xfs: convey filesystem unmount events to the health monitor In xfs_healthmon_unmount, send events to xfs_healer so that it knows that nothing further can be done for the filesystem. Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 3 +++ fs/xfs/xfs_healthmon.c | 32 +++++++++++++++++++++++++++++++- fs/xfs/xfs_healthmon.h | 4 ++++ fs/xfs/xfs_trace.h | 6 +++++- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 22b86bc888de..59de6ab69fb3 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1014,6 +1014,9 @@ struct xfs_rtgroup_geometry { #define XFS_HEALTH_MONITOR_TYPE_RUNNING (0) #define XFS_HEALTH_MONITOR_TYPE_LOST (1) +/* filesystem was unmounted */ +#define XFS_HEALTH_MONITOR_TYPE_UNMOUNT (2) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index f1c6782f5e39..c218838e6e59 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -90,6 +90,7 @@ xfs_healthmon_put( kfree(event); } + kfree(hm->unmount_event); kfree(hm->buffer); mutex_destroy(&hm->lock); kfree_rcu_mightsleep(hm); @@ -166,6 +167,7 @@ xfs_healthmon_merge_events( switch (existing->type) { case XFS_HEALTHMON_RUNNING: + case XFS_HEALTHMON_UNMOUNT: /* should only ever be one of these events anyway */ return false; @@ -307,7 +309,10 @@ out_unlock: return error; } -/* Detach the xfs mount from this healthmon instance. */ +/* + * Report that the filesystem is being unmounted, then detach the xfs mount + * from this healthmon instance. + */ void xfs_healthmon_unmount( struct xfs_mount *mp) @@ -317,6 +322,17 @@ xfs_healthmon_unmount( if (!hm) return; + trace_xfs_healthmon_report_unmount(hm); + + /* + * Insert the unmount notification at the start of the event queue so + * that userspace knows the filesystem went away as soon as possible. + * There's nothing actionable for userspace after an unmount. Once + * we've inserted the unmount event, hm no longer owns that event. 
+ */ + __xfs_healthmon_insert(hm, hm->unmount_event); + hm->unmount_event = NULL; + xfs_healthmon_detach(hm); xfs_healthmon_put(hm); } @@ -713,6 +729,20 @@ xfs_ioc_health_monitor( running_event->domain = XFS_HEALTHMON_MOUNT; __xfs_healthmon_insert(hm, running_event); + /* + * Preallocate the unmount event so that we can't fail to report the + * unmount to userspace later. This is key for triggering fast exit of the + * xfs_healer daemon. + */ + hm->unmount_event = kzalloc(sizeof(struct xfs_healthmon_event), + GFP_NOFS); + if (!hm->unmount_event) { + ret = -ENOMEM; + goto out_hm; + } + hm->unmount_event->type = XFS_HEALTHMON_UNMOUNT; + hm->unmount_event->domain = XFS_HEALTHMON_MOUNT; + /* * Try to attach this health monitor to the xfs_mount. The monitor is * considered live and will receive events if this succeeds. diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 554ec6212544..3044bb46485d 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -34,6 +34,9 @@ struct xfs_healthmon { struct xfs_healthmon_event *first_event; struct xfs_healthmon_event *last_event; + /* preallocated event for unmount */ + struct xfs_healthmon_event *unmount_event; + /* number of events in the list */ unsigned int events; @@ -67,6 +70,7 @@ void xfs_healthmon_unmount(struct xfs_mount *mp); enum xfs_healthmon_type { XFS_HEALTHMON_RUNNING, /* monitor running */ XFS_HEALTHMON_LOST, /* message lost */ + XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */ }; enum xfs_healthmon_domain { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 04727470b3b4..305cae8f497b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6005,14 +6005,18 @@ DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_start); DEFINE_HEALTHMON_EVENT(xfs_healthmon_read_finish); DEFINE_HEALTHMON_EVENT(xfs_healthmon_release); DEFINE_HEALTHMON_EVENT(xfs_healthmon_detach); +DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount); #define XFS_HEALTHMON_TYPE_STRINGS \ - { 
XFS_HEALTHMON_LOST, "lost" }, \ + { XFS_HEALTHMON_UNMOUNT, "unmount" } #define XFS_HEALTHMON_DOMAIN_STRINGS \ { XFS_HEALTHMON_MOUNT, "mount" } TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_UNMOUNT); + TRACE_DEFINE_ENUM(XFS_HEALTHMON_MOUNT); DECLARE_EVENT_CLASS(xfs_healthmon_event_class, From 5eb4cb18e445d09f64ef4b7c8fdc3b2296cb0702 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:47 -0800 Subject: [PATCH 04/67] xfs: convey metadata health events to the health monitor Connect the filesystem metadata health event collection system to the health monitor so that xfs can send events to xfs_healer as it collects information. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 35 +++++++ fs/xfs/libxfs/xfs_health.h | 5 + fs/xfs/xfs_health.c | 123 +++++++++++++++++++++++++ fs/xfs/xfs_healthmon.c | 181 +++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_healthmon.h | 39 ++++++++ fs/xfs/xfs_trace.h | 130 +++++++++++++++++++++++++- 6 files changed, 511 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 59de6ab69fb3..04e1dcf61257 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1008,6 +1008,12 @@ struct xfs_rtgroup_geometry { /* affects the whole fs */ #define XFS_HEALTH_MONITOR_DOMAIN_MOUNT (0) +/* metadata health events */ +#define XFS_HEALTH_MONITOR_DOMAIN_FS (1) +#define XFS_HEALTH_MONITOR_DOMAIN_AG (2) +#define XFS_HEALTH_MONITOR_DOMAIN_INODE (3) +#define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4) + /* Health monitor event types */ /* status of the monitor itself */ @@ -1017,11 +1023,37 @@ struct xfs_rtgroup_geometry { /* filesystem was unmounted */ #define XFS_HEALTH_MONITOR_TYPE_UNMOUNT (2) +/* metadata health events */ +#define XFS_HEALTH_MONITOR_TYPE_SICK (3) +#define XFS_HEALTH_MONITOR_TYPE_CORRUPT (4) +#define XFS_HEALTH_MONITOR_TYPE_HEALTHY (5) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; }; 
+/* fs/rt metadata */ +struct xfs_health_monitor_fs { + /* XFS_FSOP_GEOM_SICK_* flags */ + __u32 mask; +}; + +/* ag/rtgroup metadata */ +struct xfs_health_monitor_group { + /* XFS_{AG,RTGROUP}_GEOM_SICK_* flags */ + __u32 mask; + __u32 gno; +}; + +/* inode metadata */ +struct xfs_health_monitor_inode { + /* XFS_BS_SICK_* flags */ + __u32 mask; + __u32 gen; + __u64 ino; +}; + struct xfs_health_monitor_event { /* XFS_HEALTH_MONITOR_DOMAIN_* */ __u32 domain; @@ -1039,6 +1071,9 @@ struct xfs_health_monitor_event { */ union { struct xfs_health_monitor_lost lost; + struct xfs_health_monitor_fs fs; + struct xfs_health_monitor_group group; + struct xfs_health_monitor_inode inode; } e; /* zeroes */ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index b31000f7190c..1d45cf5789e8 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -289,4 +289,9 @@ void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); #define xfs_metadata_is_sick(error) \ (unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC)) +unsigned int xfs_healthmon_inode_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_rtgroup_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_perag_mask(unsigned int sick_mask); +unsigned int xfs_healthmon_fs_mask(unsigned int sick_mask); + #endif /* __XFS_HEALTH_H__ */ diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 3d50397f8f7c..f243c06fd447 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -108,14 +108,19 @@ xfs_fs_mark_sick( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_sick(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick |= mask; spin_unlock(&mp->m_sb_lock); fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_SICK, old_mask, mask); } /* Mark per-fs metadata as having been checked and found unhealthy by 
fsck. */ @@ -124,15 +129,21 @@ xfs_fs_mark_corrupt( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_corrupt(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick |= mask; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* Mark a per-fs metadata healed. */ @@ -141,15 +152,22 @@ xfs_fs_mark_healthy( struct xfs_mount *mp, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_healthy(mp, mask); spin_lock(&mp->m_sb_lock); + old_mask = mp->m_fs_sick; mp->m_fs_sick &= ~mask; if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY)) mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); + + if (mask) + xfs_healthmon_report_fs(mp, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which per-fs metadata are unhealthy. 
*/ @@ -199,14 +217,20 @@ xfs_group_mark_sick( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_sick(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick |= mask; spin_unlock(&xg->xg_state_lock); fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_SICK, old_mask, + mask); } /* @@ -217,15 +241,21 @@ xfs_group_mark_corrupt( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_corrupt(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick |= mask; xg->xg_checked |= mask; spin_unlock(&xg->xg_state_lock); fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* @@ -236,15 +266,22 @@ xfs_group_mark_healthy( struct xfs_group *xg, unsigned int mask) { + unsigned int old_mask; + xfs_group_check_mask(xg, mask); trace_xfs_group_mark_healthy(xg, mask); spin_lock(&xg->xg_state_lock); + old_mask = xg->xg_sick; xg->xg_sick &= ~mask; if (!(xg->xg_sick & XFS_SICK_AG_PRIMARY)) xg->xg_sick &= ~XFS_SICK_AG_SECONDARY; xg->xg_checked |= mask; spin_unlock(&xg->xg_state_lock); + + if (mask) + xfs_healthmon_report_group(xg, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which per-ag metadata are unhealthy. 
*/ @@ -283,10 +320,13 @@ xfs_inode_mark_sick( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_sick(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick |= mask; spin_unlock(&ip->i_flags_lock); @@ -300,6 +340,9 @@ xfs_inode_mark_sick( spin_unlock(&VFS_I(ip)->i_lock); fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask, + mask); } /* Mark inode metadata as having been checked and found unhealthy by fsck. */ @@ -308,10 +351,13 @@ xfs_inode_mark_corrupt( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_corrupt(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick |= mask; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); @@ -326,6 +372,9 @@ xfs_inode_mark_corrupt( spin_unlock(&VFS_I(ip)->i_lock); fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS); + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask, + mask); } /* Mark parts of an inode healed. */ @@ -334,15 +383,22 @@ xfs_inode_mark_healthy( struct xfs_inode *ip, unsigned int mask) { + unsigned int old_mask; + ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_healthy(ip, mask); spin_lock(&ip->i_flags_lock); + old_mask = ip->i_sick; ip->i_sick &= ~mask; if (!(ip->i_sick & XFS_SICK_INO_PRIMARY)) ip->i_sick &= ~XFS_SICK_INO_SECONDARY; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); + + if (mask) + xfs_healthmon_report_inode(ip, XFS_HEALTHMON_HEALTHY, old_mask, + mask); } /* Sample which parts of an inode are unhealthy. */ @@ -422,6 +478,25 @@ xfs_fsop_geom_health( } } +/* + * Translate XFS_SICK_FS_* into XFS_FSOP_GEOM_SICK_* except for the rt free + * space codes, which are sent via the rtgroup events. 
+ */ +unsigned int +xfs_healthmon_fs_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(fs_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map ag_map[] = { { XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB }, { XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF }, @@ -458,6 +533,22 @@ xfs_ag_geom_health( } } +/* Translate XFS_SICK_AG_* into XFS_AG_GEOM_SICK_*. */ +unsigned int +xfs_healthmon_perag_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(ag_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map rtgroup_map[] = { { XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER }, { XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP }, @@ -488,6 +579,22 @@ xfs_rtgroup_geom_health( } } +/* Translate XFS_SICK_RG_* into XFS_RTGROUP_GEOM_SICK_*. */ +unsigned int +xfs_healthmon_rtgroup_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(rtgroup_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_CORE, XFS_BS_SICK_INODE }, { XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD }, @@ -526,6 +633,22 @@ xfs_bulkstat_health( } } +/* Translate XFS_SICK_INO_* into XFS_BS_SICK_*. */ +unsigned int +xfs_healthmon_inode_mask( + unsigned int sick_mask) +{ + const struct ioctl_sick_map *m; + unsigned int ioctl_mask = 0; + + for_each_sick_map(ino_map, m) { + if (sick_mask & m->sick_mask) + ioctl_mask |= m->ioctl_mask; + } + + return ioctl_mask; +} + /* Mark a block mapping sick. 
*/ void xfs_bmap_mark_sick( diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index c218838e6e59..0039a79822e8 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -18,6 +18,7 @@ #include "xfs_da_btree.h" #include "xfs_quota_defs.h" #include "xfs_rtgroup.h" +#include "xfs_health.h" #include "xfs_healthmon.h" #include @@ -174,6 +175,33 @@ xfs_healthmon_merge_events( case XFS_HEALTHMON_LOST: existing->lostcount += new->lostcount; return true; + + case XFS_HEALTHMON_SICK: + case XFS_HEALTHMON_CORRUPT: + case XFS_HEALTHMON_HEALTHY: + switch (existing->domain) { + case XFS_HEALTHMON_FS: + existing->fsmask |= new->fsmask; + return true; + case XFS_HEALTHMON_AG: + case XFS_HEALTHMON_RTGROUP: + if (existing->group == new->group){ + existing->grpmask |= new->grpmask; + return true; + } + return false; + case XFS_HEALTHMON_INODE: + if (existing->ino == new->ino && + existing->gen == new->gen) { + existing->imask |= new->imask; + return true; + } + return false; + default: + ASSERT(0); + return false; + } + return false; } return false; @@ -337,6 +365,135 @@ xfs_healthmon_unmount( xfs_healthmon_put(hm); } +/* Compute the reporting mask for non-unmount metadata health events. */ +static inline unsigned int +metadata_event_mask( + struct xfs_healthmon *hm, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + /* If we want all events, return all events. 
*/ + if (hm->verbose) + return new_mask; + + switch (type) { + case XFS_HEALTHMON_SICK: + /* Always report runtime corruptions */ + return new_mask; + case XFS_HEALTHMON_CORRUPT: + /* Only report new fsck errors */ + return new_mask & ~old_mask; + case XFS_HEALTHMON_HEALTHY: + /* Only report healthy metadata that got fixed */ + return new_mask & old_mask; + default: + ASSERT(0); + break; + } + + return 0; +} + +/* Report XFS_SICK_FS_* flags to healthmon */ +void +xfs_healthmon_report_fs( + struct xfs_mount *mp, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .domain = XFS_HEALTHMON_FS, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + event.fsmask = metadata_event_mask(hm, type, old_mask, new_mask) & + ~XFS_SICK_FS_SECONDARY; + trace_xfs_healthmon_report_fs(hm, old_mask, new_mask, &event); + + if (event.fsmask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + +/* Report XFS_SICK_(AG|RG)* flags to healthmon */ +void +xfs_healthmon_report_group( + struct xfs_group *xg, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .group = xg->xg_gno, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(xg->xg_mount); + + if (!hm) + return; + + switch (xg->xg_type) { + case XG_TYPE_RTG: + event.domain = XFS_HEALTHMON_RTGROUP; + event.grpmask = metadata_event_mask(hm, type, old_mask, + new_mask) & + ~XFS_SICK_RG_SECONDARY; + break; + case XG_TYPE_AG: + event.domain = XFS_HEALTHMON_AG; + event.grpmask = metadata_event_mask(hm, type, old_mask, + new_mask) & + ~XFS_SICK_AG_SECONDARY; + break; + default: + ASSERT(0); + break; + } + + trace_xfs_healthmon_report_group(hm, old_mask, new_mask, &event); + + if (event.grpmask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + +/* Report XFS_SICK_INO_* flags to healthmon */ +void 
+xfs_healthmon_report_inode( + struct xfs_inode *ip, + enum xfs_healthmon_type type, + unsigned int old_mask, + unsigned int new_mask) +{ + struct xfs_healthmon_event event = { + .type = type, + .domain = XFS_HEALTHMON_INODE, + .ino = ip->i_ino, + .gen = VFS_I(ip)->i_generation, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); + + if (!hm) + return; + + event.imask = metadata_event_mask(hm, type, old_mask, new_mask) & + ~XFS_SICK_INO_SECONDARY; + trace_xfs_healthmon_report_inode(hm, old_mask, new_mask, &event); + + if (event.imask) + xfs_healthmon_push(hm, &event); + + xfs_healthmon_put(hm); +} + static inline void xfs_healthmon_reset_outbuf( struct xfs_healthmon *hm) @@ -347,11 +504,19 @@ xfs_healthmon_reset_outbuf( static const unsigned int domain_map[] = { [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT, + [XFS_HEALTHMON_FS] = XFS_HEALTH_MONITOR_DOMAIN_FS, + [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG, + [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE, + [XFS_HEALTHMON_RTGROUP] = XFS_HEALTH_MONITOR_DOMAIN_RTGROUP, }; static const unsigned int type_map[] = { [XFS_HEALTHMON_RUNNING] = XFS_HEALTH_MONITOR_TYPE_RUNNING, [XFS_HEALTHMON_LOST] = XFS_HEALTH_MONITOR_TYPE_LOST, + [XFS_HEALTHMON_SICK] = XFS_HEALTH_MONITOR_TYPE_SICK, + [XFS_HEALTHMON_CORRUPT] = XFS_HEALTH_MONITOR_TYPE_CORRUPT, + [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, + [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, }; /* Render event as a V0 structure */ @@ -384,6 +549,22 @@ xfs_healthmon_format_v0( break; } break; + case XFS_HEALTHMON_FS: + hme.e.fs.mask = xfs_healthmon_fs_mask(event->fsmask); + break; + case XFS_HEALTHMON_RTGROUP: + hme.e.group.mask = xfs_healthmon_rtgroup_mask(event->grpmask); + hme.e.group.gno = event->group; + break; + case XFS_HEALTHMON_AG: + hme.e.group.mask = xfs_healthmon_perag_mask(event->grpmask); + hme.e.group.gno = event->group; + break; + case XFS_HEALTHMON_INODE: + hme.e.inode.mask = 
xfs_healthmon_inode_mask(event->imask); + hme.e.inode.ino = event->ino; + hme.e.inode.gen = event->gen; + break; default: break; } diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 3044bb46485d..121e59426395 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -71,10 +71,21 @@ enum xfs_healthmon_type { XFS_HEALTHMON_RUNNING, /* monitor running */ XFS_HEALTHMON_LOST, /* message lost */ XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */ + + /* metadata health events */ + XFS_HEALTHMON_SICK, /* runtime corruption observed */ + XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */ + XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */ }; enum xfs_healthmon_domain { XFS_HEALTHMON_MOUNT, /* affects the whole fs */ + + /* metadata health events */ + XFS_HEALTHMON_FS, /* main filesystem metadata */ + XFS_HEALTHMON_AG, /* allocation group metadata */ + XFS_HEALTHMON_INODE, /* inode metadata */ + XFS_HEALTHMON_RTGROUP, /* realtime group metadata */ }; struct xfs_healthmon_event { @@ -90,9 +101,37 @@ struct xfs_healthmon_event { struct { uint64_t lostcount; }; + /* fs/rt metadata */ + struct { + /* XFS_SICK_* flags */ + unsigned int fsmask; + }; + /* ag/rtgroup metadata */ + struct { + /* XFS_SICK_(AG|RG)* flags */ + unsigned int grpmask; + unsigned int group; + }; + /* inode metadata */ + struct { + /* XFS_SICK_INO_* flags */ + unsigned int imask; + uint32_t gen; + xfs_ino_t ino; + }; }; }; +void xfs_healthmon_report_fs(struct xfs_mount *mp, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); +void xfs_healthmon_report_group(struct xfs_group *xg, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); +void xfs_healthmon_report_inode(struct xfs_inode *ip, + enum xfs_healthmon_type type, unsigned int old_mask, + unsigned int new_mask); + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 
305cae8f497b..debe9846418a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6009,15 +6009,29 @@ DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount); #define XFS_HEALTHMON_TYPE_STRINGS \ { XFS_HEALTHMON_LOST, "lost" }, \ - { XFS_HEALTHMON_UNMOUNT, "unmount" } + { XFS_HEALTHMON_UNMOUNT, "unmount" }, \ + { XFS_HEALTHMON_SICK, "sick" }, \ + { XFS_HEALTHMON_CORRUPT, "corrupt" }, \ + { XFS_HEALTHMON_HEALTHY, "healthy" } #define XFS_HEALTHMON_DOMAIN_STRINGS \ - { XFS_HEALTHMON_MOUNT, "mount" } + { XFS_HEALTHMON_MOUNT, "mount" }, \ + { XFS_HEALTHMON_FS, "fs" }, \ + { XFS_HEALTHMON_AG, "ag" }, \ + { XFS_HEALTHMON_INODE, "inode" }, \ + { XFS_HEALTHMON_RTGROUP, "rtgroup" } TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST); TRACE_DEFINE_ENUM(XFS_HEALTHMON_UNMOUNT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_SICK); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_CORRUPT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_HEALTHY); TRACE_DEFINE_ENUM(XFS_HEALTHMON_MOUNT); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_FS); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_AG); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_INODE); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_RTGROUP); DECLARE_EVENT_CLASS(xfs_healthmon_event_class, TP_PROTO(const struct xfs_healthmon *hm, @@ -6054,6 +6068,19 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class, break; } break; + case XFS_HEALTHMON_FS: + __entry->mask = event->fsmask; + break; + case XFS_HEALTHMON_AG: + case XFS_HEALTHMON_RTGROUP: + __entry->mask = event->grpmask; + __entry->group = event->group; + break; + case XFS_HEALTHMON_INODE: + __entry->mask = event->imask; + __entry->ino = event->ino; + __entry->gen = event->gen; + break; } ), TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", @@ -6081,6 +6108,105 @@ DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_format_overflow); DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_drop); DEFINE_HEALTHMONEVENT_EVENT(xfs_healthmon_merge); +TRACE_EVENT(xfs_healthmon_report_fs, + TP_PROTO(const struct xfs_healthmon *hm, + unsigned int 
old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, fsmask) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->fsmask = event->fsmask; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x fsmask 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->fsmask) +); + +TRACE_EVENT(xfs_healthmon_report_group, + TP_PROTO(const struct xfs_healthmon *hm, + unsigned int old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, grpmask) + __field(unsigned int, group) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->grpmask = event->grpmask; + __entry->group = event->group; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x grpmask 0x%x group 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->grpmask, + __entry->group) +); + +TRACE_EVENT(xfs_healthmon_report_inode, + TP_PROTO(const struct 
xfs_healthmon *hm, + unsigned int old_mask, unsigned int new_mask, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, old_mask, new_mask, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned int, domain) + __field(unsigned int, old_mask) + __field(unsigned int, new_mask) + __field(unsigned int, imask) + __field(unsigned long long, ino) + __field(unsigned int, gen) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = event->type; + __entry->domain = event->domain; + __entry->old_mask = old_mask; + __entry->new_mask = new_mask; + __entry->imask = event->imask; + __entry->ino = event->ino; + __entry->gen = event->gen; + ), + TP_printk("dev %d:%d type %s domain %s oldmask 0x%x newmask 0x%x imask 0x%x ino 0x%llx gen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XFS_HEALTHMON_TYPE_STRINGS), + __print_symbolic(__entry->domain, XFS_HEALTHMON_DOMAIN_STRINGS), + __entry->old_mask, + __entry->new_mask, + __entry->imask, + __entry->ino, + __entry->gen) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From 74c4795e50f816dbf5cf094691fc4f95bbc729ad Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:48 -0800 Subject: [PATCH 05/67] xfs: convey filesystem shutdown events to the health monitor Connect the filesystem shutdown code to the health monitor so that xfs can send events about that to the xfs_healer daemon. Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 18 +++++++++++ fs/xfs/xfs_fsops.c | 2 ++ fs/xfs/xfs_healthmon.c | 70 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_healthmon.h | 9 ++++++ fs/xfs/xfs_trace.h | 23 +++++++++++++- 5 files changed, 121 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 04e1dcf61257..c8f7011a7ef8 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1028,6 +1028,9 @@ struct xfs_rtgroup_geometry { #define XFS_HEALTH_MONITOR_TYPE_CORRUPT (4) #define XFS_HEALTH_MONITOR_TYPE_HEALTHY (5) +/* filesystem shutdown */ +#define XFS_HEALTH_MONITOR_TYPE_SHUTDOWN (6) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; @@ -1054,6 +1057,20 @@ struct xfs_health_monitor_inode { __u64 ino; }; +/* shutdown reasons */ +#define XFS_HEALTH_SHUTDOWN_META_IO_ERROR (1u << 0) +#define XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR (1u << 1) +#define XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT (1u << 2) +#define XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE (1u << 3) +#define XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK (1u << 4) +#define XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED (1u << 5) + +/* shutdown */ +struct xfs_health_monitor_shutdown { + /* XFS_HEALTH_SHUTDOWN_* flags */ + __u32 reasons; +}; + struct xfs_health_monitor_event { /* XFS_HEALTH_MONITOR_DOMAIN_* */ __u32 domain; @@ -1074,6 +1091,7 @@ struct xfs_health_monitor_event { struct xfs_health_monitor_fs fs; struct xfs_health_monitor_group group; struct xfs_health_monitor_inode inode; + struct xfs_health_monitor_shutdown shutdown; } e; /* zeroes */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c21f68edc7..368173bf8a40 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -25,6 +25,7 @@ #include "xfs_rtrmap_btree.h" #include "xfs_rtrefcount_btree.h" #include "xfs_metafile.h" +#include "xfs_healthmon.h" #include @@ -544,6 +545,7 @@ xfs_do_force_shutdown( xfs_stack_trace(); fserror_report_shutdown(mp->m_super, GFP_KERNEL); + 
xfs_healthmon_report_shutdown(mp, flags); } /* diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 0039a79822e8..97f764e79541 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -20,6 +20,7 @@ #include "xfs_rtgroup.h" #include "xfs_health.h" #include "xfs_healthmon.h" +#include "xfs_fsops.h" #include #include @@ -202,6 +203,11 @@ xfs_healthmon_merge_events( return false; } return false; + + case XFS_HEALTHMON_SHUTDOWN: + /* yes, we can race to shutdown */ + existing->flags |= new->flags; + return true; } return false; @@ -494,6 +500,28 @@ xfs_healthmon_report_inode( xfs_healthmon_put(hm); } +/* Add a shutdown event to the reporting queue. */ +void +xfs_healthmon_report_shutdown( + struct xfs_mount *mp, + uint32_t flags) +{ + struct xfs_healthmon_event event = { + .type = XFS_HEALTHMON_SHUTDOWN, + .domain = XFS_HEALTHMON_MOUNT, + .flags = flags, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_shutdown(hm, flags); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + static inline void xfs_healthmon_reset_outbuf( struct xfs_healthmon *hm) @@ -502,6 +530,44 @@ xfs_healthmon_reset_outbuf( hm->bufhead = 0; } +struct flags_map { + unsigned int in_mask; + unsigned int out_mask; +}; + +static const struct flags_map shutdown_map[] = { + { SHUTDOWN_META_IO_ERROR, XFS_HEALTH_SHUTDOWN_META_IO_ERROR }, + { SHUTDOWN_LOG_IO_ERROR, XFS_HEALTH_SHUTDOWN_LOG_IO_ERROR }, + { SHUTDOWN_FORCE_UMOUNT, XFS_HEALTH_SHUTDOWN_FORCE_UMOUNT }, + { SHUTDOWN_CORRUPT_INCORE, XFS_HEALTH_SHUTDOWN_CORRUPT_INCORE }, + { SHUTDOWN_CORRUPT_ONDISK, XFS_HEALTH_SHUTDOWN_CORRUPT_ONDISK }, + { SHUTDOWN_DEVICE_REMOVED, XFS_HEALTH_SHUTDOWN_DEVICE_REMOVED }, +}; + +static inline unsigned int +__map_flags( + const struct flags_map *map, + size_t array_len, + unsigned int flags) +{ + const struct flags_map *m; + unsigned int ret = 0; + + for (m = map; m < map + array_len; m++) { + if (flags & m->in_mask) + ret 
|= m->out_mask; + } + + return ret; +} + +#define map_flags(map, flags) __map_flags((map), ARRAY_SIZE(map), (flags)) + +static inline unsigned int shutdown_mask(unsigned int in) +{ + return map_flags(shutdown_map, in); +} + static const unsigned int domain_map[] = { [XFS_HEALTHMON_MOUNT] = XFS_HEALTH_MONITOR_DOMAIN_MOUNT, [XFS_HEALTHMON_FS] = XFS_HEALTH_MONITOR_DOMAIN_FS, @@ -517,6 +583,7 @@ static const unsigned int type_map[] = { [XFS_HEALTHMON_CORRUPT] = XFS_HEALTH_MONITOR_TYPE_CORRUPT, [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, + [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, }; /* Render event as a V0 structure */ @@ -545,6 +612,9 @@ xfs_healthmon_format_v0( case XFS_HEALTHMON_LOST: hme.e.lost.count = event->lostcount; break; + case XFS_HEALTHMON_SHUTDOWN: + hme.e.shutdown.reasons = shutdown_mask(event->flags); + break; default: break; } diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 121e59426395..1f68b5d65a8e 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -72,6 +72,9 @@ enum xfs_healthmon_type { XFS_HEALTHMON_LOST, /* message lost */ XFS_HEALTHMON_UNMOUNT, /* filesystem is unmounting */ + /* filesystem shutdown */ + XFS_HEALTHMON_SHUTDOWN, + /* metadata health events */ XFS_HEALTHMON_SICK, /* runtime corruption observed */ XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */ @@ -119,6 +122,10 @@ struct xfs_healthmon_event { uint32_t gen; xfs_ino_t ino; }; + /* shutdown */ + struct { + unsigned int flags; + }; }; }; @@ -132,6 +139,8 @@ void xfs_healthmon_report_inode(struct xfs_inode *ip, enum xfs_healthmon_type type, unsigned int old_mask, unsigned int new_mask); +void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags); + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index debe9846418a..ec99a6d3dd31 100644 --- 
a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6012,7 +6012,8 @@ DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount); { XFS_HEALTHMON_UNMOUNT, "unmount" }, \ { XFS_HEALTHMON_SICK, "sick" }, \ { XFS_HEALTHMON_CORRUPT, "corrupt" }, \ - { XFS_HEALTHMON_HEALTHY, "healthy" } + { XFS_HEALTHMON_HEALTHY, "healthy" }, \ + { XFS_HEALTHMON_SHUTDOWN, "shutdown" } #define XFS_HEALTHMON_DOMAIN_STRINGS \ { XFS_HEALTHMON_MOUNT, "mount" }, \ @@ -6022,6 +6023,7 @@ DEFINE_HEALTHMON_EVENT(xfs_healthmon_report_unmount); { XFS_HEALTHMON_RTGROUP, "rtgroup" } TRACE_DEFINE_ENUM(XFS_HEALTHMON_LOST); +TRACE_DEFINE_ENUM(XFS_HEALTHMON_SHUTDOWN); TRACE_DEFINE_ENUM(XFS_HEALTHMON_UNMOUNT); TRACE_DEFINE_ENUM(XFS_HEALTHMON_SICK); TRACE_DEFINE_ENUM(XFS_HEALTHMON_CORRUPT); @@ -6063,6 +6065,9 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class, switch (__entry->domain) { case XFS_HEALTHMON_MOUNT: switch (__entry->type) { + case XFS_HEALTHMON_SHUTDOWN: + __entry->mask = event->flags; + break; case XFS_HEALTHMON_LOST: __entry->lostcount = event->lostcount; break; @@ -6207,6 +6212,22 @@ TRACE_EVENT(xfs_healthmon_report_inode, __entry->gen) ); +TRACE_EVENT(xfs_healthmon_report_shutdown, + TP_PROTO(const struct xfs_healthmon *hm, uint32_t shutdown_flags), + TP_ARGS(hm, shutdown_flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(uint32_t, shutdown_flags) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->shutdown_flags = shutdown_flags; + ), + TP_printk("dev %d:%d shutdown_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_flags(__entry->shutdown_flags, "|", XFS_SHUTDOWN_STRINGS)) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From e76e0e3fc9957a5183ddc51dc84c3e471125ab06 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 20 Jan 2026 18:06:49 -0800 Subject: [PATCH 06/67] xfs: convey externally discovered fsdax media errors to the health monitor Connect the fsdax media failure notification code to the health monitor so that xfs can send events about that to the xfs_healer daemon. Later on we'll add the ability for the xfs_scrub media scan (phase 6) to report the errors that it finds to the kernel so that those are also logged by xfs_healer. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 15 +++++++++ fs/xfs/xfs_healthmon.c | 66 +++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_healthmon.h | 16 +++++++++ fs/xfs/xfs_notify_failure.c | 17 +++++++--- fs/xfs/xfs_trace.c | 1 + fs/xfs/xfs_trace.h | 38 +++++++++++++++++++++ 6 files changed, 148 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index c8f7011a7ef8..38aeb1b0d87b 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1014,6 +1014,11 @@ struct xfs_rtgroup_geometry { #define XFS_HEALTH_MONITOR_DOMAIN_INODE (3) #define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4) +/* disk events */ +#define XFS_HEALTH_MONITOR_DOMAIN_DATADEV (5) +#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6) +#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7) + /* Health monitor event types */ /* status of the monitor itself */ @@ -1031,6 +1036,9 @@ struct xfs_rtgroup_geometry { /* filesystem shutdown */ #define XFS_HEALTH_MONITOR_TYPE_SHUTDOWN (6) +/* media errors */ +#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; @@ -1071,6 +1079,12 @@ struct xfs_health_monitor_shutdown { __u32 reasons; }; +/* disk media errors */ +struct xfs_health_monitor_media { + __u64 daddr; + __u64 bbcount; +}; + struct xfs_health_monitor_event { /* XFS_HEALTH_MONITOR_DOMAIN_* */ __u32 domain; @@ -1092,6 +1106,7 @@ struct xfs_health_monitor_event { struct xfs_health_monitor_group group; struct xfs_health_monitor_inode inode; 
struct xfs_health_monitor_shutdown shutdown; + struct xfs_health_monitor_media media; } e; /* zeroes */ diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 97f764e79541..773bd4414d94 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -21,6 +21,7 @@ #include "xfs_health.h" #include "xfs_healthmon.h" #include "xfs_fsops.h" +#include "xfs_notify_failure.h" #include #include @@ -208,6 +209,19 @@ xfs_healthmon_merge_events( /* yes, we can race to shutdown */ existing->flags |= new->flags; return true; + + case XFS_HEALTHMON_MEDIA_ERROR: + /* physically adjacent errors can merge */ + if (existing->daddr + existing->bbcount == new->daddr) { + existing->bbcount += new->bbcount; + return true; + } + if (new->daddr + new->bbcount == existing->daddr) { + existing->daddr = new->daddr; + existing->bbcount += new->bbcount; + return true; + } + return false; } return false; @@ -522,6 +536,48 @@ xfs_healthmon_report_shutdown( xfs_healthmon_put(hm); } +static inline enum xfs_healthmon_domain +media_error_domain( + enum xfs_device fdev) +{ + switch (fdev) { + case XFS_DEV_DATA: + return XFS_HEALTHMON_DATADEV; + case XFS_DEV_LOG: + return XFS_HEALTHMON_LOGDEV; + case XFS_DEV_RT: + return XFS_HEALTHMON_RTDEV; + } + + ASSERT(0); + return 0; +} + +/* Add a media error event to the reporting queue. 
*/ +void +xfs_healthmon_report_media( + struct xfs_mount *mp, + enum xfs_device fdev, + xfs_daddr_t daddr, + uint64_t bbcount) +{ + struct xfs_healthmon_event event = { + .type = XFS_HEALTHMON_MEDIA_ERROR, + .domain = media_error_domain(fdev), + .daddr = daddr, + .bbcount = bbcount, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(mp); + + if (!hm) + return; + + trace_xfs_healthmon_report_media(hm, fdev, &event); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + static inline void xfs_healthmon_reset_outbuf( struct xfs_healthmon *hm) @@ -574,6 +630,9 @@ static const unsigned int domain_map[] = { [XFS_HEALTHMON_AG] = XFS_HEALTH_MONITOR_DOMAIN_AG, [XFS_HEALTHMON_INODE] = XFS_HEALTH_MONITOR_DOMAIN_INODE, [XFS_HEALTHMON_RTGROUP] = XFS_HEALTH_MONITOR_DOMAIN_RTGROUP, + [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV, + [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV, + [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV, }; static const unsigned int type_map[] = { @@ -584,6 +643,7 @@ static const unsigned int type_map[] = { [XFS_HEALTHMON_HEALTHY] = XFS_HEALTH_MONITOR_TYPE_HEALTHY, [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, + [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR, }; /* Render event as a V0 structure */ @@ -635,6 +695,12 @@ xfs_healthmon_format_v0( hme.e.inode.ino = event->ino; hme.e.inode.gen = event->gen; break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + hme.e.media.daddr = event->daddr; + hme.e.media.bbcount = event->bbcount; + break; default: break; } diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 1f68b5d65a8e..54536aac4278 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -79,6 +79,9 @@ enum xfs_healthmon_type { XFS_HEALTHMON_SICK, /* runtime corruption observed */ XFS_HEALTHMON_CORRUPT, /* fsck reported corruption */ 
XFS_HEALTHMON_HEALTHY, /* fsck reported healthy structure */ + + /* media errors */ + XFS_HEALTHMON_MEDIA_ERROR, }; enum xfs_healthmon_domain { @@ -89,6 +92,11 @@ enum xfs_healthmon_domain { XFS_HEALTHMON_AG, /* allocation group metadata */ XFS_HEALTHMON_INODE, /* inode metadata */ XFS_HEALTHMON_RTGROUP, /* realtime group metadata */ + + /* media errors */ + XFS_HEALTHMON_DATADEV, + XFS_HEALTHMON_RTDEV, + XFS_HEALTHMON_LOGDEV, }; struct xfs_healthmon_event { @@ -126,6 +134,11 @@ struct xfs_healthmon_event { struct { unsigned int flags; }; + /* media errors */ + struct { + xfs_daddr_t daddr; + uint64_t bbcount; + }; }; }; @@ -141,6 +154,9 @@ void xfs_healthmon_report_inode(struct xfs_inode *ip, void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags); +void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev, + xfs_daddr_t daddr, uint64_t bbcount); + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 6d5002413c2c..1edc4ddd10cd 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -22,6 +22,7 @@ #include "xfs_notify_failure.h" #include "xfs_rtgroup.h" #include "xfs_rtrmap_btree.h" +#include "xfs_healthmon.h" #include #include @@ -219,6 +220,8 @@ xfs_dax_notify_logdev_failure( if (error) return error; + xfs_healthmon_report_media(mp, XFS_DEV_LOG, daddr, bblen); + /* * In the pre-remove case the failure notification is attempting to * trigger a force unmount. The expectation is that the device is @@ -252,16 +255,20 @@ xfs_dax_notify_dev_failure( uint64_t bblen; struct xfs_group *xg = NULL; - if (!xfs_has_rmapbt(mp)) { - xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); - return -EOPNOTSUPP; - } - error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type), offset, len, &daddr, &bblen); if (error) return error; + xfs_healthmon_report_media(mp, + type == XG_TYPE_RTG ? 
XFS_DEV_RT : XFS_DEV_DATA, + daddr, bblen); + + if (!xfs_has_rmapbt(mp)) { + xfs_debug(mp, "notify_failure() needs rmapbt enabled!"); + return -EOPNOTSUPP; + } + if (type == XG_TYPE_RTG) { start_bno = xfs_daddr_to_rtb(mp, daddr); end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index d42b864a3837..08ddab700a6c 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -53,6 +53,7 @@ #include "xfs_zone_priv.h" #include "xfs_health.h" #include "xfs_healthmon.h" +#include "xfs_notify_failure.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ec99a6d3dd31..fe7295a4e917 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6086,6 +6086,12 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class, __entry->ino = event->ino; __entry->gen = event->gen; break; + case XFS_HEALTHMON_DATADEV: + case XFS_HEALTHMON_LOGDEV: + case XFS_HEALTHMON_RTDEV: + __entry->offset = event->daddr; + __entry->length = event->bbcount; + break; } ), TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", @@ -6228,6 +6234,38 @@ TRACE_EVENT(xfs_healthmon_report_shutdown, __print_flags(__entry->shutdown_flags, "|", XFS_SHUTDOWN_STRINGS)) ); +#define XFS_DEVICE_STRINGS \ + { XFS_DEV_DATA, "datadev" }, \ + { XFS_DEV_RT, "rtdev" }, \ + { XFS_DEV_LOG, "logdev" } + +TRACE_DEFINE_ENUM(XFS_DEV_DATA); +TRACE_DEFINE_ENUM(XFS_DEV_RT); +TRACE_DEFINE_ENUM(XFS_DEV_LOG); + +TRACE_EVENT(xfs_healthmon_report_media, + TP_PROTO(const struct xfs_healthmon *hm, enum xfs_device fdev, + const struct xfs_healthmon_event *event), + TP_ARGS(hm, fdev, event), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, error_dev) + __field(uint64_t, daddr) + __field(uint64_t, bbcount) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->error_dev = fdev; + __entry->daddr = event->daddr; + __entry->bbcount = 
event->bbcount; + ), + TP_printk("dev %d:%d %s daddr 0x%llx bbcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->error_dev, XFS_DEVICE_STRINGS), + __entry->daddr, + __entry->bbcount) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From dfa8bad3a8796ce1ca4f1d15158e2ecfb9c5c014 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:50 -0800 Subject: [PATCH 07/67] xfs: convey file I/O errors to the health monitor Connect the fserror reporting to the health monitor so that xfs can send events about file I/O errors to the xfs_healer daemon. These events are entirely informational because xfs cannot regenerate user data, so hopefully the fsnotify I/O error event gets noticed by the relevant management systems. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 24 ++++++++++++ fs/xfs/xfs_healthmon.c | 85 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_healthmon.h | 21 +++++++++++ fs/xfs/xfs_super.c | 12 ++++++ fs/xfs/xfs_trace.c | 2 + fs/xfs/xfs_trace.h | 54 +++++++++++++++++++++++++++ 6 files changed, 198 insertions(+) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 38aeb1b0d87b..4ec1b2aede97 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1019,6 +1019,9 @@ struct xfs_rtgroup_geometry { #define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6) #define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7) +/* file range events */ +#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE (8) + /* Health monitor event types */ /* status of the monitor itself */ @@ -1039,6 +1042,17 @@ struct xfs_rtgroup_geometry { /* media errors */ #define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7) +/* pagecache I/O to a file range failed */ +#define XFS_HEALTH_MONITOR_TYPE_BUFREAD (8) +#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE (9) + +/* direct I/O to a file range failed */ +#define XFS_HEALTH_MONITOR_TYPE_DIOREAD (10) +#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE (11) + +/* out of band 
media error reported for a file range */ +#define XFS_HEALTH_MONITOR_TYPE_DATALOST (12) + /* lost events */ struct xfs_health_monitor_lost { __u64 count; @@ -1079,6 +1093,15 @@ struct xfs_health_monitor_shutdown { __u32 reasons; }; +/* file range events */ +struct xfs_health_monitor_filerange { + __u64 pos; + __u64 len; + __u64 ino; + __u32 gen; + __u32 error; +}; + /* disk media errors */ struct xfs_health_monitor_media { __u64 daddr; @@ -1107,6 +1130,7 @@ struct xfs_health_monitor_event { struct xfs_health_monitor_inode inode; struct xfs_health_monitor_shutdown shutdown; struct xfs_health_monitor_media media; + struct xfs_health_monitor_filerange filerange; } e; /* zeroes */ diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 773bd4414d94..1bb4b0adf247 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -22,10 +22,12 @@ #include "xfs_healthmon.h" #include "xfs_fsops.h" #include "xfs_notify_failure.h" +#include "xfs_file.h" #include #include #include +#include /* * Live Health Monitoring @@ -222,6 +224,27 @@ xfs_healthmon_merge_events( return true; } return false; + + case XFS_HEALTHMON_BUFREAD: + case XFS_HEALTHMON_BUFWRITE: + case XFS_HEALTHMON_DIOREAD: + case XFS_HEALTHMON_DIOWRITE: + case XFS_HEALTHMON_DATALOST: + /* logically adjacent file ranges can merge */ + if (existing->fino != new->fino || existing->fgen != new->fgen) + return false; + + if (existing->fpos + existing->flen == new->fpos) { + existing->flen += new->flen; + return true; + } + + if (new->fpos + new->flen == existing->fpos) { + existing->fpos = new->fpos; + existing->flen += new->flen; + return true; + } + return false; } return false; @@ -578,6 +601,55 @@ xfs_healthmon_report_media( xfs_healthmon_put(hm); } +static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action) +{ + switch (action) { + case FSERR_BUFFERED_READ: + return XFS_HEALTHMON_BUFREAD; + case FSERR_BUFFERED_WRITE: + return XFS_HEALTHMON_BUFWRITE; + case FSERR_DIRECTIO_READ: + 
return XFS_HEALTHMON_DIOREAD; + case FSERR_DIRECTIO_WRITE: + return XFS_HEALTHMON_DIOWRITE; + case FSERR_DATA_LOST: + return XFS_HEALTHMON_DATALOST; + case FSERR_METADATA: + /* filtered out by xfs_fs_report_error */ + break; + } + + ASSERT(0); + return -1; +} + +/* Add a file io error event to the reporting queue. */ +void +xfs_healthmon_report_file_ioerror( + struct xfs_inode *ip, + const struct fserror_event *p) +{ + struct xfs_healthmon_event event = { + .type = file_ioerr_type(p->type), + .domain = XFS_HEALTHMON_FILERANGE, + .fino = ip->i_ino, + .fgen = VFS_I(ip)->i_generation, + .fpos = p->pos, + .flen = p->len, + /* send positive error number to userspace */ + .error = -p->error, + }; + struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount); + + if (!hm) + return; + + trace_xfs_healthmon_report_file_ioerror(hm, p); + + xfs_healthmon_push(hm, &event); + xfs_healthmon_put(hm); +} + static inline void xfs_healthmon_reset_outbuf( struct xfs_healthmon *hm) @@ -633,6 +705,7 @@ static const unsigned int domain_map[] = { [XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV, [XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV, [XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV, + [XFS_HEALTHMON_FILERANGE] = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE, }; static const unsigned int type_map[] = { @@ -644,6 +717,11 @@ static const unsigned int type_map[] = { [XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT, [XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN, [XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR, + [XFS_HEALTHMON_BUFREAD] = XFS_HEALTH_MONITOR_TYPE_BUFREAD, + [XFS_HEALTHMON_BUFWRITE] = XFS_HEALTH_MONITOR_TYPE_BUFWRITE, + [XFS_HEALTHMON_DIOREAD] = XFS_HEALTH_MONITOR_TYPE_DIOREAD, + [XFS_HEALTHMON_DIOWRITE] = XFS_HEALTH_MONITOR_TYPE_DIOWRITE, + [XFS_HEALTHMON_DATALOST] = XFS_HEALTH_MONITOR_TYPE_DATALOST, }; /* Render event as a V0 structure */ @@ -701,6 +779,13 @@ xfs_healthmon_format_v0( hme.e.media.daddr = 
event->daddr; hme.e.media.bbcount = event->bbcount; break; + case XFS_HEALTHMON_FILERANGE: + hme.e.filerange.ino = event->fino; + hme.e.filerange.gen = event->fgen; + hme.e.filerange.pos = event->fpos; + hme.e.filerange.len = event->flen; + hme.e.filerange.error = abs(event->error); + break; default: break; } diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h index 54536aac4278..0e936507037f 100644 --- a/fs/xfs/xfs_healthmon.h +++ b/fs/xfs/xfs_healthmon.h @@ -82,6 +82,13 @@ enum xfs_healthmon_type { /* media errors */ XFS_HEALTHMON_MEDIA_ERROR, + + /* file range events */ + XFS_HEALTHMON_BUFREAD, + XFS_HEALTHMON_BUFWRITE, + XFS_HEALTHMON_DIOREAD, + XFS_HEALTHMON_DIOWRITE, + XFS_HEALTHMON_DATALOST, }; enum xfs_healthmon_domain { @@ -97,6 +104,9 @@ enum xfs_healthmon_domain { XFS_HEALTHMON_DATADEV, XFS_HEALTHMON_RTDEV, XFS_HEALTHMON_LOGDEV, + + /* file range events */ + XFS_HEALTHMON_FILERANGE, }; struct xfs_healthmon_event { @@ -139,6 +149,14 @@ struct xfs_healthmon_event { xfs_daddr_t daddr; uint64_t bbcount; }; + /* file range events */ + struct { + xfs_ino_t fino; + loff_t fpos; + uint64_t flen; + uint32_t fgen; + int error; + }; }; }; @@ -157,6 +175,9 @@ void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags); void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev, xfs_daddr_t daddr, uint64_t bbcount); +void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip, + const struct fserror_event *p); + long xfs_ioc_health_monitor(struct file *file, struct xfs_health_monitor __user *arg); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bc71aa9dcee8..d0cef9ce6b89 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -47,12 +47,14 @@ #include "xfs_parent.h" #include "xfs_rtalloc.h" #include "xfs_zone_alloc.h" +#include "xfs_healthmon.h" #include "scrub/stats.h" #include "scrub/rcbag_btree.h" #include #include #include +#include static const struct super_operations xfs_super_operations; @@ -1301,6 +1303,15 
@@ xfs_fs_show_stats( return 0; } +static void +xfs_fs_report_error( + const struct fserror_event *event) +{ + /* healthmon already knows about non-inode and metadata errors */ + if (event->inode && event->type != FSERR_METADATA) + xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event); +} + static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, @@ -1317,6 +1328,7 @@ static const struct super_operations xfs_super_operations = { .free_cached_objects = xfs_fs_free_cached_objects, .shutdown = xfs_fs_shutdown, .show_stats = xfs_fs_show_stats, + .report_error = xfs_fs_report_error, }; static int diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 08ddab700a6c..3ae449646eb9 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -54,6 +54,8 @@ #include "xfs_health.h" #include "xfs_healthmon.h" #include "xfs_notify_failure.h" +#include "xfs_file.h" +#include /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index fe7295a4e917..0cf487775358 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -105,6 +105,7 @@ struct xfs_rtgroup; struct xfs_open_zone; struct xfs_healthmon_event; struct xfs_healthmon; +struct fserror_event; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -6092,6 +6093,12 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class, __entry->offset = event->daddr; __entry->length = event->bbcount; break; + case XFS_HEALTHMON_FILERANGE: + __entry->ino = event->fino; + __entry->gen = event->fgen; + __entry->offset = event->fpos; + __entry->length = event->flen; + break; } ), TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu", @@ -6266,6 +6273,53 @@ TRACE_EVENT(xfs_healthmon_report_media, __entry->bbcount) ); +#define FS_ERROR_STRINGS \ + { FSERR_BUFFERED_READ, "buffered_read" }, \ + { FSERR_BUFFERED_WRITE, 
"buffered_write" }, \ + { FSERR_DIRECTIO_READ, "directio_read" }, \ + { FSERR_DIRECTIO_WRITE, "directio_write" }, \ + { FSERR_DATA_LOST, "data_lost" }, \ + { FSERR_METADATA, "metadata" } + +TRACE_DEFINE_ENUM(FSERR_BUFFERED_READ); +TRACE_DEFINE_ENUM(FSERR_BUFFERED_WRITE); +TRACE_DEFINE_ENUM(FSERR_DIRECTIO_READ); +TRACE_DEFINE_ENUM(FSERR_DIRECTIO_WRITE); +TRACE_DEFINE_ENUM(FSERR_DATA_LOST); +TRACE_DEFINE_ENUM(FSERR_METADATA); + +TRACE_EVENT(xfs_healthmon_report_file_ioerror, + TP_PROTO(const struct xfs_healthmon *hm, + const struct fserror_event *p), + TP_ARGS(hm, p), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, type) + __field(unsigned long long, ino) + __field(unsigned int, gen) + __field(long long, pos) + __field(unsigned long long, len) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = hm->dev; + __entry->type = p->type; + __entry->ino = XFS_I(p->inode)->i_ino; + __entry->gen = p->inode->i_generation; + __entry->pos = p->pos; + __entry->len = p->len; + __entry->error = p->error; + ), + TP_printk("dev %d:%d ino 0x%llx gen 0x%x op %s pos 0x%llx bytecount 0x%llx error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->gen, + __print_symbolic(__entry->type, FS_ERROR_STRINGS), + __entry->pos, + __entry->len, + __entry->error) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH From c0e719cb36672b69a06da65ac4ec71e9a599dff5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:51 -0800 Subject: [PATCH 08/67] xfs: allow toggling verbose logging on the health monitoring file Make it so that we can reconfigure the health monitoring device by calling the XFS_IOC_HEALTH_MONITOR ioctl on it. As of right now we can only toggle the verbose flag, but this is less annoying than having to close the monitor fd and reopen it. Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_healthmon.c | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 1bb4b0adf247..4a8cbd879322 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -23,6 +23,7 @@ #include "xfs_fsops.h" #include "xfs_notify_failure.h" #include "xfs_file.h" +#include "xfs_ioctl.h" #include #include @@ -1066,12 +1067,55 @@ xfs_healthmon_show_fdinfo( mutex_unlock(&hm->lock); } +/* Reconfigure the health monitor. */ +STATIC long +xfs_healthmon_reconfigure( + struct file *file, + unsigned int cmd, + void __user *arg) +{ + struct xfs_health_monitor hmo; + struct xfs_healthmon *hm = file->private_data; + + if (copy_from_user(&hmo, arg, sizeof(hmo))) + return -EFAULT; + + if (!xfs_healthmon_validate(&hmo)) + return -EINVAL; + + mutex_lock(&hm->lock); + hm->verbose = !!(hmo.flags & XFS_HEALTH_MONITOR_VERBOSE); + mutex_unlock(&hm->lock); + + return 0; +} + +/* Handle ioctls for the health monitoring thread. */ +STATIC long +xfs_healthmon_ioctl( + struct file *file, + unsigned int cmd, + unsigned long p) +{ + void __user *arg = (void __user *)p; + + switch (cmd) { + case XFS_IOC_HEALTH_MONITOR: + return xfs_healthmon_reconfigure(file, cmd, arg); + default: + break; + } + + return -ENOTTY; +} + static const struct file_operations xfs_healthmon_fops = { .owner = THIS_MODULE, .show_fdinfo = xfs_healthmon_show_fdinfo, .read_iter = xfs_healthmon_read_iter, .poll = xfs_healthmon_poll, .release = xfs_healthmon_release, + .unlocked_ioctl = xfs_healthmon_ioctl, }; /* From 8b85dc4090e1c72c6d42acd823514cce67cd54fc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:51 -0800 Subject: [PATCH 09/67] xfs: check if an open file is on the health monitored fs Create a new ioctl for the healthmon file that checks that a given fd points to the same filesystem that the healthmon file is monitoring. 
This allows xfs_healer to check that when it reopens a mountpoint to perform repairs, the file that it gets matches the filesystem that generated the corruption report. (Note that xfs_healer doesn't maintain an open fd to a filesystem that it's monitoring so that it doesn't pin the mount.) Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 12 +++++++++++- fs/xfs/xfs_healthmon.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 4ec1b2aede97..a01303c5de6c 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1151,6 +1151,15 @@ struct xfs_health_monitor { /* Initial return format version */ #define XFS_HEALTH_MONITOR_FMT_V0 (0) +/* + * Check that a given fd points to the same filesystem that the health monitor + * is monitoring. + */ +struct xfs_health_file_on_monitored_fs { + __s32 fd; + __u32 flags; /* zero for now */ +}; + /* * ioctl commands that are used by Linux filesystems */ @@ -1191,7 +1200,8 @@ struct xfs_health_monitor { #define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) #define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry) #define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor) - +#define XFS_IOC_HEALTH_FD_ON_MONITORED_FS \ + _IOW ('X', 69, struct xfs_health_file_on_monitored_fs) /* * ioctl commands that replace IRIX syssgi()'s */ diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c index 4a8cbd879322..3030fa93c1e5 100644 --- a/fs/xfs/xfs_healthmon.c +++ b/fs/xfs/xfs_healthmon.c @@ -1090,6 +1090,38 @@ xfs_healthmon_reconfigure( return 0; } +/* Does the fd point to the same filesystem as the one we're monitoring? 
*/ +STATIC long +xfs_healthmon_file_on_monitored_fs( + struct file *file, + unsigned int cmd, + void __user *arg) +{ + struct xfs_health_file_on_monitored_fs hms; + struct xfs_healthmon *hm = file->private_data; + struct inode *hms_inode; + + if (copy_from_user(&hms, arg, sizeof(hms))) + return -EFAULT; + + if (hms.flags) + return -EINVAL; + + CLASS(fd, hms_fd)(hms.fd); + if (fd_empty(hms_fd)) + return -EBADF; + + hms_inode = file_inode(fd_file(hms_fd)); + mutex_lock(&hm->lock); + if (hm->mount_cookie != (uintptr_t)hms_inode->i_sb) { + mutex_unlock(&hm->lock); + return -ESTALE; + } + + mutex_unlock(&hm->lock); + return 0; +} + /* Handle ioctls for the health monitoring thread. */ STATIC long xfs_healthmon_ioctl( @@ -1102,6 +1134,8 @@ xfs_healthmon_ioctl( switch (cmd) { case XFS_IOC_HEALTH_MONITOR: return xfs_healthmon_reconfigure(file, cmd, arg); + case XFS_IOC_HEALTH_FD_ON_MONITORED_FS: + return xfs_healthmon_file_on_monitored_fs(file, cmd, arg); default: break; } From b8accfd65d31f25b9df15ec2419179b6fa0b21d5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 20 Jan 2026 18:06:52 -0800 Subject: [PATCH 10/67] xfs: add media verification ioctl Add a new privileged ioctl so that xfs_scrub can ask the kernel to verify the media of the devices backing an xfs filesystem, and have any resulting media errors reported to fsnotify and xfs_healer. To accomplish this, the kernel allocates a folio between the base page size and 1MB, and issues read IOs to a gradually incrementing range of one of the storage devices underlying an xfs filesystem. If any error occurs, that raw error is reported to the calling process. If the error happens to be one of the ones that the kernel considers indicative of data loss, then it will also be reported to xfs_healthmon and fsnotify. 
Driving the verification from the kernel enables xfs (and by extension xfs_scrub) to have precise control over the size and error handling of IOs that are issued to the underlying block device, and to emit notifications about problems to other relevant kernel subsystems immediately. Note that the caller is also allowed to reduce the size of the IO and to ask for a relaxation period after each IO. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_fs.h | 30 +++ fs/xfs/xfs_ioctl.c | 3 + fs/xfs/xfs_trace.h | 98 +++++++++ fs/xfs/xfs_verify_media.c | 445 ++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_verify_media.h | 13 ++ 6 files changed, 590 insertions(+) create mode 100644 fs/xfs/xfs_verify_media.c create mode 100644 fs/xfs/xfs_verify_media.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 1b7385e23b34..9f7133e02576 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -106,6 +106,7 @@ xfs-y += xfs_aops.o \ xfs_symlink.o \ xfs_sysfs.o \ xfs_trans.o \ + xfs_verify_media.o \ xfs_xattr.o # low-level transaction/log code diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index a01303c5de6c..d165de607d17 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1160,6 +1160,34 @@ struct xfs_health_file_on_monitored_fs { __u32 flags; /* zero for now */ }; +/* Verify the media of the underlying devices */ +struct xfs_verify_media { + __u32 me_dev; /* I: XFS_DEV_{DATA,LOG,RT} */ + __u32 me_flags; /* I: XFS_VERIFY_MEDIA_* */ + + /* + * IO: inclusive start of disk range to verify, in 512b blocks. + * Will be adjusted upwards as media verification succeeds. + */ + __u64 me_start_daddr; + + /* + * IO: exclusive end of the disk range to verify, in 512b blocks. + * Can be adjusted downwards to match device size. 
+ */ + __u64 me_end_daddr; + + __u32 me_ioerror; /* O: I/O error (positive) */ + __u32 me_max_io_size; /* I: maximum IO size in bytes */ + + __u32 me_rest_us; /* I: rest time between IOs, usecs */ + __u32 me_pad; /* zero */ +}; + +#define XFS_VERIFY_MEDIA_REPORT (1 << 0) /* report to fsnotify */ + +#define XFS_VERIFY_MEDIA_FLAGS (XFS_VERIFY_MEDIA_REPORT) + /* * ioctl commands that are used by Linux filesystems */ @@ -1202,6 +1230,8 @@ struct xfs_health_file_on_monitored_fs { #define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor) #define XFS_IOC_HEALTH_FD_ON_MONITORED_FS \ _IOW ('X', 69, struct xfs_health_file_on_monitored_fs) +#define XFS_IOC_VERIFY_MEDIA _IOWR('X', 70, struct xfs_verify_media) + /* * ioctl commands that replace IRIX syssgi()'s */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c04c41ca924e..80a005999d2d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -42,6 +42,7 @@ #include "xfs_handle.h" #include "xfs_rtgroup.h" #include "xfs_healthmon.h" +#include "xfs_verify_media.h" #include #include @@ -1422,6 +1423,8 @@ xfs_file_ioctl( case XFS_IOC_HEALTH_MONITOR: return xfs_ioc_health_monitor(filp, arg); + case XFS_IOC_VERIFY_MEDIA: + return xfs_ioc_verify_media(filp, arg); default: return -ENOTTY; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0cf487775358..3483461cf462 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -6320,6 +6320,104 @@ TRACE_EVENT(xfs_healthmon_report_file_ioerror, __entry->error) ); +TRACE_EVENT(xfs_verify_media, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount, + const struct folio *folio), + TP_ARGS(mp, me, fdev, daddr, bbcount, folio), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(unsigned int, flags) + __field(xfs_daddr_t, daddr) + __field(uint64_t, bbcount) + __field(unsigned int, bufsize) + ), + 
TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + __entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->flags = me->me_flags; + __entry->daddr = daddr; + __entry->bbcount = bbcount; + __entry->bufsize = folio_size(folio); + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx bufsize 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->flags, + __entry->daddr, + __entry->bbcount, + __entry->bufsize) +); + +TRACE_EVENT(xfs_verify_media_end, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev), + TP_ARGS(mp, me, fdev), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(int, ioerror) + ), + TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + __entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->ioerror = me->me_ioerror; + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx ioerror %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->ioerror) +); + +TRACE_EVENT(xfs_verify_media_error, + TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me, + dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount, + blk_status_t status), + TP_ARGS(mp, me, fdev, daddr, bbcount, status), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, fdev) + __field(xfs_daddr_t, start_daddr) + __field(xfs_daddr_t, end_daddr) + __field(unsigned int, flags) + __field(xfs_daddr_t, daddr) + __field(uint64_t, bbcount) + __field(int, error) + ), + TP_fast_assign( + __entry->dev = mp->m_ddev_targp->bt_dev; + __entry->fdev = fdev; + 
__entry->start_daddr = me->me_start_daddr; + __entry->end_daddr = me->me_end_daddr; + __entry->flags = me->me_flags; + __entry->daddr = daddr; + __entry->bbcount = bbcount; + __entry->error = blk_status_to_errno(status); + ), + TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->fdev), MINOR(__entry->fdev), + __entry->start_daddr, + __entry->end_daddr, + __entry->flags, + __entry->daddr, + __entry->bbcount, + __entry->error) +); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c new file mode 100644 index 000000000000..f4f620c98d92 --- /dev/null +++ b/fs/xfs/xfs_verify_media.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bit.h" +#include "xfs_btree.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_trans.h" +#include "xfs_alloc.h" +#include "xfs_ag.h" +#include "xfs_rmap.h" +#include "xfs_rmap_btree.h" +#include "xfs_rtgroup.h" +#include "xfs_rtrmap_btree.h" +#include "xfs_health.h" +#include "xfs_healthmon.h" +#include "xfs_trace.h" +#include "xfs_verify_media.h" + +#include + +struct xfs_group_data_lost { + xfs_agblock_t startblock; + xfs_extlen_t blockcount; +}; + +/* Report lost file data from rmap records */ +static int +xfs_verify_report_data_lost( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *rec, + void *data) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip; + struct xfs_group_data_lost *lost = data; + xfs_fileoff_t fileoff = rec->rm_offset; + xfs_extlen_t blocks = rec->rm_blockcount; + const bool is_attr = + (rec->rm_flags & XFS_RMAP_ATTR_FORK); + 
const xfs_agblock_t lost_end = + lost->startblock + lost->blockcount; + const xfs_agblock_t rmap_end = + rec->rm_startblock + rec->rm_blockcount; + int error = 0; + + if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner)) + return 0; + + error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip); + if (error) + return 0; + + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) { + xfs_bmap_mark_sick(ip, is_attr ? XFS_ATTR_FORK : XFS_DATA_FORK); + goto out_rele; + } + + if (is_attr) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR); + goto out_rele; + } + + if (lost->startblock > rec->rm_startblock) { + fileoff += lost->startblock - rec->rm_startblock; + blocks -= lost->startblock - rec->rm_startblock; + } + if (rmap_end > lost_end) + blocks -= rmap_end - lost_end; + + fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, fileoff), + XFS_FSB_TO_B(mp, blocks), GFP_NOFS); + +out_rele: + xfs_irele(ip); + return 0; +} + +/* Walk reverse mappings to look for all file data loss */ +static int +xfs_verify_report_losses( + struct xfs_mount *mp, + enum xfs_group_type type, + xfs_daddr_t daddr, + u64 bblen) +{ + struct xfs_group *xg = NULL; + struct xfs_trans *tp; + xfs_fsblock_t start_bno, end_bno; + uint32_t start_gno, end_gno; + int error; + + if (type == XG_TYPE_RTG) { + start_bno = xfs_daddr_to_rtb(mp, daddr); + end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1); + } else { + start_bno = XFS_DADDR_TO_FSB(mp, daddr); + end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1); + } + + tp = xfs_trans_alloc_empty(mp); + start_gno = xfs_fsb_to_gno(mp, start_bno, type); + end_gno = xfs_fsb_to_gno(mp, end_bno, type); + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) { + struct xfs_buf *agf_bp = NULL; + struct xfs_rtgroup *rtg = NULL; + struct xfs_btree_cur *cur; + struct xfs_rmap_irec ri_low = { }; + struct xfs_rmap_irec ri_high; + struct xfs_group_data_lost lost; + + if (type == XG_TYPE_AG) { + struct xfs_perag *pag = to_perag(xg); + + error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp); + 
+ if (error) { + xfs_perag_put(pag); + break; + } + + cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag); + } else { + rtg = to_rtg(xg); + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + cur = xfs_rtrmapbt_init_cursor(tp, rtg); + } + + /* + * Set the rmap range from ri_low to ri_high, which represents + * a [start, end] where we are looking for the files or metadata. + */ + memset(&ri_high, 0xFF, sizeof(ri_high)); + if (xg->xg_gno == start_gno) + ri_low.rm_startblock = + xfs_fsb_to_gbno(mp, start_bno, type); + if (xg->xg_gno == end_gno) + ri_high.rm_startblock = + xfs_fsb_to_gbno(mp, end_bno, type); + + lost.startblock = ri_low.rm_startblock; + lost.blockcount = min(xg->xg_block_count, + ri_high.rm_startblock + 1) - + ri_low.rm_startblock; + + error = xfs_rmap_query_range(cur, &ri_low, &ri_high, + xfs_verify_report_data_lost, &lost); + xfs_btree_del_cursor(cur, error); + if (agf_bp) + xfs_trans_brelse(tp, agf_bp); + if (rtg) + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + if (error) { + xfs_group_put(xg); + break; + } + } + + xfs_trans_cancel(tp); + return 0; +} + +/* + * Compute the desired verify IO size. + * + * To minimize command overhead, we'd like to create bios that are 1MB, though + * we allow the user to ask for a smaller size. + */ +static unsigned int +xfs_verify_iosize( + const struct xfs_verify_media *me, + struct xfs_buftarg *btp, + uint64_t bbcount) +{ + unsigned int iosize = + min_not_zero(SZ_1M, me->me_max_io_size); + + BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT); + ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev)); + + return clamp(iosize, bdev_logical_block_size(btp->bt_bdev), + BBTOB(bbcount)); +} + +/* Allocate as much memory as we can get for verification buffer.
*/ +static struct folio * +xfs_verify_alloc_folio( + const unsigned int iosize) +{ + unsigned int order = get_order(iosize); + + while (order > 0) { + struct folio *folio = + folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + + if (folio) + return folio; + order--; + } + + return folio_alloc(GFP_KERNEL, 0); +} + +/* Report any kind of problem verifying media */ +static void +xfs_verify_media_error( + struct xfs_mount *mp, + struct xfs_verify_media *me, + struct xfs_buftarg *btp, + xfs_daddr_t daddr, + unsigned int bio_bbcount, + blk_status_t bio_status) +{ + trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr, + bio_bbcount, bio_status); + + /* + * Pass any error, I/O or otherwise, up to the caller if we didn't + * successfully verify any bytes at all. + */ + if (me->me_start_daddr == daddr) + me->me_ioerror = -blk_status_to_errno(bio_status); + + /* + * PI validation failures, medium errors, or general IO errors are + * treated as indicators of data loss. Everything else are (hopefully) + * transient errors and are not reported to healthmon or fsnotify. + */ + switch (bio_status) { + case BLK_STS_PROTECTION: + case BLK_STS_IOERR: + case BLK_STS_MEDIUM: + break; + default: + return; + } + + if (!(me->me_flags & XFS_VERIFY_MEDIA_REPORT)) + return; + + xfs_healthmon_report_media(mp, me->me_dev, daddr, bio_bbcount); + + if (!xfs_has_rmapbt(mp)) + return; + + switch (me->me_dev) { + case XFS_DEV_DATA: + xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount); + break; + case XFS_DEV_RT: + xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount); + break; + } +} + +/* Verify the media of an xfs device by submitting read requests to the disk. 
*/ +static int +xfs_verify_media( + struct xfs_mount *mp, + struct xfs_verify_media *me) +{ + struct xfs_buftarg *btp = NULL; + struct bio *bio; + struct folio *folio; + xfs_daddr_t daddr; + uint64_t bbcount; + int error = 0; + + me->me_ioerror = 0; + + switch (me->me_dev) { + case XFS_DEV_DATA: + btp = mp->m_ddev_targp; + break; + case XFS_DEV_LOG: + if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) + btp = mp->m_logdev_targp; + break; + case XFS_DEV_RT: + btp = mp->m_rtdev_targp; + break; + } + if (!btp) + return -ENODEV; + + /* + * If the caller told us to verify beyond the end of the disk, tell the + * user exactly where that was. + */ + if (me->me_end_daddr > btp->bt_nr_sectors) + me->me_end_daddr = btp->bt_nr_sectors; + + /* start and end have to be aligned to the lba size */ + if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr), + bdev_logical_block_size(btp->bt_bdev))) + return -EINVAL; + + /* + * end_daddr is the exclusive end of the range, so if start_daddr + * reaches there (or beyond), there's no work to be done. + */ + if (me->me_start_daddr >= me->me_end_daddr) + return 0; + + /* + * There are three ranges involved here: + * + * - [me->me_start_daddr, me->me_end_daddr) is the range that the + * user wants to verify. end_daddr can be beyond the end of the + * disk; we'll constrain it to the end if necessary. + * + * - [daddr, me->me_end_daddr) is the range that we have not yet + * verified. We update daddr after each successful read. + * me->me_start_daddr is set to daddr before returning. + * + * - [daddr, daddr + bio_bbcount) is the range that we're currently + * verifying. 
+ */ + daddr = me->me_start_daddr; + bbcount = min_t(sector_t, me->me_end_daddr, btp->bt_nr_sectors) - + me->me_start_daddr; + + folio = xfs_verify_alloc_folio(xfs_verify_iosize(me, btp, bbcount)); + if (!folio) + return -ENOMEM; + + trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount, + folio); + + bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL); + if (!bio) { + error = -ENOMEM; + goto out_folio; + } + + while (bbcount > 0) { + unsigned int bio_bbcount; + blk_status_t bio_status; + + bio_reset(bio, btp->bt_bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = daddr; + bio_add_folio_nofail(bio, folio, + min(bbcount << SECTOR_SHIFT, folio_size(folio)), + 0); + + /* + * Save the length of the bio before we submit it, because we + * need the original daddr and length for reporting IO errors + * if the bio fails. + */ + bio_bbcount = bio->bi_iter.bi_size >> SECTOR_SHIFT; + submit_bio_wait(bio); + bio_status = bio->bi_status; + if (bio_status != BLK_STS_OK) { + xfs_verify_media_error(mp, me, btp, daddr, bio_bbcount, + bio_status); + error = 0; + break; + } + + daddr += bio_bbcount; + bbcount -= bio_bbcount; + + if (bbcount == 0) + break; + + if (me->me_rest_us) { + ktime_t expires; + + expires = ktime_add_ns(ktime_get(), + me->me_rest_us * 1000); + set_current_state(TASK_KILLABLE); + schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + } + + if (fatal_signal_pending(current)) { + error = -EINTR; + break; + } + + cond_resched(); + } + + bio_put(bio); +out_folio: + folio_put(folio); + + if (error) + return error; + + /* + * Advance start_daddr to the end of what we verified if there wasn't + * an operational error. 
+ */ + me->me_start_daddr = daddr; + trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev); + return 0; +} + +int +xfs_ioc_verify_media( + struct file *file, + struct xfs_verify_media __user *arg) +{ + struct xfs_verify_media me; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&me, arg, sizeof(me))) + return -EFAULT; + + if (me.me_pad) + return -EINVAL; + if (me.me_flags & ~XFS_VERIFY_MEDIA_FLAGS) + return -EINVAL; + + switch (me.me_dev) { + case XFS_DEV_DATA: + case XFS_DEV_LOG: + case XFS_DEV_RT: + break; + default: + return -EINVAL; + } + + error = xfs_verify_media(mp, &me); + if (error) + return error; + + if (copy_to_user(arg, &me, sizeof(me))) + return -EFAULT; + + return 0; +} diff --git a/fs/xfs/xfs_verify_media.h b/fs/xfs/xfs_verify_media.h new file mode 100644 index 000000000000..dc6eee9c8863 --- /dev/null +++ b/fs/xfs/xfs_verify_media.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2026 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_VERIFY_MEDIA_H__ +#define __XFS_VERIFY_MEDIA_H__ + +struct xfs_verify_media; +int xfs_ioc_verify_media(struct file *file, + struct xfs_verify_media __user *arg); + +#endif /* __XFS_VERIFY_MEDIA_H__ */ From 2d4521e4c00cafcd195f5e6fe5ee75b5c0680b8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:17 +0100 Subject: [PATCH 11/67] xfs: add a xlog_write_one_vec helper Add a wrapper for xlog_write for the two callers who need to build a log_vec and add it to a single-entry chain instead of duplicating the code. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 35 +++++++++++++++++++++-------------- fs/xfs/xfs_log_cil.c | 11 +---------- fs/xfs/xfs_log_priv.h | 2 ++ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d4544ccafea5..c6fa258df844 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -848,6 +848,26 @@ xlog_wait_on_iclog( return 0; } +int +xlog_write_one_vec( + struct xlog *log, + struct xfs_cil_ctx *ctx, + struct xfs_log_iovec *reg, + struct xlog_ticket *ticket) +{ + struct xfs_log_vec lv = { + .lv_niovecs = 1, + .lv_iovecp = reg, + }; + LIST_HEAD (lv_chain); + + /* account for space used by record data */ + ticket->t_curr_res -= reg->i_len; + + list_add(&lv.lv_list, &lv_chain); + return xlog_write(log, ctx, &lv_chain, ticket, reg->i_len); +} + /* * Write out an unmount record using the ticket provided. We have to account for * the data space used in the unmount ticket as this write is not done from a @@ -876,21 +896,8 @@ xlog_write_unmount_record( .i_len = sizeof(unmount_rec), .i_type = XLOG_REG_TYPE_UNMOUNT, }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = &reg, - }; - LIST_HEAD(lv_chain); - list_add(&vec.lv_list, &lv_chain); - BUILD_BUG_ON((sizeof(struct xlog_op_header) + - sizeof(struct xfs_unmount_log_format)) != - sizeof(unmount_rec)); - - /* account for space used by record data */ - ticket->t_curr_res -= sizeof(unmount_rec); - - return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len); + return xlog_write_one_vec(log, NULL, &reg, ticket); } /* diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 778ac47adb8c..83aa06e19cfb 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -1098,13 +1098,7 @@ xlog_cil_write_commit_record( .i_len = sizeof(struct xlog_op_header), .i_type = XLOG_REG_TYPE_COMMIT, }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = &reg, - }; int error; - LIST_HEAD(lv_chain); - list_add(&vec.lv_list, &lv_chain); if
(xlog_is_shutdown(log)) return -EIO; @@ -1112,10 +1106,7 @@ xlog_cil_write_commit_record( error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD); if (error) return error; - - /* account for space used by record data */ - ctx->ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len); + error = xlog_write_one_vec(log, ctx, &reg, ctx->ticket); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 0fe59f0525aa..d2410e78b7f5 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -507,6 +507,8 @@ void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct list_head *lv_chain, struct xlog_ticket *tic, uint32_t len); +int xlog_write_one_vec(struct xlog *log, struct xfs_cil_ctx *ctx, + struct xfs_log_iovec *reg, struct xlog_ticket *ticket); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From c53fbeedbe9098ba2e355fe646f3fe93e57f3f0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:18 +0100 Subject: [PATCH 12/67] xfs: set lv_bytes in xlog_write_one_vec lv_bytes is mostly just used by the CIL code, but has crept into the low-level log writing code to decide on a full or partial iclog write. Ensure it is valid even for the special log writes that don't go through the CIL by initializing it in xlog_write_one_vec. Note that even without this fix, the checkpoint commits would never trigger a partial iclog write, as they have no payload beyond the opheader.
The unmount record on the other hand could in theory trigger an overflow of the iclog, but given that it has never been seen in the wild this has probably been masked by the small size of it and the fact that the unmount process does multiple log forces before writing the unmount record and we thus usually operate on an empty or almost empty iclog. Fixes: 110dc24ad2ae ("xfs: log vector rounding leaks log space") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c6fa258df844..4869eacfde24 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -858,14 +858,15 @@ xlog_write_one_vec( struct xfs_log_vec lv = { .lv_niovecs = 1, .lv_iovecp = reg, + .lv_bytes = reg->i_len, }; LIST_HEAD (lv_chain); /* account for space used by record data */ - ticket->t_curr_res -= reg->i_len; + ticket->t_curr_res -= lv.lv_bytes; list_add(&lv.lv_list, &lv_chain); - return xlog_write(log, ctx, &lv_chain, ticket, reg->i_len); + return xlog_write(log, ctx, &lv_chain, ticket, lv.lv_bytes); } /* From 8e7625344321105f5a52f59a3c7b3475a9a9e098 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:19 +0100 Subject: [PATCH 13/67] xfs: improve the ->iop_format interface Export a higher level interface to format log items. The xlog_format_buf structure is hidden inside xfs_log_cil.c and only accessed using two helpers (and a wrapper built on top), hiding details of log iovecs from the log items. This also allows simply using an index into lv_iovecp instead of keeping a cursor vec. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_attr_item.c | 27 +++++----- fs/xfs/xfs_bmap_item.c | 10 ++-- fs/xfs/xfs_buf_item.c | 19 +++---- fs/xfs/xfs_dquot_item.c | 9 ++-- fs/xfs/xfs_exchmaps_item.c | 11 ++-- fs/xfs/xfs_extfree_item.c | 10 ++-- fs/xfs/xfs_icreate_item.c | 6 +-- fs/xfs/xfs_inode_item.c | 49 +++++++++--------- fs/xfs/xfs_log.c | 56 --------------------- fs/xfs/xfs_log.h | 53 ++++---------------- fs/xfs/xfs_log_cil.c | 100 ++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_refcount_item.c | 10 ++-- fs/xfs/xfs_rmap_item.c | 10 ++-- fs/xfs/xfs_trans.h | 4 +- 14 files changed, 180 insertions(+), 194 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index e8fa326ac995..ad2956f78eca 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -192,10 +192,9 @@ xfs_attri_item_size( STATIC void xfs_attri_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; attrip->attri_format.alfi_type = XFS_LI_ATTRI; @@ -220,24 +219,23 @@ xfs_attri_item_format( if (nv->new_value.iov_len > 0) attrip->attri_format.alfi_size++; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, - &attrip->attri_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NAME, nv->name.iov_base, nv->name.iov_len); if (nv->new_name.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWNAME, - nv->new_name.iov_base, nv->new_name.iov_len); + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWNAME, + nv->new_name.iov_base, nv->new_name.iov_len); if (nv->value.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE, - nv->value.iov_base, nv->value.iov_len); + 
xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_VALUE, + nv->value.iov_base, nv->value.iov_len); if (nv->new_value.iov_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NEWVALUE, - nv->new_value.iov_base, nv->new_value.iov_len); + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTR_NEWVALUE, + nv->new_value.iov_base, nv->new_value.iov_len); } /* @@ -322,16 +320,15 @@ xfs_attrd_item_size( */ STATIC void xfs_attrd_item_format( - struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xfs_log_item *lip, + struct xlog_format_buf *lfb) { struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; attrdp->attrd_format.alfd_type = XFS_LI_ATTRD; attrdp->attrd_format.alfd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT, + xlog_format_copy(lfb, XLOG_REG_TYPE_ATTRD_FORMAT, &attrdp->attrd_format, sizeof(struct xfs_attrd_log_format)); } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 80f0c4bcc483..f38ed63fe86b 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -92,10 +92,9 @@ unsigned int xfs_bui_log_space(unsigned int nr) STATIC void xfs_bui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_bui_log_item *buip = BUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&buip->bui_next_extent) == buip->bui_format.bui_nextents); @@ -103,7 +102,7 @@ xfs_bui_item_format( buip->bui_format.bui_type = XFS_LI_BUI; buip->bui_format.bui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_BUI_FORMAT, &buip->bui_format, xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents)); } @@ -188,15 +187,14 @@ unsigned int xfs_bud_log_space(void) STATIC void xfs_bud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_bud_log_item *budp = BUD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; budp->bud_format.bud_type = 
XFS_LI_BUD; budp->bud_format.bud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_BUD_FORMAT, &budp->bud_format, sizeof(struct xfs_bud_log_format)); } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f4c5be67826e..cb2a36374ed4 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -263,24 +263,21 @@ xfs_buf_item_size( static inline void xfs_buf_item_copy_iovec( - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, + struct xlog_format_buf *lfb, struct xfs_buf *bp, uint offset, int first_bit, uint nbits) { offset += first_bit * XFS_BLF_CHUNK; - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, - xfs_buf_offset(bp, offset), + xlog_format_copy(lfb, XLOG_REG_TYPE_BCHUNK, xfs_buf_offset(bp, offset), nbits * XFS_BLF_CHUNK); } static void xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, + struct xlog_format_buf *lfb, uint offset, struct xfs_buf_log_format *blfp) { @@ -308,7 +305,7 @@ xfs_buf_item_format_segment( return; } - blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); + blfp = xlog_format_copy(lfb, XLOG_REG_TYPE_BFORMAT, blfp, base_size); blfp->blf_size = 1; if (bip->bli_flags & XFS_BLI_STALE) { @@ -331,8 +328,7 @@ xfs_buf_item_format_segment( nbits = xfs_contig_bits(blfp->blf_data_map, blfp->blf_map_size, first_bit); ASSERT(nbits > 0); - xfs_buf_item_copy_iovec(lv, vecp, bp, offset, - first_bit, nbits); + xfs_buf_item_copy_iovec(lfb, bp, offset, first_bit, nbits); blfp->blf_size++; /* @@ -357,11 +353,10 @@ xfs_buf_item_format_segment( STATIC void xfs_buf_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - struct xfs_log_iovec *vecp = NULL; uint offset = 0; int i; @@ -398,7 +393,7 @@ xfs_buf_item_format( } for (i = 0; i < bip->bli_format_count; i++) { 
- xfs_buf_item_format_segment(bip, lv, &vecp, offset, + xfs_buf_item_format_segment(bip, lfb, offset, &bip->bli_formats[i]); offset += BBTOB(bp->b_maps[i].bm_len); } diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index b374cd9f1900..8bc7f43093a2 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -44,25 +44,24 @@ xfs_qm_dquot_logitem_size( STATIC void xfs_qm_dquot_logitem_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_disk_dquot ddq; struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; struct xfs_dq_logformat *qlf; - qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); + qlf = xlog_format_start(lfb, XLOG_REG_TYPE_QFORMAT); qlf->qlf_type = XFS_LI_DQUOT; qlf->qlf_size = 2; qlf->qlf_id = qlip->qli_dquot->q_id; qlf->qlf_blkno = qlip->qli_dquot->q_blkno; qlf->qlf_len = 1; qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); + xlog_format_commit(lfb, sizeof(struct xfs_dq_logformat)); xfs_dquot_to_disk(&ddq, qlip->qli_dquot); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &ddq, + xlog_format_copy(lfb, XLOG_REG_TYPE_DQUOT, &ddq, sizeof(struct xfs_disk_dquot)); } diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c index 229cbe0adf17..10d6fbeff651 100644 --- a/fs/xfs/xfs_exchmaps_item.c +++ b/fs/xfs/xfs_exchmaps_item.c @@ -83,16 +83,14 @@ xfs_xmi_item_size( STATIC void xfs_xmi_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; xmi_lip->xmi_format.xmi_type = XFS_LI_XMI; xmi_lip->xmi_format.xmi_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT, - &xmi_lip->xmi_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_XMI_FORMAT, &xmi_lip->xmi_format, sizeof(struct xfs_xmi_log_format)); } @@ -166,15 +164,14 @@ xfs_xmd_item_size( STATIC 
void xfs_xmd_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; xmd_lip->xmd_format.xmd_type = XFS_LI_XMD; xmd_lip->xmd_format.xmd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, sizeof(struct xfs_xmd_log_format)); } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 418ddab590e0..3d1edc43e6fb 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -98,10 +98,9 @@ unsigned int xfs_efi_log_space(unsigned int nr) STATIC void xfs_efi_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&efip->efi_next_extent) == efip->efi_format.efi_nextents); @@ -110,7 +109,7 @@ xfs_efi_item_format( efip->efi_format.efi_type = lip->li_type; efip->efi_format.efi_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -277,10 +276,9 @@ unsigned int xfs_efd_log_space(unsigned int nr) STATIC void xfs_efd_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT); @@ -288,7 +286,7 @@ xfs_efd_item_format( efdp->efd_format.efd_type = lip->li_type; efdp->efd_format.efd_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, 
xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index f83ec2bd0583..004dd22393dc 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -49,13 +49,11 @@ xfs_icreate_item_size( STATIC void xfs_icreate_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_icreate_item *icp = ICR_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, - &icp->ic_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_ICREATE, &icp->ic_format, sizeof(struct xfs_icreate_log)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2eb0c6011a2e..81dfe70e173d 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -336,8 +336,7 @@ STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -354,9 +353,9 @@ xfs_inode_item_format_data_fork( ASSERT(xfs_iext_count(&ip->i_df) > 0); - p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); + p = xlog_format_start(lfb, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); - xlog_finish_iovec(lv, *vecp, data_bytes); + xlog_format_commit(lfb, data_bytes); ASSERT(data_bytes <= ip->i_df.if_bytes); @@ -374,7 +373,7 @@ xfs_inode_item_format_data_fork( if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, + xlog_format_copy(lfb, XLOG_REG_TYPE_IBROOT, ip->i_df.if_broot, ip->i_df.if_broot_bytes); ilf->ilf_dsize = ip->i_df.if_broot_bytes; @@ -392,8 +391,9 @@ xfs_inode_item_format_data_fork( ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_data != NULL); ASSERT(ip->i_disk_size > 0); - xlog_copy_iovec(lv, vecp, 
XLOG_REG_TYPE_ILOCAL, - ip->i_df.if_data, ip->i_df.if_bytes); + xlog_format_copy(lfb, XLOG_REG_TYPE_ILOCAL, + ip->i_df.if_data, + ip->i_df.if_bytes); ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes; ilf->ilf_size++; } else { @@ -416,8 +416,7 @@ STATIC void xfs_inode_item_format_attr_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -435,9 +434,9 @@ xfs_inode_item_format_attr_fork( ASSERT(xfs_iext_count(&ip->i_af) == ip->i_af.if_nextents); - p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); + p = xlog_format_start(lfb, XLOG_REG_TYPE_IATTR_EXT); data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); - xlog_finish_iovec(lv, *vecp, data_bytes); + xlog_format_commit(lfb, data_bytes); ilf->ilf_asize = data_bytes; ilf->ilf_size++; @@ -453,7 +452,7 @@ xfs_inode_item_format_attr_fork( ip->i_af.if_broot_bytes > 0) { ASSERT(ip->i_af.if_broot != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, + xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_BROOT, ip->i_af.if_broot, ip->i_af.if_broot_bytes); ilf->ilf_asize = ip->i_af.if_broot_bytes; @@ -469,8 +468,9 @@ xfs_inode_item_format_attr_fork( if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_af.if_bytes > 0) { ASSERT(ip->i_af.if_data != NULL); - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, - ip->i_af.if_data, ip->i_af.if_bytes); + xlog_format_copy(lfb, XLOG_REG_TYPE_IATTR_LOCAL, + ip->i_af.if_data, + ip->i_af.if_bytes); ilf->ilf_asize = (unsigned)ip->i_af.if_bytes; ilf->ilf_size++; } else { @@ -619,14 +619,13 @@ xfs_inode_to_log_dinode( static void xfs_inode_item_format_core( struct xfs_inode *ip, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp) + struct xlog_format_buf *lfb) { struct xfs_log_dinode *dic; - dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE); + dic = xlog_format_start(lfb, XLOG_REG_TYPE_ICORE); 
xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); - xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount)); + xlog_format_commit(lfb, xfs_log_dinode_size(ip->i_mount)); } /* @@ -644,14 +643,13 @@ xfs_inode_item_format_core( STATIC void xfs_inode_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_log_iovec *vecp = NULL; struct xfs_inode_log_format *ilf; - ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); + ilf = xlog_format_start(lfb, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; ilf->ilf_blkno = ip->i_imap.im_blkno; @@ -668,13 +666,12 @@ xfs_inode_item_format( ilf->ilf_asize = 0; ilf->ilf_pad = 0; memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u)); + xlog_format_commit(lfb, sizeof(*ilf)); - xlog_finish_iovec(lv, vecp, sizeof(*ilf)); - - xfs_inode_item_format_core(ip, lv, &vecp); - xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); + xfs_inode_item_format_core(ip, lfb); + xfs_inode_item_format_data_fork(iip, ilf, lfb); if (xfs_inode_has_attr_fork(ip)) { - xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); + xfs_inode_item_format_attr_fork(iip, ilf, lfb); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4869eacfde24..1d2cdea1e3ac 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -74,62 +74,6 @@ xlog_iclogs_empty( static int xfs_log_cover(struct xfs_mount *); -/* - * We need to make sure the buffer pointer returned is naturally aligned for the - * biggest basic data type we put into it. We have already accounted for this - * padding when sizing the buffer. - * - * However, this padding does not get written into the log, and hence we have to - * track the space used by the log vectors separately to prevent log space hangs - * due to inaccurate accounting (i.e. 
a leak) of the used log space through the - * CIL context ticket. - * - * We also add space for the xlog_op_header that describes this region in the - * log. This prepends the data region we return to the caller to copy their data - * into, so do all the static initialisation of the ophdr now. Because the ophdr - * is not 8 byte aligned, we have to be careful to ensure that we align the - * start of the buffer such that the region we return to the call is 8 byte - * aligned and packed against the tail of the ophdr. - */ -void * -xlog_prepare_iovec( - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, - uint type) -{ - struct xfs_log_iovec *vec = *vecp; - struct xlog_op_header *oph; - uint32_t len; - void *buf; - - if (vec) { - ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); - vec++; - } else { - vec = &lv->lv_iovecp[0]; - } - - len = lv->lv_buf_used + sizeof(struct xlog_op_header); - if (!IS_ALIGNED(len, sizeof(uint64_t))) { - lv->lv_buf_used = round_up(len, sizeof(uint64_t)) - - sizeof(struct xlog_op_header); - } - - vec->i_type = type; - vec->i_addr = lv->lv_buf + lv->lv_buf_used; - - oph = vec->i_addr; - oph->oh_clientid = XFS_TRANSACTION; - oph->oh_res2 = 0; - oph->oh_flags = 0; - - buf = vec->i_addr + sizeof(struct xlog_op_header); - ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); - - *vecp = vec; - return buf; -} - static inline void xlog_grant_sub_space( struct xlog_grant_head *head, diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index dcc1f44ed68f..c4930e925fed 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -6,6 +6,7 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +struct xlog_format_buf; struct xfs_cil_ctx; struct xfs_log_vec { @@ -70,58 +71,24 @@ xlog_calc_iovec_len(int len) return roundup(len, sizeof(uint32_t)); } -void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - uint type); - -static inline void -xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, - int data_len) -{ - struct 
xlog_op_header *oph = vec->i_addr; - int len; - - /* - * Always round up the length to the correct alignment so callers don't - * need to know anything about this log vec layout requirement. This - * means we have to zero the area the data to be written does not cover. - * This is complicated by fact the payload region is offset into the - * logvec region by the opheader that tracks the payload. - */ - len = xlog_calc_iovec_len(data_len); - if (len - data_len != 0) { - char *buf = vec->i_addr + sizeof(struct xlog_op_header); - - memset(buf + data_len, 0, len - data_len); - } - - /* - * The opheader tracks aligned payload length, whilst the logvec tracks - * the overall region length. - */ - oph->oh_len = cpu_to_be32(len); - - len += sizeof(struct xlog_op_header); - lv->lv_buf_used += len; - lv->lv_bytes += len; - vec->i_len = len; - - /* Catch buffer overruns */ - ASSERT((void *)lv->lv_buf + lv->lv_bytes <= - (void *)lv + lv->lv_alloc_size); -} +void *xlog_format_start(struct xlog_format_buf *lfb, uint16_t type); +void xlog_format_commit(struct xlog_format_buf *lfb, unsigned int data_len); /* * Copy the amount of data requested by the caller into a new log iovec. 
*/ static inline void * -xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, - uint type, void *data, int len) +xlog_format_copy( + struct xlog_format_buf *lfb, + uint16_t type, + void *data, + unsigned int len) { void *buf; - buf = xlog_prepare_iovec(lv, vecp, type); + buf = xlog_format_start(lfb, type); memcpy(buf, data, len); - xlog_finish_iovec(lv, *vecp, len); + xlog_format_commit(lfb, len); return buf; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 83aa06e19cfb..bc25012ac5c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -409,6 +409,102 @@ xfs_cil_prepare_item( lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; } +struct xlog_format_buf { + struct xfs_log_vec *lv; + unsigned int idx; +}; + +/* + * We need to make sure the buffer pointer returned is naturally aligned for the + * biggest basic data type we put into it. We have already accounted for this + * padding when sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. + * + * We also add space for the xlog_op_header that describes this region in the + * log. This prepends the data region we return to the caller to copy their data + * into, so do all the static initialisation of the ophdr now. Because the ophdr + * is not 8 byte aligned, we have to be careful to ensure that we align the + * start of the buffer such that the region we return to the call is 8 byte + * aligned and packed against the tail of the ophdr. 
+ */ +void * +xlog_format_start( + struct xlog_format_buf *lfb, + uint16_t type) +{ + struct xfs_log_vec *lv = lfb->lv; + struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx]; + struct xlog_op_header *oph; + uint32_t len; + void *buf; + + ASSERT(lfb->idx < lv->lv_niovecs); + + len = lv->lv_buf_used + sizeof(struct xlog_op_header); + if (!IS_ALIGNED(len, sizeof(uint64_t))) { + lv->lv_buf_used = round_up(len, sizeof(uint64_t)) - + sizeof(struct xlog_op_header); + } + + vec->i_type = type; + vec->i_addr = lv->lv_buf + lv->lv_buf_used; + + oph = vec->i_addr; + oph->oh_clientid = XFS_TRANSACTION; + oph->oh_res2 = 0; + oph->oh_flags = 0; + + buf = vec->i_addr + sizeof(struct xlog_op_header); + ASSERT(IS_ALIGNED((unsigned long)buf, sizeof(uint64_t))); + return buf; +} + +void +xlog_format_commit( + struct xlog_format_buf *lfb, + unsigned int data_len) +{ + struct xfs_log_vec *lv = lfb->lv; + struct xfs_log_iovec *vec = &lv->lv_iovecp[lfb->idx]; + struct xlog_op_header *oph = vec->i_addr; + int len; + + /* + * Always round up the length to the correct alignment so callers don't + * need to know anything about this log vec layout requirement. This + * means we have to zero the area the data to be written does not cover. + * This is complicated by fact the payload region is offset into the + * logvec region by the opheader that tracks the payload. + */ + len = xlog_calc_iovec_len(data_len); + if (len - data_len != 0) { + char *buf = vec->i_addr + sizeof(struct xlog_op_header); + + memset(buf + data_len, 0, len - data_len); + } + + /* + * The opheader tracks aligned payload length, whilst the logvec tracks + * the overall region length. 
+ */ + oph->oh_len = cpu_to_be32(len); + + len += sizeof(struct xlog_op_header); + lv->lv_buf_used += len; + lv->lv_bytes += len; + vec->i_len = len; + + /* Catch buffer overruns */ + ASSERT((void *)lv->lv_buf + lv->lv_bytes <= + (void *)lv + lv->lv_alloc_size); + + lfb->idx++; +} + /* * Format log item into a flat buffers * @@ -454,6 +550,7 @@ xlog_cil_insert_format_items( list_for_each_entry(lip, &tp->t_items, li_trans) { struct xfs_log_vec *lv = lip->li_lv; struct xfs_log_vec *shadow = lip->li_lv_shadow; + struct xlog_format_buf lfb = { }; /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) @@ -501,8 +598,9 @@ xlog_cil_insert_format_items( lv->lv_item = lip; } + lfb.lv = lv; ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); - lip->li_ops->iop_format(lip, lv); + lip->li_ops->iop_format(lip, &lfb); xfs_cil_prepare_item(log, lip, lv, diff_len); } } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 3728234699a2..a41f5b577e22 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -93,10 +93,9 @@ unsigned int xfs_cui_log_space(unsigned int nr) STATIC void xfs_cui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_cui_log_item *cuip = CUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&cuip->cui_next_extent) == cuip->cui_format.cui_nextents); @@ -105,7 +104,7 @@ xfs_cui_item_format( cuip->cui_format.cui_type = lip->li_type; cuip->cui_format.cui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); } @@ -199,17 +198,16 @@ unsigned int xfs_cud_log_space(void) STATIC void xfs_cud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_cud_log_item *cudp = CUD_ITEM(lip); - struct 
xfs_log_iovec *vecp = NULL; ASSERT(lip->li_type == XFS_LI_CUD || lip->li_type == XFS_LI_CUD_RT); cudp->cud_format.cud_type = lip->li_type; cudp->cud_format.cud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, sizeof(struct xfs_cud_log_format)); } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 15f0903f6fd4..8bf04b101156 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -92,10 +92,9 @@ unsigned int xfs_rui_log_space(unsigned int nr) STATIC void xfs_rui_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_rui_log_item *ruip = RUI_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&ruip->rui_next_extent) == ruip->rui_format.rui_nextents); @@ -105,7 +104,7 @@ xfs_rui_item_format( ruip->rui_format.rui_type = lip->li_type; ruip->rui_format.rui_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents)); } @@ -200,17 +199,16 @@ unsigned int xfs_rud_log_space(void) STATIC void xfs_rud_item_format( struct xfs_log_item *lip, - struct xfs_log_vec *lv) + struct xlog_format_buf *lfb) { struct xfs_rud_log_item *rudp = RUD_ITEM(lip); - struct xfs_log_iovec *vecp = NULL; ASSERT(lip->li_type == XFS_LI_RUD || lip->li_type == XFS_LI_RUD_RT); rudp->rud_format.rud_type = lip->li_type; rudp->rud_format.rud_size = 1; - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format, + xlog_format_copy(lfb, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format, sizeof(struct xfs_rud_log_format)); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7fb860f645a3..8830600b3e72 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -9,6 +9,7 @@ /* kernel only transaction subsystem defines */ struct xlog; +struct 
xlog_format_buf; struct xfs_buf; struct xfs_buftarg; struct xfs_efd_log_item; @@ -70,7 +71,8 @@ struct xfs_log_item { struct xfs_item_ops { unsigned flags; void (*iop_size)(struct xfs_log_item *, int *, int *); - void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); + void (*iop_format)(struct xfs_log_item *lip, + struct xlog_format_buf *lfb); void (*iop_pin)(struct xfs_log_item *); void (*iop_unpin)(struct xfs_log_item *, int remove); uint64_t (*iop_sort)(struct xfs_log_item *lip); From 027410591418bded6ba6051151d88fc6fb8a7614 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:20 +0100 Subject: [PATCH 14/67] xfs: move struct xfs_log_iovec to xfs_log_priv.h This structure is now only used by the core logging and CIL code. Also remove the unused typedef. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_log_format.h | 7 ------- fs/xfs/xfs_log_priv.h | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 908e7060428c..3f5a24dda907 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -184,13 +184,6 @@ struct xlog_rec_header { #define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size) #endif /* __i386__ */ -/* not an on-disk structure, but needed by log recovery in userspace */ -struct xfs_log_iovec { - void *i_addr; /* beginning address of region */ - int i_len; /* length in bytes of region */ - uint i_type; /* type of region */ -}; - /* * Transaction Header definitions. 
* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index d2410e78b7f5..b7b3f61aa2ae 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -13,6 +13,12 @@ struct xlog; struct xlog_ticket; struct xfs_mount; +struct xfs_log_iovec { + void *i_addr;/* beginning address of region */ + int i_len; /* length in bytes of region */ + uint i_type; /* type of region */ +}; + /* * get client id from packed copy. * From 2499d91180142f18fcd472ab52f37655fd787bf7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:21 +0100 Subject: [PATCH 15/67] xfs: move struct xfs_log_vec to xfs_log_priv.h The log_vec is a private type for the log/CIL code and should not be exposed to anything else. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.h | 12 ------------ fs/xfs/xfs_log_priv.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index c4930e925fed..0f23812b0b31 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -9,18 +9,6 @@ struct xlog_format_buf; struct xfs_cil_ctx; -struct xfs_log_vec { - struct list_head lv_list; /* CIL lv chain ptrs */ - uint32_t lv_order_id; /* chain ordering info */ - int lv_niovecs; /* number of iovecs in lv */ - struct xfs_log_iovec *lv_iovecp; /* iovec array */ - struct xfs_log_item *lv_item; /* owner */ - char *lv_buf; /* formatted buffer */ - int lv_bytes; /* accounted space in buffer */ - int lv_buf_used; /* buffer space used so far */ - int lv_alloc_size; /* size of allocated lv */ -}; - /* Region types for iovec's i_type */ #define XLOG_REG_TYPE_BFORMAT 1 #define XLOG_REG_TYPE_BCHUNK 2 diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b7b3f61aa2ae..cf1e4ce61a8c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -19,6 +19,18 @@ struct xfs_log_iovec { uint i_type; /* type of region */ }; +struct xfs_log_vec { + struct list_head lv_list; /* CIL lv 
chain ptrs */ + uint32_t lv_order_id; /* chain ordering info */ + int lv_niovecs; /* number of iovecs in lv */ + struct xfs_log_iovec *lv_iovecp; /* iovec array */ + struct xfs_log_item *lv_item; /* owner */ + char *lv_buf; /* formatted buffer */ + int lv_bytes; /* accounted space in buffer */ + int lv_buf_used; /* buffer space used so far */ + int lv_alloc_size; /* size of allocated lv */ +}; + /* * get client id from packed copy. * From a82d7aac758161c310345bb5066f344c21ee783b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:22 +0100 Subject: [PATCH 16/67] xfs: regularize iclog space accounting in xlog_write_partial When xlog_write_partial splits a log region over multiple iclogs, it has to include the continuation ophdr in the length requested for the new iclog. Currently it simply adds that to the request, which makes the accounting of the used space below look slightly different from the other users of iclog space that decrement it. To prepare for more code sharing, add the ophdr size to the len variable that tracks the number of bytes that are still left in this xlog_write operation before calling xlog_write_get_more_iclog_space, and then decrement it later when consuming that space. This changes the value of len when xlog_write_get_more_iclog_space returns an error, but as nothing looks at len in that case the difference doesn't matter. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1d2cdea1e3ac..ddb1a13d4850 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2050,10 +2050,10 @@ xlog_write_partial( * consumes hasn't been accounted to the lv we are * writing. 
*/ + *len += sizeof(struct xlog_op_header); error = xlog_write_get_more_iclog_space(ticket, - &iclog, log_offset, - *len + sizeof(struct xlog_op_header), - record_cnt, data_cnt); + &iclog, log_offset, *len, record_cnt, + data_cnt); if (error) return error; @@ -2066,6 +2066,7 @@ xlog_write_partial( ticket->t_curr_res -= sizeof(struct xlog_op_header); *log_offset += sizeof(struct xlog_op_header); *data_cnt += sizeof(struct xlog_op_header); + *len -= sizeof(struct xlog_op_header); /* * If rlen fits in the iclog, then end the region From a3eb1f9cf85ff88939f5a9d360efc3eb73469afd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:23 +0100 Subject: [PATCH 17/67] xfs: improve the calling convention for the xlog_write helpers The xlog_write chain passes around the same seven variables that are often passed by reference. Add a xlog_write_data structure to contain them to improve code generation and readability. This change increases the generated code size by about 140 bytes for my x86_64 build, which is hopefully worth the much easier to follow code: $ size fs/xfs/xfs_log.o* text data bss dec hex filename 29300 1730 176 31206 79e6 fs/xfs/xfs_log.o 29160 1730 176 31066 795a fs/xfs/xfs_log.o.old Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 187 +++++++++++++++++++---------------------------- 1 file changed, 77 insertions(+), 110 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ddb1a13d4850..90b72dcff466 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -22,6 +22,15 @@ #include "xfs_health.h" #include "xfs_zone_alloc.h" +struct xlog_write_data { + struct xlog_ticket *ticket; + struct xlog_in_core *iclog; + uint32_t bytes_left; + uint32_t record_cnt; + uint32_t data_cnt; + int log_offset; +}; + struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ @@ -43,10 +52,7 @@ STATIC void xlog_state_do_callback( STATIC int xlog_state_get_iclog_space( struct xlog *log, - int len, - struct xlog_in_core **iclog, - struct xlog_ticket *ticket, - int *logoffsetp); + struct xlog_write_data *data); STATIC void xlog_sync( struct xlog *log, @@ -1876,23 +1882,19 @@ xlog_print_trans( static inline void xlog_write_iovec( - struct xlog_in_core *iclog, - uint32_t *log_offset, - void *data, - uint32_t write_len, - int *bytes_left, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data, + void *buf, + uint32_t buf_len) { - ASSERT(*log_offset < iclog->ic_log->l_iclog_size); - ASSERT(*log_offset % sizeof(int32_t) == 0); - ASSERT(write_len % sizeof(int32_t) == 0); + ASSERT(data->log_offset < data->iclog->ic_log->l_iclog_size); + ASSERT(data->log_offset % sizeof(int32_t) == 0); + ASSERT(buf_len % sizeof(int32_t) == 0); - memcpy(iclog->ic_datap + *log_offset, data, write_len); - *log_offset += write_len; - *bytes_left -= write_len; - (*record_cnt)++; - *data_cnt += write_len; + memcpy(data->iclog->ic_datap + data->log_offset, buf, buf_len); + data->log_offset += buf_len; + data->bytes_left -= buf_len; + data->record_cnt++; + data->data_cnt += buf_len; } /* @@ -1902,17 +1904,12 @@ xlog_write_iovec( static void xlog_write_full( struct xfs_log_vec *lv, - struct xlog_ticket *ticket, - struct 
xlog_in_core *iclog, - uint32_t *log_offset, - uint32_t *len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { int index; - ASSERT(*log_offset + *len <= iclog->ic_size || - iclog->ic_state == XLOG_STATE_WANT_SYNC); + ASSERT(data->log_offset + data->bytes_left <= data->iclog->ic_size || + data->iclog->ic_state == XLOG_STATE_WANT_SYNC); /* * Ordered log vectors have no regions to write so this @@ -1922,40 +1919,32 @@ xlog_write_full( struct xfs_log_iovec *reg = &lv->lv_iovecp[index]; struct xlog_op_header *ophdr = reg->i_addr; - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); - xlog_write_iovec(iclog, log_offset, reg->i_addr, - reg->i_len, len, record_cnt, data_cnt); + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); + xlog_write_iovec(data, reg->i_addr, reg->i_len); } } static int xlog_write_get_more_iclog_space( - struct xlog_ticket *ticket, - struct xlog_in_core **iclogp, - uint32_t *log_offset, - uint32_t len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { - struct xlog_in_core *iclog = *iclogp; - struct xlog *log = iclog->ic_log; + struct xlog *log = data->iclog->ic_log; int error; spin_lock(&log->l_icloglock); - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - error = xlog_state_release_iclog(log, iclog, ticket); + ASSERT(data->iclog->ic_state == XLOG_STATE_WANT_SYNC); + xlog_state_finish_copy(log, data->iclog, data->record_cnt, + data->data_cnt); + error = xlog_state_release_iclog(log, data->iclog, data->ticket); spin_unlock(&log->l_icloglock); if (error) return error; - error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - log_offset); + error = xlog_state_get_iclog_space(log, data); if (error) return error; - *record_cnt = 0; - *data_cnt = 0; - *iclogp = iclog; + data->record_cnt = 0; + data->data_cnt = 0; return 0; } @@ -1968,14 +1957,8 @@ xlog_write_get_more_iclog_space( static int xlog_write_partial( struct xfs_log_vec *lv, 
- struct xlog_ticket *ticket, - struct xlog_in_core **iclogp, - uint32_t *log_offset, - uint32_t *len, - uint32_t *record_cnt, - uint32_t *data_cnt) + struct xlog_write_data *data) { - struct xlog_in_core *iclog = *iclogp; struct xlog_op_header *ophdr; int index = 0; uint32_t rlen; @@ -1997,25 +1980,23 @@ xlog_write_partial( * Hence if there isn't space for region data after the * opheader, then we need to start afresh with a new iclog. */ - if (iclog->ic_size - *log_offset <= + if (data->iclog->ic_size - data->log_offset <= sizeof(struct xlog_op_header)) { - error = xlog_write_get_more_iclog_space(ticket, - &iclog, log_offset, *len, record_cnt, - data_cnt); + error = xlog_write_get_more_iclog_space(data); if (error) return error; } ophdr = reg->i_addr; - rlen = min_t(uint32_t, reg->i_len, iclog->ic_size - *log_offset); + rlen = min_t(uint32_t, reg->i_len, + data->iclog->ic_size - data->log_offset); - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header)); if (rlen != reg->i_len) ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - xlog_write_iovec(iclog, log_offset, reg->i_addr, - rlen, len, record_cnt, data_cnt); + xlog_write_iovec(data, reg->i_addr, rlen); /* If we wrote the whole region, move to the next. */ if (rlen == reg->i_len) @@ -2050,23 +2031,22 @@ xlog_write_partial( * consumes hasn't been accounted to the lv we are * writing. 
*/ - *len += sizeof(struct xlog_op_header); - error = xlog_write_get_more_iclog_space(ticket, - &iclog, log_offset, *len, record_cnt, - data_cnt); + data->bytes_left += sizeof(struct xlog_op_header); + error = xlog_write_get_more_iclog_space(data); if (error) return error; - ophdr = iclog->ic_datap + *log_offset; - ophdr->oh_tid = cpu_to_be32(ticket->t_tid); + ophdr = data->iclog->ic_datap + data->log_offset; + ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_clientid = XFS_TRANSACTION; ophdr->oh_res2 = 0; ophdr->oh_flags = XLOG_WAS_CONT_TRANS; - ticket->t_curr_res -= sizeof(struct xlog_op_header); - *log_offset += sizeof(struct xlog_op_header); - *data_cnt += sizeof(struct xlog_op_header); - *len -= sizeof(struct xlog_op_header); + data->ticket->t_curr_res -= + sizeof(struct xlog_op_header); + data->log_offset += sizeof(struct xlog_op_header); + data->data_cnt += sizeof(struct xlog_op_header); + data->bytes_left -= sizeof(struct xlog_op_header); /* * If rlen fits in the iclog, then end the region @@ -2074,26 +2054,19 @@ xlog_write_partial( */ reg_offset += rlen; rlen = reg->i_len - reg_offset; - if (rlen <= iclog->ic_size - *log_offset) + if (rlen <= data->iclog->ic_size - data->log_offset) ophdr->oh_flags |= XLOG_END_TRANS; else ophdr->oh_flags |= XLOG_CONTINUE_TRANS; - rlen = min_t(uint32_t, rlen, iclog->ic_size - *log_offset); + rlen = min_t(uint32_t, rlen, + data->iclog->ic_size - data->log_offset); ophdr->oh_len = cpu_to_be32(rlen); - xlog_write_iovec(iclog, log_offset, - reg->i_addr + reg_offset, - rlen, len, record_cnt, data_cnt); - + xlog_write_iovec(data, reg->i_addr + reg_offset, rlen); } while (ophdr->oh_flags & XLOG_CONTINUE_TRANS); } - /* - * No more iovecs remain in this logvec so return the next log vec to - * the caller so it can go back to fast path copying. 
- */ - *iclogp = iclog; return 0; } @@ -2146,12 +2119,12 @@ xlog_write( uint32_t len) { - struct xlog_in_core *iclog = NULL; struct xfs_log_vec *lv; - uint32_t record_cnt = 0; - uint32_t data_cnt = 0; - int error = 0; - int log_offset; + struct xlog_write_data data = { + .ticket = ticket, + .bytes_left = len, + }; + int error; if (ticket->t_curr_res < 0) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, @@ -2160,12 +2133,11 @@ xlog_write( xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } - error = xlog_state_get_iclog_space(log, len, &iclog, ticket, - &log_offset); + error = xlog_state_get_iclog_space(log, &data); if (error) return error; - ASSERT(log_offset <= iclog->ic_size - 1); + ASSERT(data.log_offset <= data.iclog->ic_size - 1); /* * If we have a context pointer, pass it the first iclog we are @@ -2173,7 +2145,7 @@ xlog_write( * ordering. */ if (ctx) - xlog_cil_set_ctx_write_state(ctx, iclog); + xlog_cil_set_ctx_write_state(ctx, data.iclog); list_for_each_entry(lv, lv_chain, lv_list) { /* @@ -2181,10 +2153,8 @@ xlog_write( * the partial copy loop which can handle this case. */ if (lv->lv_niovecs && - lv->lv_bytes > iclog->ic_size - log_offset) { - error = xlog_write_partial(lv, ticket, &iclog, - &log_offset, &len, &record_cnt, - &data_cnt); + lv->lv_bytes > data.iclog->ic_size - data.log_offset) { + error = xlog_write_partial(lv, &data); if (error) { /* * We have no iclog to release, so just return @@ -2193,11 +2163,10 @@ xlog_write( return error; } } else { - xlog_write_full(lv, ticket, iclog, &log_offset, - &len, &record_cnt, &data_cnt); + xlog_write_full(lv, &data); } } - ASSERT(len == 0); + ASSERT(data.bytes_left == 0); /* * We've already been guaranteed that the last writes will fit inside @@ -2206,8 +2175,8 @@ xlog_write( * iclog with the number of bytes written here. 
*/ spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, record_cnt, 0); - error = xlog_state_release_iclog(log, iclog, ticket); + xlog_state_finish_copy(log, data.iclog, data.record_cnt, 0); + error = xlog_state_release_iclog(log, data.iclog, ticket); spin_unlock(&log->l_icloglock); return error; @@ -2529,10 +2498,7 @@ xlog_state_done_syncing( STATIC int xlog_state_get_iclog_space( struct xlog *log, - int len, - struct xlog_in_core **iclogp, - struct xlog_ticket *ticket, - int *logoffsetp) + struct xlog_write_data *data) { int log_offset; struct xlog_rec_header *head; @@ -2567,7 +2533,7 @@ restart: * must be written. */ if (log_offset == 0) { - ticket->t_curr_res -= log->l_iclog_hsize; + data->ticket->t_curr_res -= log->l_iclog_hsize; head->h_cycle = cpu_to_be32(log->l_curr_cycle); head->h_lsn = cpu_to_be64( xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block)); @@ -2597,7 +2563,8 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog, ticket); + error = xlog_state_release_iclog(log, iclog, + data->ticket); spin_unlock(&log->l_icloglock); if (error) return error; @@ -2610,16 +2577,16 @@ restart: * iclogs (to mark it taken), this particular iclog will release/sync * to disk in xlog_write(). 
*/ - if (len <= iclog->ic_size - iclog->ic_offset) - iclog->ic_offset += len; + if (data->bytes_left <= iclog->ic_size - iclog->ic_offset) + iclog->ic_offset += data->bytes_left; else xlog_state_switch_iclogs(log, iclog, iclog->ic_size); - *iclogp = iclog; + data->iclog = iclog; ASSERT(iclog->ic_offset <= iclog->ic_size); spin_unlock(&log->l_icloglock); - *logoffsetp = log_offset; + data->log_offset = log_offset; return 0; } From 865970d49a45af00ac4576684c33024e8d59b84c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:24 +0100 Subject: [PATCH 18/67] xfs: add a xlog_write_space_left helper Various places check how much space is left in the current iclog, add a helper for that. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 90b72dcff466..2c1371f6f5e3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1880,13 +1880,18 @@ xlog_print_trans( } } +static inline uint32_t xlog_write_space_left(struct xlog_write_data *data) +{ + return data->iclog->ic_size - data->log_offset; +} + static inline void xlog_write_iovec( struct xlog_write_data *data, void *buf, uint32_t buf_len) { - ASSERT(data->log_offset < data->iclog->ic_log->l_iclog_size); + ASSERT(xlog_write_space_left(data) > 0); ASSERT(data->log_offset % sizeof(int32_t) == 0); ASSERT(buf_len % sizeof(int32_t) == 0); @@ -1908,7 +1913,7 @@ xlog_write_full( { int index; - ASSERT(data->log_offset + data->bytes_left <= data->iclog->ic_size || + ASSERT(data->bytes_left <= xlog_write_space_left(data) || data->iclog->ic_state == XLOG_STATE_WANT_SYNC); /* @@ -1980,7 +1985,7 @@ xlog_write_partial( * Hence if there isn't space for region data after the * opheader, then we need to start afresh with a new iclog. 
*/ - if (data->iclog->ic_size - data->log_offset <= + if (xlog_write_space_left(data) <= sizeof(struct xlog_op_header)) { error = xlog_write_get_more_iclog_space(data); if (error) @@ -1988,8 +1993,7 @@ xlog_write_partial( } ophdr = reg->i_addr; - rlen = min_t(uint32_t, reg->i_len, - data->iclog->ic_size - data->log_offset); + rlen = min_t(uint32_t, reg->i_len, xlog_write_space_left(data)); ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_len = cpu_to_be32(rlen - sizeof(struct xlog_op_header)); @@ -2054,13 +2058,13 @@ xlog_write_partial( */ reg_offset += rlen; rlen = reg->i_len - reg_offset; - if (rlen <= data->iclog->ic_size - data->log_offset) + if (rlen <= xlog_write_space_left(data)) ophdr->oh_flags |= XLOG_END_TRANS; else ophdr->oh_flags |= XLOG_CONTINUE_TRANS; rlen = min_t(uint32_t, rlen, - data->iclog->ic_size - data->log_offset); + xlog_write_space_left(data)); ophdr->oh_len = cpu_to_be32(rlen); xlog_write_iovec(data, reg->i_addr + reg_offset, rlen); @@ -2137,7 +2141,7 @@ xlog_write( if (error) return error; - ASSERT(data.log_offset <= data.iclog->ic_size - 1); + ASSERT(xlog_write_space_left(&data) > 0); /* * If we have a context pointer, pass it the first iclog we are @@ -2153,7 +2157,7 @@ xlog_write( * the partial copy loop which can handle this case. */ if (lv->lv_niovecs && - lv->lv_bytes > data.iclog->ic_size - data.log_offset) { + lv->lv_bytes > xlog_write_space_left(&data)) { error = xlog_write_partial(lv, &data); if (error) { /* From e2663443da71445a0c847199480b6a53ddec35e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:25 +0100 Subject: [PATCH 19/67] xfs: improve the iclog space assert in xlog_write_iovec We need enough space for the length we copy into the iclog, not just some space, so tighten up the check a bit. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 2c1371f6f5e3..6d0319388e29 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1891,7 +1891,7 @@ xlog_write_iovec( void *buf, uint32_t buf_len) { - ASSERT(xlog_write_space_left(data) > 0); + ASSERT(xlog_write_space_left(data) >= buf_len); ASSERT(data->log_offset % sizeof(int32_t) == 0); ASSERT(buf_len % sizeof(int32_t) == 0); From a10b44cf1018f5f94d5a4caefef581d181c70f5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Nov 2025 13:14:26 +0100 Subject: [PATCH 20/67] xfs: factor out a xlog_write_space_advance helper Add a new xlog_write_space_advance that returns the current place in the iclog that data is written to, and advances the various counters by the amount taken from xlog_write_iovec, and also use it in xlog_write_partial, which open codes the counter adjustments, but misses the asserts. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6d0319388e29..8ddd25970471 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1885,21 +1885,31 @@ static inline uint32_t xlog_write_space_left(struct xlog_write_data *data) return data->iclog->ic_size - data->log_offset; } +static void * +xlog_write_space_advance( + struct xlog_write_data *data, + unsigned int len) +{ + void *p = data->iclog->ic_datap + data->log_offset; + + ASSERT(xlog_write_space_left(data) >= len); + ASSERT(data->log_offset % sizeof(int32_t) == 0); + ASSERT(len % sizeof(int32_t) == 0); + + data->data_cnt += len; + data->log_offset += len; + data->bytes_left -= len; + return p; +} + static inline void xlog_write_iovec( struct xlog_write_data *data, void *buf, uint32_t buf_len) { - ASSERT(xlog_write_space_left(data) >= buf_len); - ASSERT(data->log_offset % sizeof(int32_t) == 0); - ASSERT(buf_len % sizeof(int32_t) == 0); - - memcpy(data->iclog->ic_datap + data->log_offset, buf, buf_len); - data->log_offset += buf_len; - data->bytes_left -= buf_len; + memcpy(xlog_write_space_advance(data, buf_len), buf, buf_len); data->record_cnt++; - data->data_cnt += buf_len; } /* @@ -2040,7 +2050,8 @@ xlog_write_partial( if (error) return error; - ophdr = data->iclog->ic_datap + data->log_offset; + ophdr = xlog_write_space_advance(data, + sizeof(struct xlog_op_header)); ophdr->oh_tid = cpu_to_be32(data->ticket->t_tid); ophdr->oh_clientid = XFS_TRANSACTION; ophdr->oh_res2 = 0; @@ -2048,9 +2059,6 @@ xlog_write_partial( data->ticket->t_curr_res -= sizeof(struct xlog_op_header); - data->log_offset += sizeof(struct xlog_op_header); - data->data_cnt += sizeof(struct xlog_op_header); - data->bytes_left -= sizeof(struct xlog_op_header); /* * If rlen fits in the iclog, then end the region From 971ffb634113474add7c3adb07c1ea03110e7844 Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Fri, 19 Dec 2025 06:41:44 +0100 Subject: [PATCH 21/67] xfs: rename xfs_linux.h to xfs_platform.h Rename xfs_linux.h to prepare for including it directly from source files, including those shared with xfsprogs. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs.h | 2 +- fs/xfs/{xfs_linux.h => xfs_platform.h} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename fs/xfs/{xfs_linux.h => xfs_platform.h} (98%) diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 9355ccad9503..b335a471c088 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -23,6 +23,6 @@ #endif -#include "xfs_linux.h" +#include "xfs_platform.h" #endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_platform.h similarity index 98% rename from fs/xfs/xfs_linux.h rename to fs/xfs/xfs_platform.h index 4dd747bdbcca..ec8cd71fc868 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_platform.h @@ -3,8 +3,8 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#ifndef __XFS_LINUX__ -#define __XFS_LINUX__ +#ifndef _XFS_PLATFORM_H +#define _XFS_PLATFORM_H #include #include @@ -281,4 +281,4 @@ kmem_to_page(void *addr) return virt_to_page(addr); } -#endif /* __XFS_LINUX__ */ +#endif /* _XFS_PLATFORM_H */ From 501a5161d2c3c3e6d6cf520446e51dfc86f06d8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Dec 2025 06:41:45 +0100 Subject: [PATCH 22/67] xfs: include global headers first in xfs_platform.h Ensure we have all kernel headers included by the time we do our own thing, just like the rest of the tree. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J.
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_platform.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_platform.h b/fs/xfs/xfs_platform.h index ec8cd71fc868..5223fe567ac1 100644 --- a/fs/xfs/xfs_platform.h +++ b/fs/xfs/xfs_platform.h @@ -8,19 +8,6 @@ #include #include - -/* - * Kernel specific type declarations for XFS - */ - -typedef __s64 xfs_off_t; /* type */ -typedef unsigned long long xfs_ino_t; /* type */ -typedef __s64 xfs_daddr_t; /* type */ -typedef __u32 xfs_dev_t; -typedef __u32 xfs_nlink_t; - -#include "xfs_types.h" - #include #include #include @@ -63,7 +50,6 @@ typedef __u32 xfs_nlink_t; #include #include #include - #include #include #include @@ -71,6 +57,16 @@ typedef __u32 xfs_nlink_t; #include #include +/* + * Kernel specific type declarations for XFS + */ +typedef __s64 xfs_off_t; /* type */ +typedef unsigned long long xfs_ino_t; /* type */ +typedef __s64 xfs_daddr_t; /* type */ +typedef __u32 xfs_dev_t; +typedef __u32 xfs_nlink_t; + +#include "xfs_types.h" #include "xfs_fs.h" #include "xfs_stats.h" #include "xfs_sysctl.h" From 19a46f12466993c1227276cd934a1eb8071a24cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Dec 2025 06:41:46 +0100 Subject: [PATCH 23/67] xfs: move the remaining content from xfs.h to xfs_platform.h Move the global defines from xfs.h to xfs_platform.h to prepare for removing xfs.h. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs.h | 17 ----------------- fs/xfs/xfs_platform.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index b335a471c088..3ec52f2ec4b2 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -6,23 +6,6 @@ #ifndef __XFS_H__ #define __XFS_H__ -#ifdef CONFIG_XFS_DEBUG -#define DEBUG 1 -#endif - -#ifdef CONFIG_XFS_DEBUG_EXPENSIVE -#define DEBUG_EXPENSIVE 1 -#endif - -#ifdef CONFIG_XFS_ASSERT_FATAL -#define XFS_ASSERT_FATAL 1 -#endif - -#ifdef CONFIG_XFS_WARN -#define XFS_WARN 1 -#endif - - #include "xfs_platform.h" #endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_platform.h b/fs/xfs/xfs_platform.h index 5223fe567ac1..c7b013593646 100644 --- a/fs/xfs/xfs_platform.h +++ b/fs/xfs/xfs_platform.h @@ -57,6 +57,22 @@ #include #include +#ifdef CONFIG_XFS_DEBUG +#define DEBUG 1 +#endif + +#ifdef CONFIG_XFS_DEBUG_EXPENSIVE +#define DEBUG_EXPENSIVE 1 +#endif + +#ifdef CONFIG_XFS_ASSERT_FATAL +#define XFS_ASSERT_FATAL 1 +#endif + +#ifdef CONFIG_XFS_WARN +#define XFS_WARN 1 +#endif + /* * Kernel specific type declarations for XFS */ From cf9b52fa7d65362b648927d1d752ec99659f5c43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Dec 2025 06:41:47 +0100 Subject: [PATCH 24/67] xfs: directly include xfs_platform.h The xfs.h header conflicts with the public xfs.h in xfsprogs, leading to a spurious difference in all shared libxfs files that have to include libxfs_priv.h in userspace. Directly include xfs_platform.h so that we can add a header of the same name to xfsprogs and remove this major annoyance for the shared code. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_ag.c | 2 +- fs/xfs/libxfs/xfs_ag_resv.c | 2 +- fs/xfs/libxfs/xfs_alloc.c | 2 +- fs/xfs/libxfs/xfs_alloc_btree.c | 2 +- fs/xfs/libxfs/xfs_attr.c | 2 +- fs/xfs/libxfs/xfs_attr_leaf.c | 2 +- fs/xfs/libxfs/xfs_attr_remote.c | 2 +- fs/xfs/libxfs/xfs_bit.c | 2 +- fs/xfs/libxfs/xfs_bmap.c | 2 +- fs/xfs/libxfs/xfs_bmap_btree.c | 2 +- fs/xfs/libxfs/xfs_btree.c | 2 +- fs/xfs/libxfs/xfs_btree_mem.c | 2 +- fs/xfs/libxfs/xfs_btree_staging.c | 2 +- fs/xfs/libxfs/xfs_da_btree.c | 2 +- fs/xfs/libxfs/xfs_defer.c | 2 +- fs/xfs/libxfs/xfs_dir2.c | 2 +- fs/xfs/libxfs/xfs_dir2_block.c | 2 +- fs/xfs/libxfs/xfs_dir2_data.c | 2 +- fs/xfs/libxfs/xfs_dir2_leaf.c | 2 +- fs/xfs/libxfs/xfs_dir2_node.c | 2 +- fs/xfs/libxfs/xfs_dir2_sf.c | 2 +- fs/xfs/libxfs/xfs_dquot_buf.c | 2 +- fs/xfs/libxfs/xfs_exchmaps.c | 2 +- fs/xfs/libxfs/xfs_group.c | 2 +- fs/xfs/libxfs/xfs_ialloc.c | 2 +- fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +- fs/xfs/libxfs/xfs_iext_tree.c | 2 +- fs/xfs/libxfs/xfs_inode_buf.c | 2 +- fs/xfs/libxfs/xfs_inode_fork.c | 2 +- fs/xfs/libxfs/xfs_inode_util.c | 2 +- fs/xfs/libxfs/xfs_log_rlimit.c | 2 +- fs/xfs/libxfs/xfs_metadir.c | 2 +- fs/xfs/libxfs/xfs_metafile.c | 2 +- fs/xfs/libxfs/xfs_parent.c | 2 +- fs/xfs/libxfs/xfs_refcount.c | 2 +- fs/xfs/libxfs/xfs_refcount_btree.c | 2 +- fs/xfs/libxfs/xfs_rmap.c | 2 +- fs/xfs/libxfs/xfs_rmap_btree.c | 2 +- fs/xfs/libxfs/xfs_rtbitmap.c | 2 +- fs/xfs/libxfs/xfs_rtgroup.c | 2 +- fs/xfs/libxfs/xfs_rtrefcount_btree.c | 2 +- fs/xfs/libxfs/xfs_rtrmap_btree.c | 2 +- fs/xfs/libxfs/xfs_sb.c | 2 +- fs/xfs/libxfs/xfs_symlink_remote.c | 2 +- fs/xfs/libxfs/xfs_trans_inode.c | 2 +- fs/xfs/libxfs/xfs_trans_resv.c | 2 +- fs/xfs/libxfs/xfs_trans_space.c | 2 +- fs/xfs/libxfs/xfs_types.c | 2 +- fs/xfs/libxfs/xfs_zones.c | 2 +- fs/xfs/scrub/agb_bitmap.c | 2 +- fs/xfs/scrub/agheader.c | 2 +- fs/xfs/scrub/agheader_repair.c | 2 +- fs/xfs/scrub/alloc.c | 2 +- fs/xfs/scrub/alloc_repair.c | 2 +- fs/xfs/scrub/attr.c | 
2 +- fs/xfs/scrub/attr_repair.c | 2 +- fs/xfs/scrub/bitmap.c | 2 +- fs/xfs/scrub/bmap.c | 2 +- fs/xfs/scrub/bmap_repair.c | 2 +- fs/xfs/scrub/btree.c | 2 +- fs/xfs/scrub/common.c | 2 +- fs/xfs/scrub/cow_repair.c | 2 +- fs/xfs/scrub/dabtree.c | 2 +- fs/xfs/scrub/dir.c | 2 +- fs/xfs/scrub/dir_repair.c | 2 +- fs/xfs/scrub/dirtree.c | 2 +- fs/xfs/scrub/dirtree_repair.c | 2 +- fs/xfs/scrub/dqiterate.c | 2 +- fs/xfs/scrub/findparent.c | 2 +- fs/xfs/scrub/fscounters.c | 2 +- fs/xfs/scrub/fscounters_repair.c | 2 +- fs/xfs/scrub/health.c | 2 +- fs/xfs/scrub/ialloc.c | 2 +- fs/xfs/scrub/ialloc_repair.c | 2 +- fs/xfs/scrub/inode.c | 2 +- fs/xfs/scrub/inode_repair.c | 2 +- fs/xfs/scrub/iscan.c | 2 +- fs/xfs/scrub/listxattr.c | 2 +- fs/xfs/scrub/metapath.c | 2 +- fs/xfs/scrub/newbt.c | 2 +- fs/xfs/scrub/nlinks.c | 2 +- fs/xfs/scrub/nlinks_repair.c | 2 +- fs/xfs/scrub/orphanage.c | 2 +- fs/xfs/scrub/parent.c | 2 +- fs/xfs/scrub/parent_repair.c | 2 +- fs/xfs/scrub/quota.c | 2 +- fs/xfs/scrub/quota_repair.c | 2 +- fs/xfs/scrub/quotacheck.c | 2 +- fs/xfs/scrub/quotacheck_repair.c | 2 +- fs/xfs/scrub/rcbag.c | 2 +- fs/xfs/scrub/rcbag_btree.c | 2 +- fs/xfs/scrub/readdir.c | 2 +- fs/xfs/scrub/reap.c | 2 +- fs/xfs/scrub/refcount.c | 2 +- fs/xfs/scrub/refcount_repair.c | 2 +- fs/xfs/scrub/repair.c | 2 +- fs/xfs/scrub/rgsuper.c | 2 +- fs/xfs/scrub/rmap.c | 2 +- fs/xfs/scrub/rmap_repair.c | 2 +- fs/xfs/scrub/rtbitmap.c | 2 +- fs/xfs/scrub/rtbitmap_repair.c | 2 +- fs/xfs/scrub/rtrefcount.c | 2 +- fs/xfs/scrub/rtrefcount_repair.c | 2 +- fs/xfs/scrub/rtrmap.c | 2 +- fs/xfs/scrub/rtrmap_repair.c | 2 +- fs/xfs/scrub/rtsummary.c | 2 +- fs/xfs/scrub/rtsummary_repair.c | 2 +- fs/xfs/scrub/scrub.c | 2 +- fs/xfs/scrub/stats.c | 2 +- fs/xfs/scrub/symlink.c | 2 +- fs/xfs/scrub/symlink_repair.c | 2 +- fs/xfs/scrub/tempfile.c | 2 +- fs/xfs/scrub/trace.c | 2 +- fs/xfs/scrub/xfarray.c | 2 +- fs/xfs/scrub/xfblob.c | 2 +- fs/xfs/scrub/xfile.c | 2 +- fs/xfs/xfs.h | 11 ----------- fs/xfs/xfs_acl.c | 2 +- 
fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_attr_inactive.c | 2 +- fs/xfs/xfs_attr_item.c | 2 +- fs/xfs/xfs_attr_list.c | 2 +- fs/xfs/xfs_bio_io.c | 2 +- fs/xfs/xfs_bmap_item.c | 2 +- fs/xfs/xfs_bmap_util.c | 2 +- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_buf_item_recover.c | 2 +- fs/xfs/xfs_buf_mem.c | 2 +- fs/xfs/xfs_dahash_test.c | 2 +- fs/xfs/xfs_dir2_readdir.c | 2 +- fs/xfs/xfs_discard.c | 2 +- fs/xfs/xfs_dquot.c | 2 +- fs/xfs/xfs_dquot_item.c | 2 +- fs/xfs/xfs_dquot_item_recover.c | 2 +- fs/xfs/xfs_drain.c | 2 +- fs/xfs/xfs_error.c | 2 +- fs/xfs/xfs_exchmaps_item.c | 2 +- fs/xfs/xfs_exchrange.c | 2 +- fs/xfs/xfs_export.c | 2 +- fs/xfs/xfs_extent_busy.c | 2 +- fs/xfs/xfs_extfree_item.c | 2 +- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_filestream.c | 2 +- fs/xfs/xfs_fsmap.c | 2 +- fs/xfs/xfs_fsops.c | 2 +- fs/xfs/xfs_globals.c | 2 +- fs/xfs/xfs_handle.c | 2 +- fs/xfs/xfs_health.c | 2 +- fs/xfs/xfs_hooks.c | 2 +- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_icreate_item.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_inode_item_recover.c | 2 +- fs/xfs/xfs_ioctl.c | 2 +- fs/xfs/xfs_ioctl32.c | 2 +- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_itable.c | 2 +- fs/xfs/xfs_iunlink_item.c | 2 +- fs/xfs/xfs_iwalk.c | 2 +- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_recover.c | 2 +- fs/xfs/xfs_message.c | 2 +- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_mru_cache.c | 2 +- fs/xfs/xfs_notify_failure.c | 2 +- fs/xfs/xfs_pnfs.c | 2 +- fs/xfs/xfs_pwork.c | 2 +- fs/xfs/xfs_qm.c | 2 +- fs/xfs/xfs_qm_bhv.c | 2 +- fs/xfs/xfs_qm_syscalls.c | 2 +- fs/xfs/xfs_quotaops.c | 2 +- fs/xfs/xfs_refcount_item.c | 2 +- fs/xfs/xfs_reflink.c | 2 +- fs/xfs/xfs_rmap_item.c | 2 +- fs/xfs/xfs_rtalloc.c | 2 +- fs/xfs/xfs_stats.c | 2 +- fs/xfs/xfs_super.c | 2 +- fs/xfs/xfs_symlink.c | 2 +- fs/xfs/xfs_sysctl.c | 2 +- fs/xfs/xfs_sysfs.c | 2 +- fs/xfs/xfs_trace.c | 2 +- fs/xfs/xfs_trans.c | 2 +- fs/xfs/xfs_trans_ail.c | 2 +- 
fs/xfs/xfs_trans_buf.c | 2 +- fs/xfs/xfs_trans_dquot.c | 2 +- fs/xfs/xfs_xattr.c | 2 +- fs/xfs/xfs_zone_alloc.c | 2 +- fs/xfs/xfs_zone_gc.c | 2 +- fs/xfs/xfs_zone_info.c | 2 +- fs/xfs/xfs_zone_space_resv.c | 2 +- 194 files changed, 193 insertions(+), 204 deletions(-) delete mode 100644 fs/xfs/xfs.h diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e6ba914f6d06..586918ed1cbf 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -5,7 +5,7 @@ * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 8ac8230c3d3c..c4cdcc570d61 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ad381c73abc4..5bec3365a99a 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index fa1f03c1331e..29f6ec1c3f6f 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 8c04acd30d48..866abae58fe1 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 91c1b30ebaab..6061230b17ef 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index bff3dc226f81..e6c8dd1a997a 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c index 40ce5f3094d1..f05a07c0f75d 100644 --- a/fs/xfs/libxfs/xfs_bit.c +++ b/fs/xfs/libxfs/xfs_bit.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_log_format.h" #include "xfs_bit.h" diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 53ef4b7e504d..7a4c8f1aa76c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 188feac04b60..1c7165df483a 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index dbe9df8c3300..7012f3570c8d 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c index f2f7b4305413..37136a70e56d 100644 --- a/fs/xfs/libxfs/xfs_btree_mem.c +++ b/fs/xfs/libxfs/xfs_btree_mem.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index 5ed84f9cc877..4300c058807b 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -3,7 +3,7 @@ * Copyright (C) 2020 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 90f7fc219fcc..766631f0562e 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 5b377cbbb1f7..0bd87b40d091 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 82a338458a51..107c1a5b8a96 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 0f93ed1a4a74..6d70e6b429e7 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index a16b05c43e2e..80ba94f51e5c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 71c2f22a3f6e..bc909543eb74 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index fe8d4fa13128..ed0b5287a44f 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 17a20384c8b7..1a67cdd6a707 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index dceef2abd4e2..ce767b40482f 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c index 932ee4619e9e..5d28f4eac527 100644 --- a/fs/xfs/libxfs/xfs_exchmaps.c +++ b/fs/xfs/libxfs/xfs_exchmaps.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c index 792f76d2e2a0..2ff9d1e56b47 100644 --- a/fs/xfs/libxfs/xfs_group.c +++ b/fs/xfs/libxfs/xfs_group.c @@ -3,7 +3,7 @@ * Copyright (c) 2018 Red Hat, Inc. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index c19d6d713780..dcef06ec0a02 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 100afdd66cdd..1376e8630449 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 8796f2b3e534..5b2b926ab228 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -3,7 +3,7 @@ * Copyright (c) 2017 Christoph Hellwig. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_bit.h" diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b1812b2c3cce..a017016e9075 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1772d82f2d68..d14a7f2f4c03 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -4,7 +4,7 @@ * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 309ce6dd5553..551fa51befb6 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ #include -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 34bba96d30ca..37712b2f8757 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -3,7 +3,7 @@ * Copyright (c) 2013 Jie Liu. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c index 178e89711cb7..3e5c61188927 100644 --- a/fs/xfs/libxfs/xfs_metadir.c +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c index b02e3d6c0868..cf239f862212 100644 --- a/fs/xfs/libxfs/xfs_metafile.c +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 69366c44a701..6539f5adae2d 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. * All rights reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_da_format.h" diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2484dc9f6d7e..915ec85530e7 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 06da3ca14727..7e5f92c1ac56 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 83e0488ff773..e78133c908ca 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index bf16aee50d73..10b3272238eb 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 618061d898d4..bc4c0a99f4dd 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c index be16efaa6925..09328f2d1575 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.c +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c index ac11e94b42ae..c1518267eb17 100644 --- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c +++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index 55f903165769..00557b7ef298 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 94c272a2ae26..38d16fe1f6d8 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index fb47a76ead18..f9a5966d8048 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -4,7 +4,7 @@ * Copyright (c) 2012-2013 Red Hat, Inc. * All rights reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index c962ad64b0c1..1a0fdcbf39fa 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -3,7 +3,7 @@ * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 86a111d0f2fc..3151e97ca8ff 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -4,7 +4,7 @@ * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c index b9dc3752f702..9b8f495c9049 100644 --- a/fs/xfs/libxfs/xfs_trans_space.c +++ b/fs/xfs/libxfs/xfs_trans_space.c @@ -3,7 +3,7 @@ * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 1faf04204c5d..67c947a47f14 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -4,7 +4,7 @@ * Copyright (C) 2017 Oracle. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_shared.h" diff --git a/fs/xfs/libxfs/xfs_zones.c b/fs/xfs/libxfs/xfs_zones.c index b40f71f878b5..8c3c67caf64e 100644 --- a/fs/xfs/libxfs/xfs_zones.c +++ b/fs/xfs/libxfs/xfs_zones.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/agb_bitmap.c b/fs/xfs/scrub/agb_bitmap.c index 573e4e062754..0194e3aaa1fa 100644 --- a/fs/xfs/scrub/agb_bitmap.c +++ b/fs/xfs/scrub/agb_bitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_bit.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 303374df44bd..7ffe4b0ef0f1 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index cd6f0223879f..1c09948d841e 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 8b282138097f..48edaa2cb1e0 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c index bed6a09aa791..d84777e23321 100644 --- a/fs/xfs/scrub/alloc_repair.c +++ b/fs/xfs/scrub/alloc_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 708334f9b2bd..eeb5ac34d742 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 09d63aa10314..1da1354f5e06 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index 7ba35a7a7920..51f3171bc6c8 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 4f1e2574660d..d40534bf9ef9 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 1084213b8e9b..1d1056d447e0 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index cd6f0ff382a7..8ba004979862 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 7bfa37c99480..38d0b7d5c894 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c index b2a83801412e..33749cf43520 100644 --- a/fs/xfs/scrub/cow_repair.c +++ b/fs/xfs/scrub/cow_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 056de4819f86..dd14f355358c 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index c877bde71e62..1d98775b4b17 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 8d3b550990b5..d54206f674e2 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index 3a9cdf8738b6..529dae105e57 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dirtree_repair.c b/fs/xfs/scrub/dirtree_repair.c index 5c04e70ba951..019feaf0d606 100644 --- a/fs/xfs/scrub/dirtree_repair.c +++ b/fs/xfs/scrub/dirtree_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/dqiterate.c b/fs/xfs/scrub/dqiterate.c index 20c4daedd48d..10950e4bd4c3 100644 --- a/fs/xfs/scrub/dqiterate.c +++ b/fs/xfs/scrub/dqiterate.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 84487072b6dd..2076f028d271 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index cebd0d526926..b35f65b537ba 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -3,7 +3,7 @@ * Copyright (C) 2019-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c index f0d2b04644e4..783e409f8f3c 100644 --- a/fs/xfs/scrub/fscounters_repair.c +++ b/fs/xfs/scrub/fscounters_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 3c0f25098b69..2171bcf0f6c1 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -3,7 +3,7 @@ * Copyright (C) 2019-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 4dc7c83dc08a..911dc0f9a79d 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c index 14e48d3f1912..bccf2e18d43e 100644 --- a/fs/xfs/scrub/ialloc_repair.c +++ b/fs/xfs/scrub/ialloc_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb3f475b6353..948d04dcba2a 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 4f7040c9ddf0..bf182a18f115 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c index 84f117667ca2..2a974eed00cc 100644 --- a/fs/xfs/scrub/iscan.c +++ b/fs/xfs/scrub/iscan.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c index 256ff7700c94..0863db64b1b2 100644 --- a/fs/xfs/scrub/listxattr.c +++ b/fs/xfs/scrub/listxattr.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c index 378ec7c8d38e..3d9de59c1758 100644 --- a/fs/xfs/scrub/metapath.c +++ b/fs/xfs/scrub/metapath.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c index 951ae8b71566..43e868f829aa 100644 --- a/fs/xfs/scrub/newbt.c +++ b/fs/xfs/scrub/newbt.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 091c79e432e5..8bf0bff64b41 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index 6ef2ee9c3814..9049215c6eae 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 4e550a1d5353..52a108f6d5f4 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 11d5de10fd56..36d505f3e40b 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 2949feda6271..512a546f8ce1 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 5c5374c44c5a..1d25bd5b892e 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/scrub/quota_repair.c b/fs/xfs/scrub/quota_repair.c index b1d661aa5f06..487bd4f68ebb 100644 --- a/fs/xfs/scrub/quota_repair.c +++ b/fs/xfs/scrub/quota_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index d412a8359784..00e0c0e56d82 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c index 51be8d8d261b..dbb522e1513b 100644 --- a/fs/xfs/scrub/quotacheck_repair.c +++ b/fs/xfs/scrub/quotacheck_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rcbag.c b/fs/xfs/scrub/rcbag.c index e1e52bc20713..c1a97a073d92 100644 --- a/fs/xfs/scrub/rcbag.c +++ b/fs/xfs/scrub/rcbag.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c index 9a4ef823c5a7..367f8ccf55c4 100644 --- a/fs/xfs/scrub/rcbag_btree.c +++ b/fs/xfs/scrub/rcbag_btree.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/readdir.c b/fs/xfs/scrub/readdir.c index 01c9a2dc0f2c..c66ec9093a38 100644 --- a/fs/xfs/scrub/readdir.c +++ b/fs/xfs/scrub/readdir.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c index 07f5bb8a6421..fff23932828b 100644 --- a/fs/xfs/scrub/reap.c +++ b/fs/xfs/scrub/reap.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index d46528023015..bf87025f24fc 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c index 9c8cb5332da0..46546bf6eb13 100644 --- a/fs/xfs/scrub/refcount_repair.c +++ b/fs/xfs/scrub/refcount_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index efd5a7ccdf62..3ebe27524ce3 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c index d189732d0e24..482f899a518a 100644 --- a/fs/xfs/scrub/rgsuper.c +++ b/fs/xfs/scrub/rgsuper.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 39e9ad7cd8ae..2c25910e2903 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index 17d4a38d735c..f27e3c8aa6d5 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index d5ff8609dbfb..4bcfd99cec17 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c index 203a1a97c502..fd0d12db55f9 100644 --- a/fs/xfs/scrub/rtbitmap_repair.c +++ b/fs/xfs/scrub/rtbitmap_repair.c @@ -3,7 +3,7 @@ * Copyright (C) 2020-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrefcount.c b/fs/xfs/scrub/rtrefcount.c index 4c5dffc73641..8cfe2f120b6b 100644 --- a/fs/xfs/scrub/rtrefcount.c +++ b/fs/xfs/scrub/rtrefcount.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c index 983362447826..a092934ed371 100644 --- a/fs/xfs/scrub/rtrefcount_repair.c +++ b/fs/xfs/scrub/rtrefcount_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c index 12989fe80e8b..8b1a8389d32f 100644 --- a/fs/xfs/scrub/rtrmap.c +++ b/fs/xfs/scrub/rtrmap.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index 7561941a337a..5af94e48d8cf 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 4ac679c1bd29..712f27f6266c 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c index d593977d70df..afffbd6e0ad1 100644 --- a/fs/xfs/scrub/rtsummary_repair.c +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 3c3b0d25006f..670ac2baae0c 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index f8a37ea97791..4efafc5ae966 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index c848bcc07cd5..91d40b9fb5c6 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c index df629892462f..25416dfb5189 100644 --- a/fs/xfs/scrub/symlink_repair.c +++ b/fs/xfs/scrub/symlink_repair.c @@ -3,7 +3,7 @@ * Copyright (c) 2018-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index cf99e0ca51b0..8d754df72aa5 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 987313a52e64..70d353287993 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -3,7 +3,7 @@ * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c index ed2e8c64b1a8..c7c4a71b6fa7 100644 --- a/fs/xfs/scrub/xfarray.c +++ b/fs/xfs/scrub/xfarray.c @@ -3,7 +3,7 @@ * Copyright (C) 2021-2023 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c index 6ef2a9637f16..96fc360312de 100644 --- a/fs/xfs/scrub/xfblob.c +++ b/fs/xfs/scrub/xfblob.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index c753c79df203..2998c9b62f4b 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -3,7 +3,7 @@ * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h deleted file mode 100644 index 3ec52f2ec4b2..000000000000 --- a/fs/xfs/xfs.h +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#ifndef __XFS_H__ -#define __XFS_H__ - -#include "xfs_platform.h" - -#endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c7c3dcfa2718..fdfca6fc75b6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -3,7 +3,7 @@ * Copyright (c) 2008, Christoph Hellwig * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 56a544638491..043ab12a18ea 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -4,7 +4,7 @@ * Copyright (c) 2016-2025 Christoph Hellwig. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 319004bf089f..92331991f9fd 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index ad2956f78eca..354472bf45f1 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -4,7 +4,7 @@ * Author: Allison Henderson */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 379b48d015d2..114566b1ae5c 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index 2a736d10eafb..b87e7975b613 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2019 Christoph Hellwig. */ -#include "xfs.h" +#include "xfs_platform.h" static inline unsigned int bio_max_vecs(unsigned int count) { diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index f38ed63fe86b..e8775f254c89 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2208a720ec3f..0ab00615f1ad 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -4,7 +4,7 @@ * Copyright (c) 2012 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47edf3041631..db46883991de 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include #include diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index cb2a36374ed4..8487635579e5 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index e4c8af873632..77ad071ebe78 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index dcbfa274e06d..0106da0a9f44 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_buf.h" #include "xfs_buf_mem.h" diff --git a/fs/xfs/xfs_dahash_test.c b/fs/xfs/xfs_dahash_test.c index 0dab5941e080..f1ee2643b948 100644 --- a/fs/xfs/xfs_dahash_test.c +++ b/fs/xfs/xfs_dahash_test.c @@ -3,7 +3,7 @@ * Copyright (C) 2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 06ac5a7de60a..60a80d4173f7 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index b6ffe4807a11..31477a74b523 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -3,7 +3,7 @@ * Copyright (C) 2010, 2023 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 612ca682a513..2b208e2c5264 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 8bc7f43093a2..491e2a7053a3 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 89bc9bcaf51e..fe419b28de22 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c index fa5f31931efd..1ad67f6c1fbf 100644 --- a/fs/xfs/xfs_drain.c +++ b/fs/xfs/xfs_drain.c @@ -3,7 +3,7 @@ * Copyright (C) 2022-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 39830b252ac8..873f2d1a134c 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_fs.h" diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c index 10d6fbeff651..13a42467370f 100644 --- a/fs/xfs/xfs_exchmaps_item.c +++ b/fs/xfs/xfs_exchmaps_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 0b41bdfecdfb..5c083f29ea65 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 201489d3de08..e3e3c3c89840 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index da3161572735..cfecb2959472 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -5,7 +5,7 @@ * Copyright (c) 2011 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3d1edc43e6fb..749a4eb9793c 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7874cf745af3..d36a9aafa8ab 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 044918fbae06..44e1b14069a3 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -4,7 +4,7 @@ * Copyright (c) 2014 Christoph Hellwig. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index af68c7de8ee8..098c2b50bc6f 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -3,7 +3,7 @@ * Copyright (C) 2017 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0ada73569394..b687c5fa34ac 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 566fd663c95b..60efe8246304 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_error.h" /* diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index 5a3e3bf4e7cc..d1291ca15239 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -4,7 +4,7 @@ * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 3c1557fb1cf0..12fa8f24da85 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_hooks.c b/fs/xfs/xfs_hooks.c index a58d1de2d37d..a09109e692b1 100644 --- a/fs/xfs/xfs_hooks.c +++ b/fs/xfs/xfs_hooks.c @@ -3,7 +3,7 @@ * Copyright (c) 2022-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 23a920437fe4..dbaab4ae709f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 004dd22393dc..95b0eba242e9 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2008-2010, 2013 Dave Chinner * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f1f88e48fe22..50c0404f9064 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -5,7 +5,7 @@ */ #include -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 81dfe70e173d..8913036b8024 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 9d1999d41be1..5d93228783eb 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 59eaad774371..54cef912e05f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index b64785dc4354..c66e192448a8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -5,7 +5,7 @@ */ #include #include -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 37a1b33e9045..be86d43044df 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -4,7 +4,7 @@ * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ad94fbf55014..165001c7423d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2aa37a4d2706..9faff287f747 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c index 1fd70a7aed63..a03a48eeb9a8 100644 --- a/fs/xfs/xfs_iunlink_item.c +++ b/fs/xfs/xfs_iunlink_item.c @@ -3,7 +3,7 @@ * Copyright (c) 2020-2022, Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index c1c31d1a8e21..ed4033006868 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8ddd25970471..a26378ca247d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index bc25012ac5c0..566976b8fef3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -3,7 +3,7 @@ * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 03e42c7dab56..94e8598056eb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 19aba2c3d525..16bdc38887ab 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_error.h" #include "xfs_shared.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0953f6ae94ab..4b0046483ca6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 73b7e72944e4..4e417747688f 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -3,7 +3,7 @@ * Copyright (c) 2006-2007 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_mru_cache.h" /* diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index b17672889942..a6a34dc2c028 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -3,7 +3,7 @@ * Copyright (c) 2022 Fujitsu. All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index afe7497012d4..221e55887a2a 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c index c283b801cc5d..7c79ab0db0e2 100644 --- a/fs/xfs/xfs_pwork.c +++ b/fs/xfs/xfs_pwork.c @@ -3,7 +3,7 @@ * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 95be67ac6eb4..a3e7d4a107d4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index edc0aef3cf34..a094b8252ffd 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 022e2179c06b..d50b7318cb5c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -5,7 +5,7 @@ */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 94fbe3d99ec7..8804508cc2b8 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -3,7 +3,7 @@ * Copyright (c) 2008, Christoph Hellwig * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index a41f5b577e22..881c3f3a6a24 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3f177b4ec131..db23a0f231d6 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 8bf04b101156..a39fe08dcd8f 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -3,7 +3,7 @@ * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a12ffed12391..90a94a5b6f7e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 35c7fb3ba324..9781222e0653 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" struct xstats xfsstats; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bc71aa9dcee8..845abf437bf5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -4,7 +4,7 @@ * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 4252b07cd251..c4da624fb296 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -4,7 +4,7 @@ * Copyright (c) 2012-2013 Red Hat, Inc. * All rights reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_fs.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 9918f14b4874..7f32d282dc88 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -3,7 +3,7 @@ * Copyright (c) 2001-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_error.h" static struct ctl_table_header *xfs_table_header; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 7a5c5ef2db92..6c7909838234 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -4,7 +4,7 @@ * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index a60556dbd172..478aebb60411 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -3,7 +3,7 @@ * Copyright (c) 2009, Christoph Hellwig * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_bit.h" diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 474f5a04ec63..2c3c29d0d4a0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -4,7 +4,7 @@ * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 38983c6777df..363d7f88c2c6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -4,7 +4,7 @@ * Copyright (c) 2008 Dave Chinner * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 53af546c0b23..95db73a37e57 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index c842ce06acd6..eaf9de6e07fd 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -3,7 +3,7 @@ * Copyright (c) 2000-2002 Silicon Graphics, Inc. * All Rights Reserved. 
*/ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ac5cecec9aa1..a735f16d9cd8 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -4,7 +4,7 @@ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index bbcf21704ea0..4ca7769b5adb 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 3c52cc1497d4..446d7cd1545c 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c index 07e30c596975..53eabbc3334c 100644 --- a/fs/xfs/xfs_zone_info.c +++ b/fs/xfs/xfs_zone_info.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" diff --git a/fs/xfs/xfs_zone_space_resv.c b/fs/xfs/xfs_zone_space_resv.c index fc1a4d1ce10c..5c6e6ef627e4 100644 --- a/fs/xfs/xfs_zone_space_resv.c +++ b/fs/xfs/xfs_zone_space_resv.c @@ -3,7 +3,7 @@ * Copyright (c) 2023-2025 Christoph Hellwig. 
* Copyright (c) 2024-2025, Western Digital Corporation or its affiliates. */ -#include "xfs.h" +#include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" From 7ca44303f9f6160a2f87ae3d5d2326d9127cd61c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 14:06:41 +0100 Subject: [PATCH 25/67] block: add a bio_reuse helper Add a helper to allow an existing bio to be resubmitted without having to re-add the payload. Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Damien Le Moal Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- block/bio.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 2 files changed, 35 insertions(+) diff --git a/block/bio.c b/block/bio.c index e726c0e280a8..40f690985bfb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -311,6 +311,40 @@ void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf) } EXPORT_SYMBOL(bio_reset); +/** + * bio_reuse - reuse a bio with the payload left intact + * @bio: bio to reuse + * @opf: operation and flags for the next I/O + * + * Allow reusing an existing bio for another operation with all set up + * fields including the payload, device and end_io handler left intact. + * + * Typically used when @bio is first used to read data which is then written + * to another location without modification. @bio must not be in-flight and + * must be owned by the caller. Can't be used for cloned bios. + * + * Note: Can't be used when @bio has integrity or blk-crypto contexts for now. + * Feel free to add that support when you need it, though.
+ */ +void bio_reuse(struct bio *bio, blk_opf_t opf) +{ + unsigned short vcnt = bio->bi_vcnt, i; + bio_end_io_t *end_io = bio->bi_end_io; + void *private = bio->bi_private; + + WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); + WARN_ON_ONCE(bio_integrity(bio)); + WARN_ON_ONCE(bio_has_crypt_ctx(bio)); + + bio_reset(bio, bio->bi_bdev, opf); + for (i = 0; i < vcnt; i++) + bio->bi_iter.bi_size += bio->bi_io_vec[i].bv_len; + bio->bi_vcnt = vcnt; + bio->bi_private = private; + bio->bi_end_io = end_io; +} +EXPORT_SYMBOL_GPL(bio_reuse); + static struct bio *__bio_chain_endio(struct bio *bio) { struct bio *parent = bio->bi_private; diff --git a/include/linux/bio.h b/include/linux/bio.h index c75a9b3672aa..6156f2d66d4a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -414,6 +414,7 @@ static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); +void bio_reuse(struct bio *bio, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, From 0506d32f7c52e41f6e8db7c337e0ce6374c6ffbb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 14:06:42 +0100 Subject: [PATCH 26/67] xfs: use bio_reuse in the zone GC code Replace our somewhat fragile code to reuse the bio, which caused a regression in the past, with the block layer bio_reuse helper.
Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 446d7cd1545c..4d8507fd05e6 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -811,8 +811,6 @@ xfs_zone_gc_write_chunk( { struct xfs_zone_gc_data *data = chunk->data; struct xfs_mount *mp = chunk->ip->i_mount; - phys_addr_t bvec_paddr = - bvec_phys(bio_first_bvec_all(&chunk->bio)); struct xfs_gc_bio *split_chunk; if (chunk->bio.bi_status) @@ -825,10 +823,7 @@ xfs_zone_gc_write_chunk( WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_move_tail(&chunk->entry, &data->writing); - bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE); - bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len, - offset_in_folio(chunk->scratch->folio, bvec_paddr)); - + bio_reuse(&chunk->bio, REQ_OP_WRITE); while ((split_chunk = xfs_zone_gc_split_write(data, chunk))) xfs_zone_gc_submit_write(data, split_chunk); xfs_zone_gc_submit_write(data, chunk); From 102f444b57b35e41b04a5c8192fcdacb467c9161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 14:06:43 +0100 Subject: [PATCH 27/67] xfs: rework zone GC buffer management The double buffering where just one scratch area is used at a time does not efficiently use the available memory. It was originally implemented when GC I/O could happen out of order, but that was removed before upstream submission to avoid fragmentation. Now that all GC I/Os are processed in order, just use a number of buffers as a simple ring buffer. For a synthetic benchmark that fills 256MiB HDD zones and punches out holes to free half the space this leads to a decrease of GC time by a little more than 25%. Thanks to Hans Holmberg for testing and benchmarking. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 106 ++++++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 47 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 4d8507fd05e6..dfa6653210c7 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -50,23 +50,11 @@ */ /* - * Size of each GC scratch pad. This is also the upper bound for each - * GC I/O, which helps to keep latency down. + * Size of each GC scratch allocation, and the number of buffers. */ -#define XFS_GC_CHUNK_SIZE SZ_1M - -/* - * Scratchpad data to read GCed data into. - * - * The offset member tracks where the next allocation starts, and freed tracks - * the amount of space that is not used anymore. - */ -#define XFS_ZONE_GC_NR_SCRATCH 2 -struct xfs_zone_scratch { - struct folio *folio; - unsigned int offset; - unsigned int freed; -}; +#define XFS_GC_BUF_SIZE SZ_1M +#define XFS_GC_NR_BUFS 2 +static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS); /* * Chunk that is read and written for each GC operation. @@ -141,10 +129,14 @@ struct xfs_zone_gc_data { struct bio_set bio_set; /* - * Scratchpad used, and index to indicated which one is used. + * Scratchpad to buffer GC data, organized as a ring buffer over + * discontiguous folios. scratch_head is where the buffer is filled, + * and scratch_tail tracks the buffer space freed. */ - struct xfs_zone_scratch scratch[XFS_ZONE_GC_NR_SCRATCH]; - unsigned int scratch_idx; + struct folio *scratch_folios[XFS_GC_NR_BUFS]; + unsigned int scratch_size; + unsigned int scratch_head; + unsigned int scratch_tail; /* * List of bios currently being read, written and reset. @@ -210,20 +202,16 @@ xfs_zone_gc_data_alloc( if (!data->iter.recs) goto out_free_data; - /* - * We actually only need a single bio_vec. 
It would be nice to have - * a flag that only allocates the inline bvecs and not the separate - * bvec pool. - */ if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio), BIOSET_NEED_BVECS)) goto out_free_recs; - for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) { - data->scratch[i].folio = - folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE)); - if (!data->scratch[i].folio) + for (i = 0; i < XFS_GC_NR_BUFS; i++) { + data->scratch_folios[i] = + folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE)); + if (!data->scratch_folios[i]) goto out_free_scratch; } + data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS; INIT_LIST_HEAD(&data->reading); INIT_LIST_HEAD(&data->writing); INIT_LIST_HEAD(&data->resetting); @@ -232,7 +220,7 @@ xfs_zone_gc_data_alloc( out_free_scratch: while (--i >= 0) - folio_put(data->scratch[i].folio); + folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); out_free_recs: kfree(data->iter.recs); @@ -247,8 +235,8 @@ xfs_zone_gc_data_free( { int i; - for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) - folio_put(data->scratch[i].folio); + for (i = 0; i < XFS_GC_NR_BUFS; i++) + folio_put(data->scratch_folios[i]); bioset_exit(&data->bio_set); kfree(data->iter.recs); kfree(data); @@ -590,7 +578,12 @@ static unsigned int xfs_zone_gc_scratch_available( struct xfs_zone_gc_data *data) { - return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset; + if (!data->scratch_tail) + return data->scratch_size - data->scratch_head; + + if (!data->scratch_head) + return data->scratch_tail; + return (data->scratch_size - data->scratch_head) + data->scratch_tail; } static bool @@ -664,6 +657,28 @@ xfs_zone_gc_alloc_blocks( return oz; } +static void +xfs_zone_gc_add_data( + struct xfs_gc_bio *chunk) +{ + struct xfs_zone_gc_data *data = chunk->data; + unsigned int len = chunk->len; + unsigned int off = data->scratch_head; + + do { + unsigned int this_off = off % XFS_GC_BUF_SIZE; + unsigned int this_len = min(len, XFS_GC_BUF_SIZE - this_off); + + 
bio_add_folio_nofail(&chunk->bio, + data->scratch_folios[off / XFS_GC_BUF_SIZE], + this_len, this_off); + len -= this_len; + off += this_len; + if (off == data->scratch_size) + off = 0; + } while (len); +} + static bool xfs_zone_gc_start_chunk( struct xfs_zone_gc_data *data) @@ -677,6 +692,7 @@ xfs_zone_gc_start_chunk( struct xfs_inode *ip; struct bio *bio; xfs_daddr_t daddr; + unsigned int len; bool is_seq; if (xfs_is_shutdown(mp)) @@ -691,17 +707,19 @@ xfs_zone_gc_start_chunk( return false; } - bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set); + len = XFS_FSB_TO_B(mp, irec.rm_blockcount); + bio = bio_alloc_bioset(bdev, + min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS), + REQ_OP_READ, GFP_NOFS, &data->bio_set); chunk = container_of(bio, struct xfs_gc_bio, bio); chunk->ip = ip; chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset); - chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount); + chunk->len = len; chunk->old_startblock = xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock); chunk->new_daddr = daddr; chunk->is_seq = is_seq; - chunk->scratch = &data->scratch[data->scratch_idx]; chunk->data = data; chunk->oz = oz; chunk->victim_rtg = iter->victim_rtg; @@ -710,13 +728,9 @@ xfs_zone_gc_start_chunk( bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; - bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len, - chunk->scratch->offset); - chunk->scratch->offset += chunk->len; - if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) { - data->scratch_idx = - (data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH; - } + xfs_zone_gc_add_data(chunk); + data->scratch_head = (data->scratch_head + len) % data->scratch_size; + WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->reading); xfs_zone_gc_iter_advance(iter, irec.rm_blockcount); @@ -834,6 +848,7 @@ xfs_zone_gc_finish_chunk( struct xfs_gc_bio *chunk) { uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + struct 
xfs_zone_gc_data *data = chunk->data; struct xfs_inode *ip = chunk->ip; struct xfs_mount *mp = ip->i_mount; int error; @@ -845,11 +860,8 @@ xfs_zone_gc_finish_chunk( return; } - chunk->scratch->freed += chunk->len; - if (chunk->scratch->freed == chunk->scratch->offset) { - chunk->scratch->offset = 0; - chunk->scratch->freed = 0; - } + data->scratch_tail = + (data->scratch_tail + chunk->len) % data->scratch_size; /* * Cycle through the iolock and wait for direct I/O and layouts to From f39854a3fb2f06dc69b81ada002b641ba5b4696b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 18 Dec 2025 18:40:50 -0800 Subject: [PATCH 28/67] xfs: mark data structures corrupt on EIO and ENODATA I learned a few things this year: first, blk_status_to_errno can return ENODATA for critical media errors; and second, the scrub code doesn't mark data structures as corrupt on ENODATA or EIO. Currently, scrub failing to capture these errors isn't all that impactful -- the checking code will exit to userspace with EIO/ENODATA, and xfs_scrub will log a complaint and exit with nonzero status. Most people treat fsck tools failing as a sign that the fs is corrupt, but online fsck should mark the metadata bad and keep moving. Cc: stable@vger.kernel.org # v4.15 Fixes: 4700d22980d459 ("xfs: create helpers to record and deal with scrub problems") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/btree.c | 2 ++ fs/xfs/scrub/common.c | 4 ++++ fs/xfs/scrub/dabtree.c | 2 ++ 3 files changed, 8 insertions(+) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 8ba004979862..40f36db9f07d 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -42,6 +42,8 @@ __xchk_btree_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. 
*/ sc->sm->sm_flags |= errflag; *error = 0; diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 38d0b7d5c894..affed35a8c96 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -103,6 +103,8 @@ __xchk_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; *error = 0; @@ -177,6 +179,8 @@ __xchk_fblock_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; *error = 0; diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index dd14f355358c..5858d4d5e279 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -45,6 +45,8 @@ xchk_da_process_error( break; case -EFSBADCRC: case -EFSCORRUPTED: + case -EIO: + case -ENODATA: /* Note the badness but don't abort. */ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; *error = 0; From 3a65ea768b8094e4699e72f9ab420eb9e0f3f568 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jan 2026 16:17:40 +0100 Subject: [PATCH 29/67] xfs: remove xfs_attr_leaf_hasname The calling convention of xfs_attr_leaf_hasname() is problematic, because it returns a NULL buffer when xfs_attr3_leaf_read fails, a valid buffer when xfs_attr3_leaf_lookup_int returns -ENOATTR or -EEXIST, and a non-NULL buffer pointer for an already released buffer when xfs_attr3_leaf_lookup_int fails with other error values. Fix this by simply open coding xfs_attr_leaf_hasname in the callers, so that the buffer release code is done by each caller of xfs_attr3_leaf_read. Cc: stable@vger.kernel.org # v5.19+ Fixes: 07120f1abdff ("xfs: Add xfs_has_attr and subroutines") Reported-by: Mark Tinguely Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_attr.c | 75 +++++++++++++--------------------------- 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 866abae58fe1..9e6b18d6ae00 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -50,7 +50,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); */ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); -STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); /* * Internal routines when attribute list is more than one block. @@ -979,11 +978,12 @@ xfs_attr_lookup( return error; if (xfs_attr_is_leaf(dp)) { - error = xfs_attr_leaf_hasname(args, &bp); - - if (bp) - xfs_trans_brelse(args->trans, bp); - + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + 0, &bp); + if (error) + return error; + error = xfs_attr3_leaf_lookup_int(bp, args); + xfs_trans_brelse(args->trans, bp); return error; } @@ -1222,27 +1222,6 @@ xfs_attr_shortform_addname( * External routines when attribute list is one block *========================================================================*/ -/* - * Return EEXIST if attr is found, or ENOATTR if not - */ -STATIC int -xfs_attr_leaf_hasname( - struct xfs_da_args *args, - struct xfs_buf **bp) -{ - int error = 0; - - error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp); - if (error) - return error; - - error = xfs_attr3_leaf_lookup_int(*bp, args); - if (error != -ENOATTR && error != -EEXIST) - xfs_trans_brelse(args->trans, *bp); - - return error; -} - /* * Remove a name from the leaf attribute list structure * @@ -1253,25 +1232,22 @@ STATIC int xfs_attr_leaf_removename( struct xfs_da_args *args) { - struct xfs_inode *dp; - struct xfs_buf *bp; + struct xfs_inode *dp = args->dp; int error, forkoff; + struct xfs_buf *bp; trace_xfs_attr_leaf_removename(args); - /* - * Remove the 
attribute. - */ - dp = args->dp; - - error = xfs_attr_leaf_hasname(args, &bp); - if (error == -ENOATTR) { + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); + if (error) + return error; + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error != -EEXIST) { xfs_trans_brelse(args->trans, bp); - if (args->op_flags & XFS_DA_OP_RECOVERY) + if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY)) return 0; return error; - } else if (error != -EEXIST) - return error; + } xfs_attr3_leaf_remove(bp, args); @@ -1295,23 +1271,20 @@ xfs_attr_leaf_removename( * Returns 0 on successful retrieval, otherwise an error. */ STATIC int -xfs_attr_leaf_get(xfs_da_args_t *args) +xfs_attr_leaf_get( + struct xfs_da_args *args) { - struct xfs_buf *bp; - int error; + struct xfs_buf *bp; + int error; trace_xfs_attr_leaf_get(args); - error = xfs_attr_leaf_hasname(args, &bp); - - if (error == -ENOATTR) { - xfs_trans_brelse(args->trans, bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); + if (error) return error; - } else if (error != -EEXIST) - return error; - - - error = xfs_attr3_leaf_getvalue(bp, args); + error = xfs_attr3_leaf_lookup_int(bp, args); + if (error == -EEXIST) + error = xfs_attr3_leaf_getvalue(bp, args); xfs_trans_brelse(args->trans, bp); return error; } From 41263267ef26d315b1425eb9c8a8d7092f9db7c8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 14 Jan 2026 07:53:24 +0100 Subject: [PATCH 30/67] xfs: add missing forward declaration in xfs_zones.h Add the missing forward declaration for struct blk_zone in xfs_zones.h. This avoids headaches with the order of header file inclusion to avoid compilation errors. Signed-off-by: Damien Le Moal Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_zones.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/libxfs/xfs_zones.h b/fs/xfs/libxfs/xfs_zones.h index 5fefd132e002..df10a34da71d 100644 --- a/fs/xfs/libxfs/xfs_zones.h +++ b/fs/xfs/libxfs/xfs_zones.h @@ -3,6 +3,7 @@ #define _LIBXFS_ZONES_H struct xfs_rtgroup; +struct blk_zone; /* * In order to guarantee forward progress for GC we need to reserve at least From fc633b5c5b80c1d840b7a8bc2828be96582c6b55 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 07:53:25 +0100 Subject: [PATCH 31/67] xfs: add a xfs_rtgroup_raw_size helper Add a helper to figure out the on-disk size of a group, accounting for the XFS_SB_FEAT_INCOMPAT_ZONE_GAPS feature if needed. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_rtgroup.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index 73cace4d25c7..c0b9f9f2c413 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -371,4 +371,19 @@ xfs_rtgs_to_rfsbs( return xfs_groups_to_rfsbs(mp, nr_groups, XG_TYPE_RTG); } +/* + * Return the "raw" size of a group on the hardware device. This includes the + * daddr gaps present for XFS_SB_FEAT_INCOMPAT_ZONE_GAPS file systems.
+ */ +static inline xfs_rgblock_t +xfs_rtgroup_raw_size( + struct xfs_mount *mp) +{ + struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; + + if (g->has_daddr_gaps) + return 1U << g->blklog; + return g->blocks; +} + #endif /* __LIBXFS_RTGROUP_H */ From 776b76f7547fb839954aae06f58ac7b6b35c0b25 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 07:53:26 +0100 Subject: [PATCH 32/67] xfs: pass the write pointer to xfs_init_zone Move the two methods to query the write pointer out of xfs_init_zone into the callers, so that xfs_init_zone doesn't have to bother with the blk_zone structure and instead operates purely at the XFS realtime group level. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 66 +++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 4ca7769b5adb..335e683436d3 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -981,43 +981,43 @@ struct xfs_init_zones { uint64_t reclaimable; }; +/* + * For sequential write required zones, we restart writing at the hardware write + * pointer returned by xfs_zone_validate(). + * + * For conventional zones or conventional devices we have to query the rmap to + * find the highest recorded block and set the write pointer to the block after + * that. In case of a power loss this misses blocks where the data I/O has + * completed but not recorded in the rmap yet, and it also rewrites blocks if + * the most recently written ones got deleted again before unmount, but this is + * the best we can do without hardware support. 
+ */ +static xfs_rgblock_t +xfs_rmap_estimate_write_pointer( + struct xfs_rtgroup *rtg) +{ + xfs_rgblock_t highest_rgbno; + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); + + if (highest_rgbno == NULLRGBLOCK) + return 0; + return highest_rgbno + 1; +} + static int xfs_init_zone( struct xfs_init_zones *iz, struct xfs_rtgroup *rtg, - struct blk_zone *zone) + xfs_rgblock_t write_pointer) { struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; uint32_t used = rtg_rmap(rtg)->i_used_blocks; - xfs_rgblock_t write_pointer, highest_rgbno; int error; - if (zone && !xfs_zone_validate(zone, rtg, &write_pointer)) - return -EFSCORRUPTED; - - /* - * For sequential write required zones we retrieved the hardware write - * pointer above. - * - * For conventional zones or conventional devices we don't have that - * luxury. Instead query the rmap to find the highest recorded block - * and set the write pointer to the block after that. In case of a - * power loss this misses blocks where the data I/O has completed but - * not recorded in the rmap yet, and it also rewrites blocks if the most - * recently written ones got deleted again before unmount, but this is - * the best we can do without hardware support. - */ - if (!zone || zone->cond == BLK_ZONE_COND_NOT_WP) { - xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); - highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); - if (highest_rgbno == NULLRGBLOCK) - write_pointer = 0; - else - write_pointer = highest_rgbno + 1; - xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); - } - /* * If there are no used blocks, but the zone is not in empty state yet * we lost power before the zoned reset. 
In that case finish the work @@ -1066,6 +1066,7 @@ xfs_get_zone_info_cb( struct xfs_mount *mp = iz->mp; xfs_fsblock_t zsbno = xfs_daddr_to_rtb(mp, zone->start); xfs_rgnumber_t rgno; + xfs_rgblock_t write_pointer; struct xfs_rtgroup *rtg; int error; @@ -1080,7 +1081,13 @@ xfs_get_zone_info_cb( xfs_warn(mp, "realtime group not found for zone %u.", rgno); return -EFSCORRUPTED; } - error = xfs_init_zone(iz, rtg, zone); + if (!xfs_zone_validate(zone, rtg, &write_pointer)) { + xfs_rtgroup_rele(rtg); + return -EFSCORRUPTED; + } + if (zone->cond == BLK_ZONE_COND_NOT_WP) + write_pointer = xfs_rmap_estimate_write_pointer(rtg); + error = xfs_init_zone(iz, rtg, write_pointer); xfs_rtgroup_rele(rtg); return error; } @@ -1290,7 +1297,8 @@ xfs_mount_zones( struct xfs_rtgroup *rtg = NULL; while ((rtg = xfs_rtgroup_next(mp, rtg))) { - error = xfs_init_zone(&iz, rtg, NULL); + error = xfs_init_zone(&iz, rtg, + xfs_rmap_estimate_write_pointer(rtg)); if (error) { xfs_rtgroup_rele(rtg); goto out_free_zone_info; From 19c5b6051ed62d8c4b1cf92e463c1bcf629107f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 07:53:27 +0100 Subject: [PATCH 33/67] xfs: split and refactor zone validation Currently xfs_zone_validate mixes validating the software zone state in the XFS realtime group with validating the hardware state reported in struct blk_zone and deriving the write pointer from that. Move all code that works on the realtime group to xfs_init_zone, and only keep the hardware state validation in xfs_zone_validate. This makes the code more clear, and allows for better reuse in userspace. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_zones.c | 149 ++++++++++---------------------------- fs/xfs/libxfs/xfs_zones.h | 5 +- fs/xfs/xfs_zone_alloc.c | 28 ++++++- 3 files changed, 68 insertions(+), 114 deletions(-) diff --git a/fs/xfs/libxfs/xfs_zones.c b/fs/xfs/libxfs/xfs_zones.c index 8c3c67caf64e..24e350c31933 100644 --- a/fs/xfs/libxfs/xfs_zones.c +++ b/fs/xfs/libxfs/xfs_zones.c @@ -15,173 +15,102 @@ #include "xfs_zones.h" static bool -xfs_zone_validate_empty( +xfs_validate_blk_zone_seq( + struct xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg, + unsigned int zone_no, xfs_rgblock_t *write_pointer) { - struct xfs_mount *mp = rtg_mount(rtg); - - if (rtg_rmap(rtg)->i_used_blocks > 0) { - xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - *write_pointer = 0; - return true; -} - -static bool -xfs_zone_validate_wp( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = rtg_mount(rtg); - xfs_rtblock_t wp_fsb = xfs_daddr_to_rtb(mp, zone->wp); - - if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) { - xfs_warn(mp, "zone %u has too large used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - if (xfs_rtb_to_rgno(mp, wp_fsb) != rtg_rgno(rtg)) { - xfs_warn(mp, "zone %u write pointer (0x%llx) outside of zone.", - rtg_rgno(rtg), wp_fsb); - return false; - } - - *write_pointer = xfs_rtb_to_rgbno(mp, wp_fsb); - if (*write_pointer >= rtg->rtg_extents) { - xfs_warn(mp, "zone %u has invalid write pointer (0x%x).", - rtg_rgno(rtg), *write_pointer); - return false; - } - - return true; -} - -static bool -xfs_zone_validate_full( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = rtg_mount(rtg); - - if (rtg_rmap(rtg)->i_used_blocks > rtg->rtg_extents) { - xfs_warn(mp, "zone %u has 
too large used counter (0x%x).", - rtg_rgno(rtg), rtg_rmap(rtg)->i_used_blocks); - return false; - } - - *write_pointer = rtg->rtg_extents; - return true; -} - -static bool -xfs_zone_validate_seq( - struct blk_zone *zone, - struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer) -{ - struct xfs_mount *mp = rtg_mount(rtg); - switch (zone->cond) { case BLK_ZONE_COND_EMPTY: - return xfs_zone_validate_empty(zone, rtg, write_pointer); + *write_pointer = 0; + return true; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: case BLK_ZONE_COND_CLOSED: case BLK_ZONE_COND_ACTIVE: - return xfs_zone_validate_wp(zone, rtg, write_pointer); + if (zone->wp < zone->start || + zone->wp >= zone->start + zone->capacity) { + xfs_warn(mp, + "zone %u write pointer (%llu) outside of zone.", + zone_no, zone->wp); + return false; + } + + *write_pointer = XFS_BB_TO_FSB(mp, zone->wp - zone->start); + return true; case BLK_ZONE_COND_FULL: - return xfs_zone_validate_full(zone, rtg, write_pointer); + *write_pointer = XFS_BB_TO_FSB(mp, zone->capacity); + return true; case BLK_ZONE_COND_NOT_WP: case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: xfs_warn(mp, "zone %u has unsupported zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; default: xfs_warn(mp, "zone %u has unknown zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; } } static bool -xfs_zone_validate_conv( +xfs_validate_blk_zone_conv( + struct xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg) + unsigned int zone_no) { - struct xfs_mount *mp = rtg_mount(rtg); - switch (zone->cond) { case BLK_ZONE_COND_NOT_WP: return true; default: xfs_warn(mp, "conventional zone %u has unsupported zone condition 0x%x.", - rtg_rgno(rtg), zone->cond); + zone_no, zone->cond); return false; } } bool -xfs_zone_validate( +xfs_validate_blk_zone( + struct xfs_mount *mp, struct blk_zone *zone, - struct xfs_rtgroup *rtg, + unsigned int zone_no, + uint32_t 
expected_size, + uint32_t expected_capacity, xfs_rgblock_t *write_pointer) { - struct xfs_mount *mp = rtg_mount(rtg); - struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; - uint32_t expected_size; - /* * Check that the zone capacity matches the rtgroup size stored in the * superblock. Note that all zones including the last one must have a * uniform capacity. */ - if (XFS_BB_TO_FSB(mp, zone->capacity) != g->blocks) { + if (XFS_BB_TO_FSB(mp, zone->capacity) != expected_capacity) { xfs_warn(mp, -"zone %u capacity (0x%llx) does not match RT group size (0x%x).", - rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->capacity), - g->blocks); +"zone %u capacity (%llu) does not match RT group size (%u).", + zone_no, XFS_BB_TO_FSB(mp, zone->capacity), + expected_capacity); return false; } - if (g->has_daddr_gaps) { - expected_size = 1 << g->blklog; - } else { - if (zone->len != zone->capacity) { - xfs_warn(mp, -"zone %u has capacity != size ((0x%llx vs 0x%llx)", - rtg_rgno(rtg), - XFS_BB_TO_FSB(mp, zone->len), - XFS_BB_TO_FSB(mp, zone->capacity)); - return false; - } - expected_size = g->blocks; - } - if (XFS_BB_TO_FSB(mp, zone->len) != expected_size) { xfs_warn(mp, -"zone %u length (0x%llx) does match geometry (0x%x).", - rtg_rgno(rtg), XFS_BB_TO_FSB(mp, zone->len), +"zone %u length (%llu) does not match geometry (%u).", + zone_no, XFS_BB_TO_FSB(mp, zone->len), expected_size); + return false; } switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: - return xfs_zone_validate_conv(zone, rtg); + return xfs_validate_blk_zone_conv(mp, zone, zone_no); case BLK_ZONE_TYPE_SEQWRITE_REQ: - return xfs_zone_validate_seq(zone, rtg, write_pointer); + return xfs_validate_blk_zone_seq(mp, zone, zone_no, + write_pointer); default: xfs_warn(mp, "zoned %u has unsupported type 0x%x.", - rtg_rgno(rtg), zone->type); + zone_no, zone->type); return false; } } diff --git a/fs/xfs/libxfs/xfs_zones.h b/fs/xfs/libxfs/xfs_zones.h index df10a34da71d..c16089c9a652 100644 --- a/fs/xfs/libxfs/xfs_zones.h +++ 
b/fs/xfs/libxfs/xfs_zones.h @@ -37,7 +37,8 @@ struct blk_zone; */ #define XFS_DEFAULT_MAX_OPEN_ZONES 128 -bool xfs_zone_validate(struct blk_zone *zone, struct xfs_rtgroup *rtg, - xfs_rgblock_t *write_pointer); +bool xfs_validate_blk_zone(struct xfs_mount *mp, struct blk_zone *zone, + unsigned int zone_no, uint32_t expected_size, + uint32_t expected_capacity, xfs_rgblock_t *write_pointer); #endif /* _LIBXFS_ZONES_H */ diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 335e683436d3..538236ea2466 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -977,13 +977,15 @@ xfs_free_open_zones( struct xfs_init_zones { struct xfs_mount *mp; + uint32_t zone_size; + uint32_t zone_capacity; uint64_t available; uint64_t reclaimable; }; /* * For sequential write required zones, we restart writing at the hardware write - * pointer returned by xfs_zone_validate(). + * pointer returned by xfs_validate_blk_zone(). * * For conventional zones or conventional devices we have to query the rmap to * find the highest recorded block and set the write pointer to the block after @@ -1018,6 +1020,25 @@ xfs_init_zone( uint32_t used = rtg_rmap(rtg)->i_used_blocks; int error; + if (write_pointer > rtg->rtg_extents) { + xfs_warn(mp, "zone %u has invalid write pointer (0x%x).", + rtg_rgno(rtg), write_pointer); + return -EFSCORRUPTED; + } + + if (used > rtg->rtg_extents) { + xfs_warn(mp, +"zone %u has used counter (0x%x) larger than zone capacity (0x%llx).", + rtg_rgno(rtg), used, rtg->rtg_extents); + return -EFSCORRUPTED; + } + + if (write_pointer == 0 && used != 0) { + xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).", + rtg_rgno(rtg), used); + return -EFSCORRUPTED; + } + /* * If there are no used blocks, but the zone is not in empty state yet * we lost power before the zoned reset. 
In that case finish the work @@ -1081,7 +1102,8 @@ xfs_get_zone_info_cb( xfs_warn(mp, "realtime group not found for zone %u.", rgno); return -EFSCORRUPTED; } - if (!xfs_zone_validate(zone, rtg, &write_pointer)) { + if (!xfs_validate_blk_zone(mp, zone, idx, iz->zone_size, + iz->zone_capacity, &write_pointer)) { xfs_rtgroup_rele(rtg); return -EFSCORRUPTED; } @@ -1227,6 +1249,8 @@ xfs_mount_zones( { struct xfs_init_zones iz = { .mp = mp, + .zone_capacity = mp->m_groups[XG_TYPE_RTG].blocks, + .zone_size = xfs_rtgroup_raw_size(mp), }; struct xfs_buftarg *bt = mp->m_rtdev_targp; xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks; From b37c1e4e9af795ac31ddc992b0461182c45705dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 07:53:28 +0100 Subject: [PATCH 34/67] xfs: check that used blocks are smaller than the write pointer Any used block must have been written, so reject zones whose used block counter is larger than the write pointer. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 538236ea2466..d127f122d6cf 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1033,6 +1033,13 @@ xfs_init_zone( return -EFSCORRUPTED; } + if (used > write_pointer) { + xfs_warn(mp, +"zone %u has used counter (0x%x) larger than write pointer (0x%x).", + rtg_rgno(rtg), used, write_pointer); + return -EFSCORRUPTED; + } + if (write_pointer == 0 && used != 0) { xfs_warn(mp, "empty zone %u has non-zero used counter (0x%x).", rtg_rgno(rtg), used); From 12d12dcc1508874886ebcbd2aefba74f1ed71f98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2026 07:53:29 +0100 Subject: [PATCH 35/67] xfs: use blkdev_get_zone_info to simplify zone reporting Unwind the callback based programming model by querying the cached zone information using blkdev_get_zone_info.
Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 114 ++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 64 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index d127f122d6cf..89d3a5c878ee 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -976,7 +976,6 @@ xfs_free_open_zones( } struct xfs_init_zones { - struct xfs_mount *mp; uint32_t zone_size; uint32_t zone_capacity; uint64_t available; @@ -994,19 +993,52 @@ struct xfs_init_zones { * the most recently written ones got deleted again before unmount, but this is * the best we can do without hardware support. */ -static xfs_rgblock_t -xfs_rmap_estimate_write_pointer( - struct xfs_rtgroup *rtg) +static int +xfs_query_write_pointer( + struct xfs_init_zones *iz, + struct xfs_rtgroup *rtg, + xfs_rgblock_t *write_pointer) { + struct xfs_mount *mp = rtg_mount(rtg); + struct block_device *bdev = mp->m_rtdev_targp->bt_bdev; + sector_t start = xfs_gbno_to_daddr(&rtg->rtg_group, 0); xfs_rgblock_t highest_rgbno; + struct blk_zone zone = {}; + int error; + + if (bdev_is_zoned(bdev)) { + error = blkdev_get_zone_info(bdev, start, &zone); + if (error) + return error; + if (zone.start != start) { + xfs_warn(mp, "mismatched zone start: 0x%llx/0x%llx.", + zone.start, start); + return -EFSCORRUPTED; + } + + if (!xfs_validate_blk_zone(mp, &zone, rtg_rgno(rtg), + iz->zone_size, iz->zone_capacity, + write_pointer)) + return -EFSCORRUPTED; + + /* + * Use the hardware write pointer returned by + * xfs_validate_blk_zone for sequential write required zones, + * else fall through to the rmap-based estimation below. 
+ */ + if (zone.cond != BLK_ZONE_COND_NOT_WP) + return 0; + } xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); if (highest_rgbno == NULLRGBLOCK) - return 0; - return highest_rgbno + 1; + *write_pointer = 0; + else + *write_pointer = highest_rgbno + 1; + return 0; } static int @@ -1084,43 +1116,6 @@ xfs_init_zone( return 0; } -static int -xfs_get_zone_info_cb( - struct blk_zone *zone, - unsigned int idx, - void *data) -{ - struct xfs_init_zones *iz = data; - struct xfs_mount *mp = iz->mp; - xfs_fsblock_t zsbno = xfs_daddr_to_rtb(mp, zone->start); - xfs_rgnumber_t rgno; - xfs_rgblock_t write_pointer; - struct xfs_rtgroup *rtg; - int error; - - if (xfs_rtb_to_rgbno(mp, zsbno) != 0) { - xfs_warn(mp, "mismatched zone start 0x%llx.", zsbno); - return -EFSCORRUPTED; - } - - rgno = xfs_rtb_to_rgno(mp, zsbno); - rtg = xfs_rtgroup_grab(mp, rgno); - if (!rtg) { - xfs_warn(mp, "realtime group not found for zone %u.", rgno); - return -EFSCORRUPTED; - } - if (!xfs_validate_blk_zone(mp, zone, idx, iz->zone_size, - iz->zone_capacity, &write_pointer)) { - xfs_rtgroup_rele(rtg); - return -EFSCORRUPTED; - } - if (zone->cond == BLK_ZONE_COND_NOT_WP) - write_pointer = xfs_rmap_estimate_write_pointer(rtg); - error = xfs_init_zone(iz, rtg, write_pointer); - xfs_rtgroup_rele(rtg); - return error; -} - /* * Calculate the max open zone limit based on the of number of backing zones * available. 
@@ -1255,15 +1250,13 @@ xfs_mount_zones( struct xfs_mount *mp) { struct xfs_init_zones iz = { - .mp = mp, .zone_capacity = mp->m_groups[XG_TYPE_RTG].blocks, .zone_size = xfs_rtgroup_raw_size(mp), }; - struct xfs_buftarg *bt = mp->m_rtdev_targp; - xfs_extlen_t zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks; + struct xfs_rtgroup *rtg = NULL; int error; - if (!bt) { + if (!mp->m_rtdev_targp) { xfs_notice(mp, "RT device missing."); return -EINVAL; } @@ -1291,7 +1284,7 @@ xfs_mount_zones( return -ENOMEM; xfs_info(mp, "%u zones of %u blocks (%u max open zones)", - mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones); + mp->m_sb.sb_rgcount, iz.zone_capacity, mp->m_max_open_zones); trace_xfs_zones_mount(mp); /* @@ -1315,25 +1308,18 @@ xfs_mount_zones( * or beneficial. */ mp->m_super->s_min_writeback_pages = - XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >> + XFS_FSB_TO_B(mp, min(iz.zone_capacity, XFS_MAX_BMBT_EXTLEN)) >> PAGE_SHIFT; - if (bdev_is_zoned(bt->bt_bdev)) { - error = blkdev_report_zones_cached(bt->bt_bdev, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart), - mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, &iz); - if (error < 0) - goto out_free_zone_info; - } else { - struct xfs_rtgroup *rtg = NULL; + while ((rtg = xfs_rtgroup_next(mp, rtg))) { + xfs_rgblock_t write_pointer; - while ((rtg = xfs_rtgroup_next(mp, rtg))) { - error = xfs_init_zone(&iz, rtg, - xfs_rmap_estimate_write_pointer(rtg)); - if (error) { - xfs_rtgroup_rele(rtg); - goto out_free_zone_info; - } + error = xfs_query_write_pointer(&iz, rtg, &write_pointer); + if (!error) + error = xfs_init_zone(&iz, rtg, write_pointer); + if (error) { + xfs_rtgroup_rele(rtg); + goto out_free_zone_info; } } From 4d6d335ea9558a7dc0c5044886440d7223596235 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 20 Jan 2026 22:45:40 -0800 Subject: [PATCH 36/67] xfs: promote metadata directories and large block support Large block support was merged upstream in 6.12 (Dec 2024) and metadata directories was merged in 6.13 (Jan 2025). We've not received any serious complaints about the ondisk formats of these two features in the past year, so let's remove the experimental warnings. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_message.c | 8 -------- fs/xfs/xfs_message.h | 2 -- fs/xfs/xfs_super.c | 4 ---- 3 files changed, 14 deletions(-) diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 16bdc38887ab..fd297082aeb8 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -149,14 +149,6 @@ xfs_warn_experimental( .opstate = XFS_OPSTATE_WARNED_LARP, .name = "logged extended attributes", }, - [XFS_EXPERIMENTAL_LBS] = { - .opstate = XFS_OPSTATE_WARNED_LBS, - .name = "large block size", - }, - [XFS_EXPERIMENTAL_METADIR] = { - .opstate = XFS_OPSTATE_WARNED_METADIR, - .name = "metadata directory tree", - }, [XFS_EXPERIMENTAL_ZONED] = { .opstate = XFS_OPSTATE_WARNED_ZONED, .name = "zoned RT device", diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index d68e72379f9d..49b0ef40d299 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -93,8 +93,6 @@ void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg, enum xfs_experimental_feat { XFS_EXPERIMENTAL_SHRINK, XFS_EXPERIMENTAL_LARP, - XFS_EXPERIMENTAL_LBS, - XFS_EXPERIMENTAL_METADIR, XFS_EXPERIMENTAL_ZONED, XFS_EXPERIMENTAL_MAX, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 845abf437bf5..e05bf62a5413 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1835,8 +1835,6 @@ xfs_fs_fill_super( error = -ENOSYS; goto out_free_sb; } - - xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS); } /* Ensure this filesystem fits in the page cache limits */ @@ -1922,8 +1920,6 @@ 
xfs_fs_fill_super( goto out_filestream_unmount; } xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED); - } else if (xfs_has_metadir(mp)) { - xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); } if (xfs_has_reflink(mp)) { From 01a28961549ac9c387ccd5eb00d58be1d8c2794b Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 20 Jan 2026 09:57:46 +0100 Subject: [PATCH 37/67] xfs: always allocate the free zone with the lowest index Zones in the beginning of the address space are typically mapped to higher bandwidth tracks on HDDs than those at the end of the address space. So, instead of allocating zones "round robin" across the whole address space, always allocate the zone with the lowest index. This increases average write bandwidth for overwrite workloads when less than the full capacity is being used. At ~50% utilization this improves bandwidth for a random file overwrite benchmark with 128MiB files and 256MiB zone capacity by 30%. Running the same benchmark with small 2-8 MiB files at 67% capacity shows no significant difference in performance. Due to heavy fragmentation the whole zone range is in use, greatly limiting the number of free zones with high bw. 
Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 47 +++++++++++++++-------------------------- fs/xfs/xfs_zone_priv.h | 1 - 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 89d3a5c878ee..b60952565737 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -408,31 +408,6 @@ xfs_zone_free_blocks( return 0; } -static struct xfs_group * -xfs_find_free_zone( - struct xfs_mount *mp, - unsigned long start, - unsigned long end) -{ - struct xfs_zone_info *zi = mp->m_zone_info; - XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, start); - struct xfs_group *xg; - - xas_lock(&xas); - xas_for_each_marked(&xas, xg, end, XFS_RTG_FREE) - if (atomic_inc_not_zero(&xg->xg_active_ref)) - goto found; - xas_unlock(&xas); - return NULL; - -found: - xas_clear_mark(&xas, XFS_RTG_FREE); - atomic_dec(&zi->zi_nr_free_zones); - zi->zi_free_zone_cursor = xg->xg_gno; - xas_unlock(&xas); - return xg; -} - static struct xfs_open_zone * xfs_init_open_zone( struct xfs_rtgroup *rtg, @@ -472,13 +447,25 @@ xfs_open_zone( bool is_gc) { struct xfs_zone_info *zi = mp->m_zone_info; + XA_STATE (xas, &mp->m_groups[XG_TYPE_RTG].xa, 0); struct xfs_group *xg; - xg = xfs_find_free_zone(mp, zi->zi_free_zone_cursor, ULONG_MAX); - if (!xg) - xg = xfs_find_free_zone(mp, 0, zi->zi_free_zone_cursor); - if (!xg) - return NULL; + /* + * Pick the free zone with lowest index. Zones in the beginning of the + * address space typically provides higher bandwidth than those at the + * end of the address space on HDDs. 
+ */ + xas_lock(&xas); + xas_for_each_marked(&xas, xg, ULONG_MAX, XFS_RTG_FREE) + if (atomic_inc_not_zero(&xg->xg_active_ref)) + goto found; + xas_unlock(&xas); + return NULL; + +found: + xas_clear_mark(&xas, XFS_RTG_FREE); + atomic_dec(&zi->zi_nr_free_zones); + xas_unlock(&xas); set_current_state(TASK_RUNNING); return xfs_init_open_zone(to_rtg(xg), 0, write_hint, is_gc); diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index ce7f0e2f4598..8fbf9a52964e 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -72,7 +72,6 @@ struct xfs_zone_info { /* * Free zone search cursor and number of free zones: */ - unsigned long zi_free_zone_cursor; atomic_t zi_nr_free_zones; /* From a1ca658d649a4d8972e2e21ac2625b633217e327 Mon Sep 17 00:00:00 2001 From: Wenwu Hou Date: Sat, 17 Jan 2026 14:52:43 +0800 Subject: [PATCH 38/67] xfs: fix incorrect context handling in xfs_trans_roll The memalloc_nofs_save() and memalloc_nofs_restore() calls are incorrectly paired in xfs_trans_roll. Call path: xfs_trans_alloc() __xfs_trans_alloc() // tp->t_pflags = memalloc_nofs_save(); xfs_trans_set_context() ... xfs_defer_trans_roll() xfs_trans_roll() xfs_trans_dup() // old_tp->t_pflags = 0; xfs_trans_switch_context() __xfs_trans_commit() xfs_trans_free() // memalloc_nofs_restore(tp->t_pflags); xfs_trans_clear_context() The code passes 0 to memalloc_nofs_restore() when committing the original transaction, but memalloc_nofs_restore() should always receive the flags returned from the paired memalloc_nofs_save() call. Before commit 3f6d5e6a468d ("mm: introduce memalloc_flags_{save,restore}"), calling memalloc_nofs_restore(0) would unset the PF_MEMALLOC_NOFS flag, which could cause memory allocation deadlocks[1]. Fortunately, after that commit, memalloc_nofs_restore(0) does nothing, so this issue is currently harmless. 
Fixes: 756b1c343333 ("xfs: use current->journal_info for detecting transaction recursion") Link: https://lore.kernel.org/linux-xfs/20251104131857.1587584-1-leo.lilong@huawei.com [1] Signed-off-by: Wenwu Hou Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans.c | 8 ++++++-- fs/xfs/xfs_trans.h | 9 --------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2c3c29d0d4a0..bcc470f56e46 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -124,8 +124,6 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - xfs_trans_switch_context(tp, ntp); - /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -1043,6 +1041,12 @@ xfs_trans_roll( * locked be logged in the prior and the next transactions. */ tp = *tpp; + /* + * __xfs_trans_commit cleared the NOFS flag by calling into + * xfs_trans_free. Set it again here before doing memory + * allocations. + */ + xfs_trans_set_context(tp); error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); if (error) return error; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 8830600b3e72..eb83c5dac032 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -280,13 +280,4 @@ xfs_trans_clear_context( memalloc_nofs_restore(tp->t_pflags); } -static inline void -xfs_trans_switch_context( - struct xfs_trans *old_tp, - struct xfs_trans *new_tp) -{ - new_tp->t_pflags = old_tp->t_pflags; - old_tp->t_pflags = 0; -} - #endif /* __XFS_TRANS_H__ */ From 6f13c1d2a6271c2e73226864a0e83de2770b6f34 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Fri, 23 Jan 2026 09:27:30 -0800 Subject: [PATCH 39/67] xfs: delete attr leaf freemap entries when empty Back in commit 2a2b5932db6758 ("xfs: fix attr leaf header freemap.size underflow"), Brian Foster observed that it's possible for a small freemap at the end of the xattr entries array to experience a size underflow when subtracting the space consumed by an expansion of the entries array. There are only three freemap entries, which means that it is not a complete index of all free space in the leaf block. This code can leave behind a zero-length freemap entry with a nonzero base. Subsequent setxattr operations can increase the base up to the point that it overlaps with another freemap entry. This isn't in and of itself a problem because the code in _leaf_add that finds free space ignores any freemap entry with zero size. However, there's another bug in the freemap update code in _leaf_add, which is that it fails to update a freemap entry that begins midway through the xattr entry that was just appended to the array. That can result in the freemap containing two entries with the same base but different sizes (0 for the "pushed-up" entry, nonzero for the entry that's actually tracking free space). A subsequent _leaf_add can then allocate xattr namevalue entries on top of the entries array, leading to data loss. But fixing that is for later. For now, eliminate the possibility of confusion by zeroing out the base of any freemap entry that has zero size. Because the freemap is not intended to be a complete index of free space, a subsequent failure to find any free space for a new xattr will trigger block compaction, which regenerates the freemap. It looks like this bug has been in the codebase for quite a long time. Cc: # v2.6.12 Fixes: 1da177e4c3f415 ("Linux-2.6.12-rc2") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6061230b17ef..c8c9737f0456 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1580,6 +1580,19 @@ xfs_attr3_leaf_add_work( min_t(uint16_t, ichdr->freemap[i].size, sizeof(xfs_attr_leaf_entry_t)); } + + /* + * Don't leave zero-length freemaps with nonzero base lying + * around, because we don't want the code in _remove that + * matches on base address to get confused and create + * overlapping freemaps. If we end up with no freemap entries + * then the next _add will compact the leaf block and + * regenerate the freemaps. + */ + if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) { + ichdr->freemap[i].base = 0; + ichdr->holes = 1; + } } ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); } From 3eefc0c2b78444b64feeb3783c017d6adc3cd3ce Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:31 -0800 Subject: [PATCH 40/67] xfs: fix freemap adjustments when adding xattrs to leaf blocks xfs/592 and xfs/794 both trip this assertion in the leaf block freemap adjustment code after ~20 minutes of running on my test VMs: ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) + xfs_attr3_leaf_hdr_size(leaf)); Upon enabling quite a lot more debugging code, I narrowed this down to fsstress trying to set a local extended attribute with namelen=3 and valuelen=71. This results in an entry size of 80 bytes. At the start of xfs_attr3_leaf_add_work, the freemap looks like this: i 0 base 448 size 0 rhs 448 count 46 i 1 base 388 size 132 rhs 448 count 46 i 2 base 2120 size 4 rhs 448 count 46 firstused = 520 where "rhs" is the first byte past the end of the leaf entry array. This is inconsistent -- the entries array ends at byte 448, but freemap[1] says there's free space starting at byte 388! 
By the end of the function, the freemap is in worse shape: i 0 base 456 size 0 rhs 456 count 47 i 1 base 388 size 52 rhs 456 count 47 i 2 base 2120 size 4 rhs 456 count 47 firstused = 440 Important note: 388 is not aligned with the entries array element size of 8 bytes. Based on the incorrect freemap, the name area starts at byte 440, which is below the end of the entries array! That's why the assertion triggers and the filesystem shuts down. How did we end up here? First, recall from the previous patch that the freemap array in an xattr leaf block is not intended to be a comprehensive map of all free space in the leaf block. In other words, it's perfectly legal to have a leaf block with: * 376 bytes in use by the entries array * freemap[0] has [base = 376, size = 8] * freemap[1] has [base = 388, size = 1500] * the space between 376 and 388 is free, but the freemap stopped tracking that some time ago If we add one xattr, the entries array grows to 384 bytes, and freemap[0] becomes [base = 384, size = 0]. So far, so good. But if we add a second xattr, the entries array grows to 392 bytes, and freemap[0] gets pushed up to [base = 392, size = 0]. This is bad, because freemap[1] hasn't been updated, and now the entries array and the free space claim the same space. The fix here is to adjust all freemap entries so that none of them collide with the entries array. Note that this fix relies on commit 2a2b5932db6758 ("xfs: fix attr leaf header freemap.size underflow") and the previous patch that resets zero length freemap entries to have base = 0. Cc: # v2.6.12 Fixes: 1da177e4c3f415 ("Linux-2.6.12-rc2") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 36 +++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c8c9737f0456..c0d625227137 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -1476,6 +1476,7 @@ xfs_attr3_leaf_add_work( struct xfs_attr_leaf_name_local *name_loc; struct xfs_attr_leaf_name_remote *name_rmt; struct xfs_mount *mp; + int old_end, new_end; int tmp; int i; @@ -1568,17 +1569,36 @@ xfs_attr3_leaf_add_work( if (be16_to_cpu(entry->nameidx) < ichdr->firstused) ichdr->firstused = be16_to_cpu(entry->nameidx); - ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf)); - tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + new_end = ichdr->count * sizeof(struct xfs_attr_leaf_entry) + + xfs_attr3_leaf_hdr_size(leaf); + old_end = new_end - sizeof(struct xfs_attr_leaf_entry); + + ASSERT(ichdr->firstused >= new_end); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (ichdr->freemap[i].base == tmp) { - ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); + int diff = 0; + + if (ichdr->freemap[i].base == old_end) { + /* + * This freemap entry starts at the old end of the + * leaf entry array, so we need to adjust its base + * upward to accomodate the larger array. + */ + diff = sizeof(struct xfs_attr_leaf_entry); + } else if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < new_end) { + /* + * This freemap entry starts in the space claimed by + * the new leaf entry. Adjust its base upward to + * reflect that. 
+ */ + diff = new_end - ichdr->freemap[i].base; + } + + if (diff) { + ichdr->freemap[i].base += diff; ichdr->freemap[i].size -= - min_t(uint16_t, ichdr->freemap[i].size, - sizeof(xfs_attr_leaf_entry_t)); + min_t(uint16_t, ichdr->freemap[i].size, diff); } /* From a165f7e7633ee0d83926d29e7909fdd8dd4dfadc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:31 -0800 Subject: [PATCH 41/67] xfs: refactor attr3 leaf table size computation Replace all the open-coded callsites with a single static inline helper. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 57 ++++++++++++++++++----------------- fs/xfs/libxfs/xfs_da_format.h | 2 +- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c0d625227137..75d481427bfe 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -75,6 +75,16 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, int move_count); STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +/* Compute the byte offset of the end of the leaf entry array. */ +static inline int +xfs_attr_leaf_entries_end( + unsigned int hdrcount, + const struct xfs_attr_leafblock *leaf) +{ + return hdrcount * sizeof(struct xfs_attr_leaf_entry) + + xfs_attr3_leaf_hdr_size(leaf); +} + /* * attr3 block 'firstused' conversion helpers. * @@ -1409,8 +1419,7 @@ xfs_attr3_leaf_add( * Search through freemap for first-fit on new name length. 
* (may need to figure in size of entry struct too) */ - tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf); for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { if (tablesize > ichdr.firstused) { sum += ichdr.freemap[i].size; @@ -1569,8 +1578,7 @@ xfs_attr3_leaf_add_work( if (be16_to_cpu(entry->nameidx) < ichdr->firstused) ichdr->firstused = be16_to_cpu(entry->nameidx); - new_end = ichdr->count * sizeof(struct xfs_attr_leaf_entry) + - xfs_attr3_leaf_hdr_size(leaf); + new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf); old_end = new_end - sizeof(struct xfs_attr_leaf_entry); ASSERT(ichdr->firstused >= new_end); @@ -1807,8 +1815,8 @@ xfs_attr3_leaf_rebalance( /* * leaf2 is the destination, compact it if it looks tight. */ - max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr2.firstused - + xfs_attr_leaf_entries_end(ichdr2.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); @@ -1836,8 +1844,8 @@ xfs_attr3_leaf_rebalance( /* * leaf1 is the destination, compact it if it looks tight. 
*/ - max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); - max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); + max = ichdr1.firstused - + xfs_attr_leaf_entries_end(ichdr1.count, leaf1); if (space > max) xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); @@ -2043,9 +2051,7 @@ xfs_attr3_leaf_toosmall( blk = &state->path.blk[ state->path.active-1 ]; leaf = blk->bp->b_addr; xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf); - bytes = xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t) + - ichdr.usedbytes; + bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes; if (bytes > (state->args->geo->blksize >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return 0; @@ -2103,9 +2109,8 @@ xfs_attr3_leaf_toosmall( bytes = state->args->geo->blksize - (state->args->geo->blksize >> 2) - ichdr.usedbytes - ichdr2.usedbytes - - ((ichdr.count + ichdr2.count) * - sizeof(xfs_attr_leaf_entry_t)) - - xfs_attr3_leaf_hdr_size(leaf); + xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count, + leaf); xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) @@ -2167,8 +2172,7 @@ xfs_attr3_leaf_remove( ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); ASSERT(args->index >= 0 && args->index < ichdr.count); - ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + - xfs_attr3_leaf_hdr_size(leaf)); + ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf)); entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2181,8 +2185,7 @@ xfs_attr3_leaf_remove( * find smallest free region in case we need to replace it, * adjust any map that borders the entry table, */ - tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf); + tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf); tmp = ichdr.freemap[0].size; before = after = -1; smallest = XFS_ATTR_LEAF_MAPSIZE - 1; @@ -2289,8 +2292,7 @@ xfs_attr3_leaf_remove( * Check if leaf is less than 50% full, caller may want to * 
"join" the leaf with a sibling if so. */ - tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + - ichdr.count * sizeof(xfs_attr_leaf_entry_t); + tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf); return tmp < args->geo->magicpct; /* leaf is < 37% full */ } @@ -2613,11 +2615,11 @@ xfs_attr3_leaf_moveents( ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); ASSERT(ichdr_s->magic == ichdr_d->magic); ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); - ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) - + xfs_attr3_leaf_hdr_size(leaf_s)); + ASSERT(ichdr_s->firstused >= + xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s)); ASSERT(ichdr_d->count < args->geo->blksize / 8); - ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) - + xfs_attr3_leaf_hdr_size(leaf_d)); + ASSERT(ichdr_d->firstused >= + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d)); ASSERT(start_s < ichdr_s->count); ASSERT(start_d <= ichdr_d->count); @@ -2677,8 +2679,7 @@ xfs_attr3_leaf_moveents( ichdr_d->usedbytes += tmp; ichdr_s->count -= 1; ichdr_d->count += 1; - tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) - + xfs_attr3_leaf_hdr_size(leaf_d); + tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ASSERT(ichdr_d->firstused >= tmp); #ifdef GROT } @@ -2714,8 +2715,8 @@ xfs_attr3_leaf_moveents( /* * Fill in the freemap information */ - ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); - ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); + ichdr_d->freemap[0].base = + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; ichdr_d->freemap[1].base = 0; ichdr_d->freemap[2].base = 0; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 86de99e2f757..7d55307e619f 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -746,7 +746,7 @@ struct xfs_attr3_leafblock { #define 
XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) +xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp) { if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) return sizeof(struct xfs_attr3_leaf_hdr); From 27a0c41f33d8d31558d334b07eb58701aab0b3dd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:32 -0800 Subject: [PATCH 42/67] xfs: strengthen attr leaf block freemap checking Check for erroneous overlapping freemap regions and collisions between freemap regions and the xattr leaf entry array. Note that we must explicitly zero out the extra freemaps in xfs_attr3_leaf_compact so that the in-memory buffer has a correctly initialized freemap array to satisfy the new verification code, even if subsequent code changes the contents before unlocking the buffer. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.c | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 75d481427bfe..c3327b10709c 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -85,6 +85,49 @@ xfs_attr_leaf_entries_end( xfs_attr3_leaf_hdr_size(leaf); } +static inline bool +ichdr_freemaps_overlap( + const struct xfs_attr3_icleaf_hdr *ichdr, + unsigned int x, + unsigned int y) +{ + const unsigned int xend = + ichdr->freemap[x].base + ichdr->freemap[x].size; + const unsigned int yend = + ichdr->freemap[y].base + ichdr->freemap[y].size; + + /* empty slots do not overlap */ + if (!ichdr->freemap[x].size || !ichdr->freemap[y].size) + return false; + + return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base; +} + +static inline xfs_failaddr_t +xfs_attr_leaf_ichdr_freemaps_verify( + const struct xfs_attr3_icleaf_hdr *ichdr, + const struct xfs_attr_leafblock *leaf) +{ + unsigned int entries_end = + 
xfs_attr_leaf_entries_end(ichdr->count, leaf); + int i; + + if (ichdr_freemaps_overlap(ichdr, 0, 1)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 0, 2)) + return __this_address; + if (ichdr_freemaps_overlap(ichdr, 1, 2)) + return __this_address; + + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (ichdr->freemap[i].size > 0 && + ichdr->freemap[i].base < entries_end) + return __this_address; + } + + return NULL; +} + /* * attr3 block 'firstused' conversion helpers. * @@ -228,6 +271,8 @@ xfs_attr3_leaf_hdr_to_disk( hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); return; } to->hdr.info.forw = cpu_to_be32(from->forw); @@ -243,6 +288,8 @@ xfs_attr3_leaf_hdr_to_disk( to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); } + + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); } static xfs_failaddr_t @@ -395,6 +442,10 @@ xfs_attr3_leaf_verify( return __this_address; } + fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf); + if (fa) + return fa; + return NULL; } @@ -1664,6 +1715,10 @@ xfs_attr3_leaf_compact( ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); ichdr_dst->freemap[0].size = ichdr_dst->firstused - ichdr_dst->freemap[0].base; + ichdr_dst->freemap[1].base = 0; + ichdr_dst->freemap[2].base = 0; + ichdr_dst->freemap[1].size = 0; + ichdr_dst->freemap[2].size = 0; /* write the header back to initialise the underlying buffer */ xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst); From 6fed8270448c246e706921c177e9633013dd3fcf Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Fri, 23 Jan 2026 09:27:33 -0800 Subject: [PATCH 43/67] xfs: fix the xattr scrub to detect freemap/entries array collisions In the previous patches, we observed that it's possible for there to be freemap entries with zero size but a nonzero base. This isn't an inconsistency per se, but older kernels can get confused by this and corrupt the block, leading to corruption. If we see this, flag the xattr structure for optimization so that it gets rebuilt. Cc: # v4.15 Fixes: 13791d3b833428 ("xfs: scrub extended attribute leaf space") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/attr.c | 54 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index eeb5ac34d742..a397c50b7794 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -287,32 +287,6 @@ xchk_xattr_set_map( return ret; } -/* - * Check the leaf freemap from the usage bitmap. Returns false if the - * attr freemap has problems or points to used space. - */ -STATIC bool -xchk_xattr_check_freemap( - struct xfs_scrub *sc, - struct xfs_attr3_icleaf_hdr *leafhdr) -{ - struct xchk_xattr_buf *ab = sc->buf; - unsigned int mapsize = sc->mp->m_attr_geo->blksize; - int i; - - /* Construct bitmap of freemap contents. */ - bitmap_zero(ab->freemap, mapsize); - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { - if (!xchk_xattr_set_map(sc, ab->freemap, - leafhdr->freemap[i].base, - leafhdr->freemap[i].size)) - return false; - } - - /* Look for bits that are set in freemap and are marked in use. */ - return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize); -} - /* * Check this leaf entry's relations to everything else. * Returns the number of bytes used for the name/value data. @@ -403,6 +377,7 @@ xchk_xattr_block( *last_checked = blk->blkno; bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + bitmap_zero(ab->freemap, mp->m_attr_geo->blksize); /* Check all the padding. 
*/ if (xfs_has_crc(ds->sc->mp)) { @@ -449,6 +424,9 @@ xchk_xattr_block( if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) xchk_da_set_corrupt(ds, level); + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { /* Mark the leaf entry itself. */ @@ -467,7 +445,29 @@ xchk_xattr_block( goto out; } - if (!xchk_xattr_check_freemap(ds->sc, &leafhdr)) + /* Construct bitmap of freemap contents. */ + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { + if (!xchk_xattr_set_map(ds->sc, ab->freemap, + leafhdr.freemap[i].base, + leafhdr.freemap[i].size)) + xchk_da_set_corrupt(ds, level); + + /* + * freemap entries with zero length and nonzero base can cause + * problems with older kernels, so we mark these for preening + * even though there's no inconsistency. + */ + if (leafhdr.freemap[i].size == 0 && + leafhdr.freemap[i].base > 0) + xchk_da_set_preen(ds, level); + + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + } + + /* Look for bits that are set in freemap and are marked in use. */ + if (bitmap_intersects(ab->freemap, ab->usedmap, + mp->m_attr_geo->blksize)) xchk_da_set_corrupt(ds, level); if (leafhdr.usedbytes != usedbytes) From bd3138e8912c9db182eac5fed1337645a98b7a4f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:33 -0800 Subject: [PATCH 44/67] xfs: fix remote xattr valueblk check In debugging other problems with generic/753, it turns out that it's possible for the system to go down in the middle of a remote xattr set operation such that the leaf block entry is marked incomplete and valueblk is set to zero. Make this no longer a failure. Cc: # v4.15 Fixes: 13791d3b833428 ("xfs: scrub extended attribute leaf space") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/attr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index a397c50b7794..c3c122ea2d32 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -338,7 +338,10 @@ xchk_xattr_entry( rentry = xfs_attr3_leaf_name_remote(leaf, idx); namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); name_end = (char *)rentry + namesize; - if (rentry->namelen == 0 || rentry->valueblk == 0) + if (rentry->namelen == 0) + xchk_da_set_corrupt(ds, level); + if (rentry->valueblk == 0 && + !(ent->flags & XFS_ATTR_INCOMPLETE)) xchk_da_set_corrupt(ds, level); } if (name_end > buf_end) From 1ef7729df1f0c5f7bb63a121164f54d376d35835 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:34 -0800 Subject: [PATCH 45/67] xfs: reduce xfs_attr_try_sf_addname parameters The dp parameter to this function is an alias of args->dp, so remove it for clarity before we go adding new callers. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 9e6b18d6ae00..9a5402d1e9bf 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -350,16 +350,14 @@ xfs_attr_set_resv( */ STATIC int xfs_attr_try_sf_addname( - struct xfs_inode *dp, struct xfs_da_args *args) { - int error; /* * Build initial attribute list (if required). */ - if (dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS) + if (args->dp->i_af.if_format == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(args); error = xfs_attr_shortform_addname(args); @@ -371,9 +369,9 @@ xfs_attr_try_sf_addname( * NOTE: this is also the error path (EEXIST, etc). 
*/ if (!error) - xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(args->trans, args->dp, XFS_ICHGTIME_CHG); - if (xfs_has_wsync(dp->i_mount)) + if (xfs_has_wsync(args->dp->i_mount)) xfs_trans_set_sync(args->trans); return error; @@ -384,10 +382,9 @@ xfs_attr_sf_addname( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; - struct xfs_inode *dp = args->dp; int error = 0; - error = xfs_attr_try_sf_addname(dp, args); + error = xfs_attr_try_sf_addname(args); if (error != -ENOSPC) { ASSERT(!error || error == -EEXIST); attr->xattri_dela_state = XFS_DAS_DONE; From d693534513d8dcdaafcf855986d0fe0476a47462 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:35 -0800 Subject: [PATCH 46/67] xfs: speed up parent pointer operations when possible After a recent fsmark benchmarking run, I observed that the overhead of parent pointers on file creation and deletion can be a bit high. On a machine with 20 CPUs, 128G of memory, and an NVME SSD capable of pushing 750000iops, I see the following results: $ mkfs.xfs -f -l logdev=/dev/nvme1n1,size=1g /dev/nvme0n1 -n parent=0 meta-data=/dev/nvme0n1 isize=512 agcount=40, agsize=9767586 blks = sectsz=4096 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=1 = reflink=1 bigtime=1 inobtcount=1 nrext64=1 = exchange=0 metadir=0 data = bsize=4096 blocks=390703440, imaxpct=5 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1, parent=0 log =/dev/nvme1n1 bsize=4096 blocks=262144, version=2 = sectsz=4096 sunit=1 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 = rgcount=0 rgsize=0 extents = zoned=0 start=0 reserved=0 So we created 40 AGs, one per CPU. Now we create 40 directories and run fsmark: $ time fs_mark -D 10000 -S 0 -n 100000 -s 0 -L 8 -d ... # Version 3.3, 40 thread(s) starting at Wed Dec 10 14:22:07 2025 # Sync method: NO SYNC: Test does not issue sync() or fsync() calls. 
# Directories: Time based hash between directories across 10000 subdirectories with 180 seconds per subdirectory. # File names: 40 bytes long, (16 initial bytes of time stamp with 24 random bytes at end of name) # Files info: size 0 bytes, written with an IO size of 16384 bytes per write # App overhead is time in microseconds spent in the test not doing file writing related system calls. parent=0 parent=1 ================== ================== real 0m57.573s real 1m2.934s user 3m53.578s user 3m53.508s sys 19m44.440s sys 25m14.810s $ time rm -rf ... parent=0 parent=1 ================== ================== real 0m59.649s real 1m12.505s user 0m41.196s user 0m47.489s sys 13m9.566s sys 20m33.844s Parent pointers increase the system time by 28% when creating 32 million files that are totally empty. Removing them incurs a system time increase of 56%. Wall time increases by 9% and 22%. For most filesystems, each file tends to have a single owner and not that many xattrs. If the xattr structure is shortform, then all xattr changes are logged with the inode and do not require the xattr intent mechanism to persist the parent pointer. Therefore, we can speed up parent pointer operations by calling the shortform xattr functions directly if the child's xattr is in short format. Now the overhead looks like: $ time fs_mark -D 10000 -S 0 -n 100000 -s 0 -L 8 -d ... parent=0 parent=1 ================== ================== real 0m58.030s real 1m0.983s user 3m54.141s user 3m53.758s sys 19m57.003s sys 21m30.605s $ time rm -rf ... parent=0 parent=1 ================== ================== real 0m58.911s real 1m4.420s user 0m41.329s user 0m45.169s sys 13m27.857s sys 15m58.564s Now parent pointers only increase the system time by 8% for creation and 19% for deletion. Wall time increases by 5% and 9% now. 
Close the performance gap by creating helpers for the attr set, remove, and replace operations that will try to make direct shortform updates, and fall back to the attr intent machinery if that doesn't work. This works for regular xattrs and for parent pointers. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 99 ++++++++++++++++++++++++++++++++++++-- fs/xfs/libxfs/xfs_attr.h | 6 ++- fs/xfs/libxfs/xfs_parent.c | 14 +++--- 3 files changed, 109 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 9a5402d1e9bf..54be75edb2eb 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1028,6 +1028,91 @@ trans_cancel: return error; } +/* + * Decide if it is theoretically possible to try to bypass the attr intent + * mechanism for better performance. Other constraints (e.g. available space + * in the existing structure) are not considered here. + */ +static inline bool +xfs_attr_can_shortcut( + const struct xfs_inode *ip) +{ + return xfs_inode_has_attr_fork(ip) && xfs_attr_is_shortform(ip); +} + +/* Try to set an attr in one transaction or fall back to attr intents. */ +int +xfs_attr_setname( + struct xfs_da_args *args, + int rmt_blks) +{ + int error; + + if (!rmt_blks && xfs_attr_can_shortcut(args->dp)) { + args->op_flags |= XFS_DA_OP_ADDNAME; + + error = xfs_attr_try_sf_addname(args); + if (error != -ENOSPC) + return error; + } + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + return 0; +} + +/* Try to remove an attr in one transaction or fall back to attr intents. */ +int +xfs_attr_removename( + struct xfs_da_args *args) +{ + if (xfs_attr_can_shortcut(args->dp)) + return xfs_attr_sf_removename(args); + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); + return 0; +} + +/* Try to replace an attr in one transaction or fall back to attr intents. 
*/ +int +xfs_attr_replacename( + struct xfs_da_args *args, + int rmt_blks) +{ + int error; + + if (rmt_blks || !xfs_attr_can_shortcut(args->dp)) { + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); + return 0; + } + + args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE; + + error = xfs_attr_sf_removename(args); + if (error) + return error; + + if (args->attr_filter & XFS_ATTR_PARENT) { + /* + * Move the new name/value to the regular name/value slots and + * zero out the new name/value slots because we don't need to + * log them for a PPTR_SET operation. + */ + xfs_attr_update_pptr_replace_args(args); + args->new_name = NULL; + args->new_namelen = 0; + args->new_value = NULL; + args->new_valuelen = 0; + } + args->op_flags &= ~XFS_DA_OP_REPLACE; + + error = xfs_attr_try_sf_addname(args); + if (error != -ENOSPC) + return error; + + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + return 0; +} + /* * Make a change to the xattr structure. * @@ -1108,14 +1193,19 @@ xfs_attr_set( case -EEXIST: if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ - xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); + error = xfs_attr_removename(args); + if (error) + goto out_trans_cancel; break; } /* Pure create fails if the attr already exists */ if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); + + error = xfs_attr_replacename(args, rmt_blks); + if (error) + goto out_trans_cancel; break; case -ENOATTR: /* Can't remove what isn't there. */ @@ -1125,7 +1215,10 @@ xfs_attr_set( /* Pure replace fails if no existing attr to replace. 
*/ if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); + + error = xfs_attr_setname(args, rmt_blks); + if (error) + goto out_trans_cancel; break; default: goto out_trans_cancel; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 0e51d0723f9a..8244305949de 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -573,7 +573,7 @@ struct xfs_trans_res xfs_attr_set_resv(const struct xfs_da_args *args); */ static inline bool xfs_attr_is_shortform( - struct xfs_inode *ip) + const struct xfs_inode *ip) { return ip->i_af.if_format == XFS_DINODE_FMT_LOCAL || (ip->i_af.if_format == XFS_DINODE_FMT_EXTENTS && @@ -649,4 +649,8 @@ void xfs_attr_intent_destroy_cache(void); int xfs_attr_sf_totsize(struct xfs_inode *dp); int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd); +int xfs_attr_setname(struct xfs_da_args *args, int rmt_blks); +int xfs_attr_removename(struct xfs_da_args *args); +int xfs_attr_replacename(struct xfs_da_args *args, int rmt_blks); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 6539f5adae2d..3509cc4b2175 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -29,6 +29,7 @@ #include "xfs_trans_space.h" #include "xfs_attr_item.h" #include "xfs_health.h" +#include "xfs_attr_leaf.h" struct kmem_cache *xfs_parent_args_cache; @@ -202,8 +203,8 @@ xfs_parent_addname( xfs_inode_to_parent_rec(&ppargs->rec, dp); xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, child->i_ino, parent_name); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET); - return 0; + + return xfs_attr_setname(&ppargs->args, 0); } /* Remove a parent pointer to reflect a dirent removal. 
*/ @@ -224,8 +225,8 @@ xfs_parent_removename( xfs_inode_to_parent_rec(&ppargs->rec, dp); xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child, child->i_ino, parent_name); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE); - return 0; + + return xfs_attr_removename(&ppargs->args); } /* Replace one parent pointer with another to reflect a rename. */ @@ -250,12 +251,13 @@ xfs_parent_replacename( child->i_ino, old_name); xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp); + ppargs->args.new_name = new_name->name; ppargs->args.new_namelen = new_name->len; ppargs->args.new_value = &ppargs->new_rec; ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec); - xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE); - return 0; + + return xfs_attr_replacename(&ppargs->args, 0); } /* From eaec8aeff31d0679eadb27a13a62942ddbfd7b87 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:36 -0800 Subject: [PATCH 47/67] xfs: add a method to replace shortform attrs If we're trying to replace an xattr in a shortform attr structure and the old entry fits the new entry, we can just memcpy and exit without having to delete, compact, and re-add the entry (or worse use the attr intent machinery). For parent pointers this only advantages renaming where the filename length stays the same (e.g. mv autoexec.bat scandisk.exe) but for regular xattrs it might be useful for updating security labels and the like. Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 4 ++++ fs/xfs/libxfs/xfs_attr_leaf.c | 38 +++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_attr_leaf.h | 1 + fs/xfs/xfs_trace.h | 1 + 4 files changed, 44 insertions(+) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 54be75edb2eb..93caa1dae501 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1085,6 +1085,10 @@ xfs_attr_replacename( return 0; } + error = xfs_attr_shortform_replace(args); + if (error != -ENOSPC) + return error; + args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE; error = xfs_attr_sf_removename(args); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c3327b10709c..47f48ae555c0 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -842,6 +842,44 @@ xfs_attr_sf_findname( return NULL; } +/* + * Replace a shortform xattr if it's the right length. Returns 0 on success, + * -ENOSPC if the length is wrong, or -ENOATTR if the attr was not found. + */ +int +xfs_attr_shortform_replace( + struct xfs_da_args *args) +{ + struct xfs_attr_sf_entry *sfe; + + ASSERT(args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL); + + trace_xfs_attr_sf_replace(args); + + sfe = xfs_attr_sf_findname(args); + if (!sfe) + return -ENOATTR; + + if (args->attr_filter & XFS_ATTR_PARENT) { + if (sfe->namelen != args->new_namelen || + sfe->valuelen != args->new_valuelen) + return -ENOSPC; + + memcpy(sfe->nameval, args->new_name, sfe->namelen); + memcpy(&sfe->nameval[sfe->namelen], args->new_value, + sfe->valuelen); + } else { + if (sfe->valuelen != args->valuelen) + return -ENOSPC; + memcpy(&sfe->nameval[sfe->namelen], args->value, + sfe->valuelen); + } + + xfs_trans_log_inode(args->trans, args->dp, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return 0; +} + /* * Add a name/value pair to the shortform attribute list. * Overflow from the inode has already been checked for. 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 589f810eedc0..aca46da2bc50 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -46,6 +46,7 @@ struct xfs_attr3_icleaf_hdr { * Internal routines when attribute fork size < XFS_LITINO(mp). */ void xfs_attr_shortform_create(struct xfs_da_args *args); +int xfs_attr_shortform_replace(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f70afbf3cb19..a8bea99e0024 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2410,6 +2410,7 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_addname); DEFINE_ATTR_EVENT(xfs_attr_sf_create); DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); DEFINE_ATTR_EVENT(xfs_attr_sf_remove); +DEFINE_ATTR_EVENT(xfs_attr_sf_replace); DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); DEFINE_ATTR_EVENT(xfs_attr_leaf_add); From 60382993a2e18041f88c7969f567f168cd3b4de3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:37 -0800 Subject: [PATCH 48/67] xfs: get rid of the xchk_xfile_*_descr calls The xchk_xfile_*_descr macros call kasprintf, which can fail to allocate memory if the formatted string is larger than 16 bytes (or whatever the nofail guarantees are nowadays). Some of them could easily exceed that, and Jiaming Zhang found a few places where that can happen with syzbot. The descriptions are debugging aids and aren't required to be unique, so let's just pass in static strings and eliminate this path to failure. Note this patch touches a number of commits, most of which were merged between 6.6 and 6.14. Cc: r772577952@gmail.com Cc: # v6.12 Fixes: ab97f4b1c03075 ("xfs: repair AGI unlinked inode bucket lists") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig Tested-by: Jiaming Zhang --- fs/xfs/scrub/agheader_repair.c | 13 ++++--------- fs/xfs/scrub/alloc_repair.c | 5 +---- fs/xfs/scrub/attr_repair.c | 20 +++++--------------- fs/xfs/scrub/bmap_repair.c | 6 +----- fs/xfs/scrub/common.h | 25 ------------------------- fs/xfs/scrub/dir.c | 13 ++++--------- fs/xfs/scrub/dir_repair.c | 11 +++-------- fs/xfs/scrub/dirtree.c | 11 +++-------- fs/xfs/scrub/ialloc_repair.c | 5 +---- fs/xfs/scrub/nlinks.c | 6 ++---- fs/xfs/scrub/parent.c | 11 +++-------- fs/xfs/scrub/parent_repair.c | 23 ++++++----------------- fs/xfs/scrub/quotacheck.c | 13 +++---------- fs/xfs/scrub/refcount_repair.c | 13 ++----------- fs/xfs/scrub/rmap_repair.c | 5 +---- fs/xfs/scrub/rtbitmap_repair.c | 6 ++---- fs/xfs/scrub/rtrefcount_repair.c | 15 +++------------ fs/xfs/scrub/rtrmap_repair.c | 5 +---- fs/xfs/scrub/rtsummary.c | 7 ++----- 19 files changed, 47 insertions(+), 166 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 1c09948d841e..d8e3c51a41b1 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -1708,7 +1708,6 @@ xrep_agi( { struct xrep_agi *ragi; struct xfs_mount *mp = sc->mp; - char *descr; unsigned int i; int error; @@ -1742,17 +1741,13 @@ xrep_agi( xagino_bitmap_init(&ragi->iunlink_bmp); sc->buf_cleanup = xrep_agi_buf_cleanup; - descr = xchk_xfile_ag_descr(sc, "iunlinked next pointers"); - error = xfarray_create(descr, 0, sizeof(xfs_agino_t), - &ragi->iunlink_next); - kfree(descr); + error = xfarray_create("iunlinked next pointers", 0, + sizeof(xfs_agino_t), &ragi->iunlink_next); if (error) return error; - descr = xchk_xfile_ag_descr(sc, "iunlinked prev pointers"); - error = xfarray_create(descr, 0, sizeof(xfs_agino_t), - &ragi->iunlink_prev); - kfree(descr); + error = xfarray_create("iunlinked prev pointers", 0, + sizeof(xfs_agino_t), &ragi->iunlink_prev); if (error) return error; diff --git a/fs/xfs/scrub/alloc_repair.c 
b/fs/xfs/scrub/alloc_repair.c index d84777e23321..f9a9b4327189 100644 --- a/fs/xfs/scrub/alloc_repair.c +++ b/fs/xfs/scrub/alloc_repair.c @@ -850,7 +850,6 @@ xrep_allocbt( struct xrep_abt *ra; struct xfs_mount *mp = sc->mp; unsigned int busy_gen; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -876,11 +875,9 @@ xrep_allocbt( } /* Set up enough storage to handle maximally fragmented free space. */ - descr = xchk_xfile_ag_descr(sc, "free space records"); - error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2, + error = xfarray_create("free space records", mp->m_sb.sb_agblocks / 2, sizeof(struct xfs_alloc_rec_incore), &ra->free_records); - kfree(descr); if (error) goto out_ra; diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 1da1354f5e06..f9191eae13ee 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -1529,7 +1529,6 @@ xrep_xattr_setup_scan( struct xrep_xattr **rxp) { struct xrep_xattr *rx; - char *descr; int max_len; int error; @@ -1555,35 +1554,26 @@ xrep_xattr_setup_scan( goto out_rx; /* Set up some staging for salvaged attribute keys and values */ - descr = xchk_xfile_ino_descr(sc, "xattr keys"); - error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), + error = xfarray_create("xattr keys", 0, sizeof(struct xrep_xattr_key), &rx->xattr_records); - kfree(descr); if (error) goto out_rx; - descr = xchk_xfile_ino_descr(sc, "xattr names"); - error = xfblob_create(descr, &rx->xattr_blobs); - kfree(descr); + error = xfblob_create("xattr names", &rx->xattr_blobs); if (error) goto out_keys; if (xfs_has_parent(sc->mp)) { ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); - descr = xchk_xfile_ino_descr(sc, - "xattr retained parent pointer entries"); - error = xfarray_create(descr, 0, + error = xfarray_create("xattr parent pointer entries", 0, sizeof(struct xrep_xattr_pptr), &rx->pptr_recs); - kfree(descr); if (error) goto out_values; - descr = xchk_xfile_ino_descr(sc, - "xattr retained parent 
pointer names"); - error = xfblob_create(descr, &rx->pptr_names); - kfree(descr); + error = xfblob_create("xattr parent pointer names", + &rx->pptr_names); if (error) goto out_pprecs; diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 1d1056d447e0..0a83d5845379 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -923,7 +923,6 @@ xrep_bmap( bool allow_unwritten) { struct xrep_bmap *rb; - char *descr; xfs_extnum_t max_bmbt_recs; bool large_extcount; int error = 0; @@ -945,11 +944,8 @@ xrep_bmap( /* Set up enough storage to handle the max records for this fork. */ large_extcount = xfs_has_large_extent_counts(sc->mp); max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork); - descr = xchk_xfile_ino_descr(sc, "%s fork mapping records", - whichfork == XFS_DATA_FORK ? "data" : "attr"); - error = xfarray_create(descr, max_bmbt_recs, + error = xfarray_create("fork mapping records", max_bmbt_recs, sizeof(struct xfs_bmbt_rec), &rb->bmap_records); - kfree(descr); if (error) goto out_rb; diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index ddbc065c798c..f2ecc68538f0 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -246,31 +246,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc) int xchk_metadata_inode_forks(struct xfs_scrub *sc); -/* - * Helper macros to allocate and format xfile description strings. - * Callers must kfree the pointer returned. - */ -#define xchk_xfile_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \ - (sc)->mp->m_super->s_id, ##__VA_ARGS__) -#define xchk_xfile_ag_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->sa.pag ? \ - pag_agno((sc)->sa.pag) : (sc)->sm->sm_agno, \ - ##__VA_ARGS__) -#define xchk_xfile_ino_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->ip ? 
(sc)->ip->i_ino : (sc)->sm->sm_ino, \ - ##__VA_ARGS__) -#define xchk_xfile_rtgroup_descr(sc, fmt, ...) \ - kasprintf(XCHK_GFP_FLAGS, "XFS (%s): rtgroup 0x%x " fmt, \ - (sc)->mp->m_super->s_id, \ - (sc)->sa.pag ? \ - rtg_rgno((sc)->sr.rtg) : (sc)->sm->sm_agno, \ - ##__VA_ARGS__) - /* * Setting up a hook to wait for intents to drain is costly -- we have to take * the CPU hotplug lock and force an i-cache flush on all CPUs once to set it diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 1d98775b4b17..91228623d016 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -1102,22 +1102,17 @@ xchk_directory( sd->xname.name = sd->namebuf; if (xfs_has_parent(sc->mp)) { - char *descr; - /* * Set up some staging memory for dirents that we can't check * due to locking contention. */ - descr = xchk_xfile_ino_descr(sc, "slow directory entries"); - error = xfarray_create(descr, 0, sizeof(struct xchk_dirent), - &sd->dir_entries); - kfree(descr); + error = xfarray_create("slow directory entries", 0, + sizeof(struct xchk_dirent), &sd->dir_entries); if (error) goto out_sd; - descr = xchk_xfile_ino_descr(sc, "slow directory entry names"); - error = xfblob_create(descr, &sd->dir_names); - kfree(descr); + error = xfblob_create("slow directory entry names", + &sd->dir_names); if (error) goto out_entries; } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index d54206f674e2..dbfcef6fb7da 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1784,20 +1784,15 @@ xrep_dir_setup_scan( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; - char *descr; int error; /* Set up some staging memory for salvaging dirents. 
*/ - descr = xchk_xfile_ino_descr(sc, "directory entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_dirent), - &rd->dir_entries); - kfree(descr); + error = xfarray_create("directory entries", 0, + sizeof(struct xrep_dirent), &rd->dir_entries); if (error) return error; - descr = xchk_xfile_ino_descr(sc, "directory entry names"); - error = xfblob_create(descr, &rd->dir_names); - kfree(descr); + error = xfblob_create("directory entry names", &rd->dir_names); if (error) goto out_xfarray; diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index 529dae105e57..e484f8a0886c 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -92,7 +92,6 @@ xchk_setup_dirtree( struct xfs_scrub *sc) { struct xchk_dirtree *dl; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); @@ -116,16 +115,12 @@ xchk_setup_dirtree( mutex_init(&dl->lock); - descr = xchk_xfile_ino_descr(sc, "dirtree path steps"); - error = xfarray_create(descr, 0, sizeof(struct xchk_dirpath_step), - &dl->path_steps); - kfree(descr); + error = xfarray_create("dirtree path steps", 0, + sizeof(struct xchk_dirpath_step), &dl->path_steps); if (error) goto out_dl; - descr = xchk_xfile_ino_descr(sc, "dirtree path names"); - error = xfblob_create(descr, &dl->path_names); - kfree(descr); + error = xfblob_create("dirtree path names", &dl->path_names); if (error) goto out_steps; diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c index bccf2e18d43e..d206054c1ae3 100644 --- a/fs/xfs/scrub/ialloc_repair.c +++ b/fs/xfs/scrub/ialloc_repair.c @@ -797,7 +797,6 @@ xrep_iallocbt( { struct xrep_ibt *ri; struct xfs_mount *mp = sc->mp; - char *descr; xfs_agino_t first_agino, last_agino; int error = 0; @@ -816,11 +815,9 @@ xrep_iallocbt( /* Set up enough storage to handle an AG with nothing but inodes. 
*/ xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino); last_agino /= XFS_INODES_PER_CHUNK; - descr = xchk_xfile_ag_descr(sc, "inode index records"); - error = xfarray_create(descr, last_agino, + error = xfarray_create("inode index records", last_agino, sizeof(struct xfs_inobt_rec_incore), &ri->inode_records); - kfree(descr); if (error) goto out_ri; diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 8bf0bff64b41..46488aff908c 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -990,7 +990,6 @@ xchk_nlinks_setup_scan( struct xchk_nlink_ctrs *xnc) { struct xfs_mount *mp = sc->mp; - char *descr; unsigned long long max_inos; xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; xfs_agino_t first_agino, last_agino; @@ -1007,10 +1006,9 @@ xchk_nlinks_setup_scan( */ xfs_agino_range(mp, last_agno, &first_agino, &last_agino); max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; - descr = xchk_xfile_descr(sc, "file link counts"); - error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), + error = xfarray_create("file link counts", + min(XFS_MAXINUMBER + 1, max_inos), sizeof(struct xchk_nlink), &xnc->nlinks); - kfree(descr); if (error) goto out_teardown; diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 36d505f3e40b..5a259570b154 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -755,7 +755,6 @@ xchk_parent_pptr( struct xfs_scrub *sc) { struct xchk_pptrs *pp; - char *descr; int error; pp = kvzalloc(sizeof(struct xchk_pptrs), XCHK_GFP_FLAGS); @@ -768,16 +767,12 @@ xchk_parent_pptr( * Set up some staging memory for parent pointers that we can't check * due to locking contention. 
*/ - descr = xchk_xfile_ino_descr(sc, "slow parent pointer entries"); - error = xfarray_create(descr, 0, sizeof(struct xchk_pptr), - &pp->pptr_entries); - kfree(descr); + error = xfarray_create("slow parent pointer entries", 0, + sizeof(struct xchk_pptr), &pp->pptr_entries); if (error) goto out_pp; - descr = xchk_xfile_ino_descr(sc, "slow parent pointer names"); - error = xfblob_create(descr, &pp->pptr_names); - kfree(descr); + error = xfblob_create("slow parent pointer names", &pp->pptr_names); if (error) goto out_entries; diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 512a546f8ce1..83a8205ae2f1 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -1497,7 +1497,6 @@ xrep_parent_setup_scan( struct xrep_parent *rp) { struct xfs_scrub *sc = rp->sc; - char *descr; struct xfs_da_geometry *geo = sc->mp->m_attr_geo; int max_len; int error; @@ -1525,32 +1524,22 @@ xrep_parent_setup_scan( goto out_xattr_name; /* Set up some staging memory for logging parent pointer updates. 
*/ - descr = xchk_xfile_ino_descr(sc, "parent pointer entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_pptr), - &rp->pptr_recs); - kfree(descr); + error = xfarray_create("parent pointer entries", 0, + sizeof(struct xrep_pptr), &rp->pptr_recs); if (error) goto out_xattr_value; - descr = xchk_xfile_ino_descr(sc, "parent pointer names"); - error = xfblob_create(descr, &rp->pptr_names); - kfree(descr); + error = xfblob_create("parent pointer names", &rp->pptr_names); if (error) goto out_recs; /* Set up some storage for copying attrs before the mapping exchange */ - descr = xchk_xfile_ino_descr(sc, - "parent pointer retained xattr entries"); - error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr), - &rp->xattr_records); - kfree(descr); + error = xfarray_create("parent pointer xattr entries", 0, + sizeof(struct xrep_parent_xattr), &rp->xattr_records); if (error) goto out_names; - descr = xchk_xfile_ino_descr(sc, - "parent pointer retained xattr values"); - error = xfblob_create(descr, &rp->xattr_blobs); - kfree(descr); + error = xfblob_create("parent pointer xattr values", &rp->xattr_blobs); if (error) goto out_attr_keys; diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index 00e0c0e56d82..e8cba19334a0 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -741,7 +741,6 @@ xqcheck_setup_scan( struct xfs_scrub *sc, struct xqcheck *xqc) { - char *descr; struct xfs_quotainfo *qi = sc->mp->m_quotainfo; unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL; int error; @@ -756,28 +755,22 @@ xqcheck_setup_scan( error = -ENOMEM; if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) { - descr = xchk_xfile_descr(sc, "user dquot records"); - error = xfarray_create(descr, max_dquots, + error = xfarray_create("user dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->ucounts); - kfree(descr); if (error) goto out_teardown; } if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) { - descr = xchk_xfile_descr(sc, "group 
dquot records"); - error = xfarray_create(descr, max_dquots, + error = xfarray_create("group dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->gcounts); - kfree(descr); if (error) goto out_teardown; } if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) { - descr = xchk_xfile_descr(sc, "project dquot records"); - error = xfarray_create(descr, max_dquots, + error = xfarray_create("project dquot records", max_dquots, sizeof(struct xqcheck_dquot), &xqc->pcounts); - kfree(descr); if (error) goto out_teardown; } diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c index 46546bf6eb13..507993e0fb0f 100644 --- a/fs/xfs/scrub/refcount_repair.c +++ b/fs/xfs/scrub/refcount_repair.c @@ -123,13 +123,7 @@ int xrep_setup_ag_refcountbt( struct xfs_scrub *sc) { - char *descr; - int error; - - descr = xchk_xfile_ag_descr(sc, "rmap record bag"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); - return error; + return xrep_setup_xfbtree(sc, "rmap record bag"); } /* Check for any obvious conflicts with this shared/CoW staging extent. */ @@ -704,7 +698,6 @@ xrep_refcountbt( { struct xrep_refc *rr; struct xfs_mount *mp = sc->mp; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -717,11 +710,9 @@ xrep_refcountbt( rr->sc = sc; /* Set up enough storage to handle one refcount record per block. 
*/ - descr = xchk_xfile_ag_descr(sc, "reference count records"); - error = xfarray_create(descr, mp->m_sb.sb_agblocks, + error = xfarray_create("reference count records", mp->m_sb.sb_agblocks, sizeof(struct xfs_refcount_irec), &rr->refcount_records); - kfree(descr); if (error) goto out_rr; diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index f27e3c8aa6d5..ab7053e25e1c 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -164,14 +164,11 @@ xrep_setup_ag_rmapbt( struct xfs_scrub *sc) { struct xrep_rmap *rr; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); - descr = xchk_xfile_ag_descr(sc, "reverse mapping records"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); + error = xrep_setup_xfbtree(sc, "reverse mapping records"); if (error) return error; diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c index fd0d12db55f9..f4ca86a2ea1b 100644 --- a/fs/xfs/scrub/rtbitmap_repair.c +++ b/fs/xfs/scrub/rtbitmap_repair.c @@ -43,7 +43,6 @@ xrep_setup_rtbitmap( struct xchk_rtbitmap *rtb) { struct xfs_mount *mp = sc->mp; - char *descr; unsigned long long blocks = mp->m_sb.sb_rbmblocks; int error; @@ -52,9 +51,8 @@ xrep_setup_rtbitmap( return error; /* Create an xfile to hold our reconstructed bitmap. 
*/ - descr = xchk_xfile_rtgroup_descr(sc, "bitmap file"); - error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile); - kfree(descr); + error = xfile_create("realtime bitmap file", + blocks * mp->m_sb.sb_blocksize, &sc->xfile); if (error) return error; diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c index a092934ed371..f713daf095fb 100644 --- a/fs/xfs/scrub/rtrefcount_repair.c +++ b/fs/xfs/scrub/rtrefcount_repair.c @@ -128,13 +128,7 @@ int xrep_setup_rtrefcountbt( struct xfs_scrub *sc) { - char *descr; - int error; - - descr = xchk_xfile_ag_descr(sc, "rmap record bag"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); - return error; + return xrep_setup_xfbtree(sc, "realtime rmap record bag"); } /* Check for any obvious conflicts with this shared/CoW staging extent. */ @@ -704,7 +698,6 @@ xrep_rtrefcountbt( { struct xrep_rtrefc *rr; struct xfs_mount *mp = sc->mp; - char *descr; int error; /* We require the rmapbt to rebuild anything. */ @@ -722,11 +715,9 @@ xrep_rtrefcountbt( rr->sc = sc; /* Set up enough storage to handle one refcount record per rt extent. 
*/ - descr = xchk_xfile_ag_descr(sc, "reference count records"); - error = xfarray_create(descr, mp->m_sb.sb_rextents, - sizeof(struct xfs_refcount_irec), + error = xfarray_create("realtime reference count records", + mp->m_sb.sb_rextents, sizeof(struct xfs_refcount_irec), &rr->refcount_records); - kfree(descr); if (error) goto out_rr; diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c index 5af94e48d8cf..4610d6d80648 100644 --- a/fs/xfs/scrub/rtrmap_repair.c +++ b/fs/xfs/scrub/rtrmap_repair.c @@ -103,14 +103,11 @@ xrep_setup_rtrmapbt( struct xfs_scrub *sc) { struct xrep_rtrmap *rr; - char *descr; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); - descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records"); - error = xrep_setup_xfbtree(sc, descr); - kfree(descr); + error = xrep_setup_xfbtree(sc, "realtime reverse mapping records"); if (error) return error; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 712f27f6266c..b510e6bbbd3e 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -43,7 +43,6 @@ xchk_setup_rtsummary( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; - char *descr; struct xchk_rtsummary *rts; int error; @@ -70,10 +69,8 @@ xchk_setup_rtsummary( * Create an xfile to construct a new rtsummary file. The xfile allows * us to avoid pinning kernel memory for this purpose. */ - descr = xchk_xfile_descr(sc, "realtime summary file"); - error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks), - &sc->xfile); - kfree(descr); + error = xfile_create("realtime summary file", + XFS_FSB_TO_B(mp, mp->m_rsumblocks), &sc->xfile); if (error) return error; From ba408d299a3bb3c5309f40c5326e4fb83ead4247 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Fri, 23 Jan 2026 09:27:37 -0800 Subject: [PATCH 49/67] xfs: only call xf{array,blob}_destroy if we have a valid pointer Only call the xfarray and xfblob destructor if we have a valid pointer, and be sure to null out that pointer afterwards. Note that this patch fixes a large number of commits, most of which were merged between 6.9 and 6.10. Cc: r772577952@gmail.com Cc: # v6.12 Fixes: ab97f4b1c03075 ("xfs: repair AGI unlinked inode bucket lists") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Tested-by: Jiaming Zhang --- fs/xfs/scrub/agheader_repair.c | 8 ++++++-- fs/xfs/scrub/attr_repair.c | 6 ++++-- fs/xfs/scrub/dir_repair.c | 8 ++++++-- fs/xfs/scrub/dirtree.c | 8 ++++++-- fs/xfs/scrub/nlinks.c | 3 ++- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index d8e3c51a41b1..15d58eedb387 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -837,8 +837,12 @@ xrep_agi_buf_cleanup( { struct xrep_agi *ragi = buf; - xfarray_destroy(ragi->iunlink_prev); - xfarray_destroy(ragi->iunlink_next); + if (ragi->iunlink_prev) + xfarray_destroy(ragi->iunlink_prev); + ragi->iunlink_prev = NULL; + if (ragi->iunlink_next) + xfarray_destroy(ragi->iunlink_next); + ragi->iunlink_next = NULL; xagino_bitmap_destroy(&ragi->iunlink_bmp); } diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index f9191eae13ee..a924b467a844 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -1516,8 +1516,10 @@ xrep_xattr_teardown( xfblob_destroy(rx->pptr_names); if (rx->pptr_recs) xfarray_destroy(rx->pptr_recs); - xfblob_destroy(rx->xattr_blobs); - xfarray_destroy(rx->xattr_records); + if (rx->xattr_blobs) + xfblob_destroy(rx->xattr_blobs); + if (rx->xattr_records) + xfarray_destroy(rx->xattr_records); mutex_destroy(&rx->lock); kfree(rx); } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 
dbfcef6fb7da..f105e49f654b 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -172,8 +172,12 @@ xrep_dir_teardown( struct xrep_dir *rd = sc->buf; xrep_findparent_scan_teardown(&rd->pscan); - xfblob_destroy(rd->dir_names); - xfarray_destroy(rd->dir_entries); + if (rd->dir_names) + xfblob_destroy(rd->dir_names); + rd->dir_names = NULL; + if (rd->dir_entries) + xfarray_destroy(rd->dir_entries); + rd->dir_entries = NULL; } /* Set up for a directory repair. */ diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index e484f8a0886c..e95dc74f1145 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -81,8 +81,12 @@ xchk_dirtree_buf_cleanup( kfree(path); } - xfblob_destroy(dl->path_names); - xfarray_destroy(dl->path_steps); + if (dl->path_names) + xfblob_destroy(dl->path_names); + dl->path_names = NULL; + if (dl->path_steps) + xfarray_destroy(dl->path_steps); + dl->path_steps = NULL; mutex_destroy(&dl->lock); } diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 46488aff908c..e80fe7395d78 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -971,7 +971,8 @@ xchk_nlinks_teardown_scan( xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); - xfarray_destroy(xnc->nlinks); + if (xnc->nlinks) + xfarray_destroy(xnc->nlinks); xnc->nlinks = NULL; xchk_iscan_teardown(&xnc->collect_iscan); From ca27313fb3f23e4ac18532ede4ec1c7cc5814c4a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:38 -0800 Subject: [PATCH 50/67] xfs: check return value of xchk_scrub_create_subord Fix this function to return NULL instead of a mangled ENOMEM, then fix the callers to actually check for a null pointer and return ENOMEM. Most of the corrections here are for code merged between 6.2 and 6.10. Cc: r772577952@gmail.com Cc: # v6.12 Fixes: 1a5f6e08d4e379 ("xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig Tested-by: Jiaming Zhang --- fs/xfs/scrub/common.c | 3 +++ fs/xfs/scrub/repair.c | 3 +++ fs/xfs/scrub/scrub.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index affed35a8c96..20e63069088b 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1399,6 +1399,9 @@ xchk_metadata_inode_subtype( int error; sub = xchk_scrub_create_subord(sc, scrub_type); + if (!sub) + return -ENOMEM; + error = sub->sc.ops->scrub(&sub->sc); xchk_scrub_free_subord(sub); return error; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 3ebe27524ce3..ac8c592579bd 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1136,6 +1136,9 @@ xrep_metadata_inode_subtype( * setup/teardown routines. */ sub = xchk_scrub_create_subord(sc, scrub_type); + if (!sub) + return -ENOMEM; + error = sub->sc.ops->scrub(&sub->sc); if (error) goto out; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 670ac2baae0c..c1c6415f5055 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -634,7 +634,7 @@ xchk_scrub_create_subord( sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS); if (!sub) - return ERR_PTR(-ENOMEM); + return NULL; sub->old_smtype = sc->sm->sm_type; sub->old_smflags = sc->sm->sm_flags; From 1c253e11225bc5167217897885b85093e17c2217 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:39 -0800 Subject: [PATCH 51/67] xfs: fix UAF in xchk_btree_check_block_owner We cannot dereference bs->cur when trying to determine if bs->cur aliases bs->sc->sa.{bno,rmap}_cur after the latter has been freed. Fix this by sampling the btree type before any freeing could happen. The correct temporal ordering was broken when we removed xfs_btnum_t. Cc: r772577952@gmail.com Cc: # v6.9 Fixes: ec793e690f801d ("xfs: remove xfs_btnum_t") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig Tested-by: Jiaming Zhang --- fs/xfs/scrub/btree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 40f36db9f07d..1089b1f4c5df 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -372,12 +372,15 @@ xchk_btree_check_block_owner( { xfs_agnumber_t agno; xfs_agblock_t agbno; + bool is_bnobt, is_rmapbt; bool init_sa; int error = 0; if (!bs->cur) return 0; + is_bnobt = xfs_btree_is_bno(bs->cur->bc_ops); + is_rmapbt = xfs_btree_is_rmap(bs->cur->bc_ops); agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); @@ -400,11 +403,11 @@ xchk_btree_check_block_owner( * have to nullify it (to shut down further block owner checks) if * self-xref encounters problems. */ - if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops)) + if (!bs->sc->sa.bno_cur && is_bnobt) bs->cur = NULL; xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo); - if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops)) + if (!bs->sc->sa.rmap_cur && is_rmapbt) bs->cur = NULL; out_free: From 55e03b8cbe2783ec9acfb88e8adb946ed504e117 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 23 Jan 2026 09:27:40 -0800 Subject: [PATCH 52/67] xfs: check for deleted cursors when revalidating two btrees The free space and inode btree repair functions will rebuild both btrees at the same time, after which it needs to evaluate both btrees to confirm that the corruptions are gone. However, Jiaming Zhang ran syzbot and produced a crash in the second xchk_allocbt call. His root-cause analysis is as follows (with minor corrections): In xrep_revalidate_allocbt(), xchk_allocbt() is called twice (first for BNOBT, second for CNTBT). The cause of this issue is that the first call nullified the cursor required by the second call. 
Let's first enter xrep_revalidate_allocbt() via the following call chain: xfs_file_ioctl() -> xfs_ioc_scrubv_metadata() -> xfs_scrub_metadata() -> `sc->ops->repair_eval(sc)` -> xrep_revalidate_allocbt() xchk_allocbt() is called twice in this function. In the first call: /* Note that sc->sm->sm_type is XFS_SCRUB_TYPE_BNOBT now */ xchk_allocbt() -> xchk_btree() -> `bs->scrub_rec(bs, recp)` -> xchk_allocbt_rec() -> xchk_allocbt_xref() -> xchk_allocbt_xref_other() since sm_type is XFS_SCRUB_TYPE_BNOBT, pcur is set to &sc->sa.cnt_cur. Kernel called xfs_alloc_get_rec() and returned -EFSCORRUPTED. Call chain: xfs_alloc_get_rec() -> xfs_btree_get_rec() -> xfs_btree_check_block() -> (XFS_IS_CORRUPT || XFS_TEST_ERROR), the former is false and the latter is true, return -EFSCORRUPTED. This should be caused by ioctl$XFS_IOC_ERROR_INJECTION I guess. Back to xchk_allocbt_xref_other(), after receiving -EFSCORRUPTED from xfs_alloc_get_rec(), kernel called xchk_should_check_xref(). In this function, *curpp (points to sc->sa.cnt_cur) is nullified. Back to xrep_revalidate_allocbt(), since sc->sa.cnt_cur has been nullified, it then triggered null-ptr-deref via xchk_allocbt() (second call) -> xchk_btree(). So. The bnobt revalidation failed on a cross-reference attempt, so we deleted the cntbt cursor, and then crashed when we tried to revalidate the cntbt. Therefore, check for a null cntbt cursor before that revalidation, and mark the repair incomplete. Also we can ignore the second tree entirely if the first tree was rebuilt but is already corrupt. Apply the same fix to xrep_revalidate_iallocbt because it has the same problem. Cc: r772577952@gmail.com Link: https://lore.kernel.org/linux-xfs/CANypQFYU5rRPkTy=iG5m1Lp4RWasSgrHXAh3p8YJojxV0X15dQ@mail.gmail.com/T/#m520c7835fad637eccf843c7936c200589427cc7e Cc: # v6.8 Fixes: dbfbf3bdf639a2 ("xfs: repair inode btrees") Signed-off-by: "Darrick J. 
Wong" Reviewed-by: Christoph Hellwig Tested-by: Jiaming Zhang --- fs/xfs/scrub/alloc_repair.c | 15 +++++++++++++++ fs/xfs/scrub/ialloc_repair.c | 20 +++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c index f9a9b4327189..5b4c2a39a155 100644 --- a/fs/xfs/scrub/alloc_repair.c +++ b/fs/xfs/scrub/alloc_repair.c @@ -923,7 +923,22 @@ xrep_revalidate_allocbt( if (error) goto out; + /* + * If the bnobt is still corrupt, we've failed to repair the filesystem + * and should just bail out. + * + * If the bnobt fails cross-examination with the cntbt, the scan will + * free the cntbt cursor, so we need to mark the repair incomplete + * and avoid walking off the end of the NULL cntbt cursor. + */ + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out; + sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT; + if (!sc->sa.cnt_cur) { + xchk_set_incomplete(sc); + goto out; + } error = xchk_allocbt(sc); out: sc->sm->sm_type = old_type; diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c index d206054c1ae3..9b63b9d19e1b 100644 --- a/fs/xfs/scrub/ialloc_repair.c +++ b/fs/xfs/scrub/ialloc_repair.c @@ -863,10 +863,24 @@ xrep_revalidate_iallocbt( if (error) goto out; - if (xfs_has_finobt(sc->mp)) { - sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; - error = xchk_iallocbt(sc); + /* + * If the inobt is still corrupt, we've failed to repair the filesystem + * and should just bail out. + * + * If the inobt fails cross-examination with the finobt, the scan will + * free the finobt cursor, so we need to mark the repair incomplete + * and avoid walking off the end of the NULL finobt cursor. 
+ */ + if (!xfs_has_finobt(sc->mp) || + (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + goto out; + + sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; + if (!sc->sa.fino_cur) { + xchk_set_incomplete(sc); + goto out; } + error = xchk_iallocbt(sc); out: sc->sm->sm_type = old_type; From c17a1c03493bee4e7882ac79a52b8150cb464e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jan 2026 16:10:20 +0100 Subject: [PATCH 53/67] xfs: use a separate member to track space available in the GC scratch buffer When scratch_head wraps back to 0 and scratch_tail is also 0 because no I/O has completed yet, the ring buffer could be mistaken for empty. Fix this by introducing a separate scratch_available member in struct xfs_zone_gc_data. This actually ends up simplifying the code as well. Reported-by: Chris Mason Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index dfa6653210c7..8c08e5519bff 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -131,10 +131,13 @@ struct xfs_zone_gc_data { /* * Scratchpad to buffer GC data, organized as a ring buffer over * discontiguous folios. scratch_head is where the buffer is filled, - * and scratch_tail tracks the buffer space freed. + * scratch_tail tracks the buffer space freed, and scratch_available + * counts the space available in the ring buffer between the head and + * the tail. 
*/ struct folio *scratch_folios[XFS_GC_NR_BUFS]; unsigned int scratch_size; + unsigned int scratch_available; unsigned int scratch_head; unsigned int scratch_tail; @@ -212,6 +215,7 @@ xfs_zone_gc_data_alloc( goto out_free_scratch; } data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS; + data->scratch_available = data->scratch_size; INIT_LIST_HEAD(&data->reading); INIT_LIST_HEAD(&data->writing); INIT_LIST_HEAD(&data->resetting); @@ -574,18 +578,6 @@ xfs_zone_gc_ensure_target( return oz; } -static unsigned int -xfs_zone_gc_scratch_available( - struct xfs_zone_gc_data *data) -{ - if (!data->scratch_tail) - return data->scratch_size - data->scratch_head; - - if (!data->scratch_head) - return data->scratch_tail; - return (data->scratch_size - data->scratch_head) + data->scratch_tail; -} - static bool xfs_zone_gc_space_available( struct xfs_zone_gc_data *data) @@ -596,7 +588,7 @@ xfs_zone_gc_space_available( if (!oz) return false; return oz->oz_allocated < rtg_blocks(oz->oz_rtg) && - xfs_zone_gc_scratch_available(data); + data->scratch_available; } static void @@ -625,8 +617,7 @@ xfs_zone_gc_alloc_blocks( if (!oz) return NULL; - *count_fsb = min(*count_fsb, - XFS_B_TO_FSB(mp, xfs_zone_gc_scratch_available(data))); + *count_fsb = min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available)); /* * Directly allocate GC blocks from the reserved pool. 
@@ -730,6 +721,7 @@ xfs_zone_gc_start_chunk( bio->bi_end_io = xfs_zone_gc_end_io; xfs_zone_gc_add_data(chunk); data->scratch_head = (data->scratch_head + len) % data->scratch_size; + data->scratch_available -= len; WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->reading); @@ -862,6 +854,7 @@ xfs_zone_gc_finish_chunk( data->scratch_tail = (data->scratch_tail + chunk->len) % data->scratch_size; + data->scratch_available += chunk->len; /* * Cycle through the iolock and wait for direct I/O and layouts to From 7da4ebea8332e6b2fb15edc71e5443c15826af49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jan 2026 16:10:21 +0100 Subject: [PATCH 54/67] xfs: remove xfs_zone_gc_space_available xfs_zone_gc_space_available only has one caller left, so fold it into that. Reorder the checks so that the cheaper scratch_available check is done first. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 8c08e5519bff..7bdc5043cc1a 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -578,19 +578,6 @@ xfs_zone_gc_ensure_target( return oz; } -static bool -xfs_zone_gc_space_available( - struct xfs_zone_gc_data *data) -{ - struct xfs_open_zone *oz; - - oz = xfs_zone_gc_ensure_target(data->mp); - if (!oz) - return false; - return oz->oz_allocated < rtg_blocks(oz->oz_rtg) && - data->scratch_available; -} - static void xfs_zone_gc_end_io( struct bio *bio) @@ -989,9 +976,15 @@ static bool xfs_zone_gc_should_start_new_work( struct xfs_zone_gc_data *data) { + struct xfs_open_zone *oz; + if (xfs_is_shutdown(data->mp)) return false; - if (!xfs_zone_gc_space_available(data)) + if (!data->scratch_available) + return false; + + oz = xfs_zone_gc_ensure_target(data->mp); + if (!oz || oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return false; if 
(!data->iter.victim_rtg) { From 0ead3b72469e52ca02946b2e5b35fff38bfa061f Mon Sep 17 00:00:00 2001 From: Shin Seong-jun Date: Sat, 24 Jan 2026 00:04:32 +0900 Subject: [PATCH 55/67] xfs: fix spacing style issues in xfs_alloc.c Fix checkpatch.pl errors regarding missing spaces around assignment operators in xfs_alloc_compute_diff() and xfs_alloc_fixup_trees(). Adhere to the Linux kernel coding style by ensuring spaces are placed around the assignment operator '='. Signed-off-by: Shin Seong-jun Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 5bec3365a99a..d99602bcc16f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -376,8 +376,8 @@ xfs_alloc_compute_diff( xfs_agblock_t freeend; /* end of freespace extent */ xfs_agblock_t newbno1; /* return block number */ xfs_agblock_t newbno2; /* other new block number */ - xfs_extlen_t newlen1=0; /* length with newbno1 */ - xfs_extlen_t newlen2=0; /* length with newbno2 */ + xfs_extlen_t newlen1 = 0; /* length with newbno1 */ + xfs_extlen_t newlen2 = 0; /* length with newbno2 */ xfs_agblock_t wantend; /* end of target extent */ bool userdata = datatype & XFS_ALLOC_USERDATA; @@ -577,8 +577,8 @@ xfs_alloc_fixup_trees( int i; /* operation results */ xfs_agblock_t nfbno1; /* first new free startblock */ xfs_agblock_t nfbno2; /* second new free startblock */ - xfs_extlen_t nflen1=0; /* first new free length */ - xfs_extlen_t nflen2=0; /* second new free length */ + xfs_extlen_t nflen1 = 0; /* first new free length */ + xfs_extlen_t nflen2 = 0; /* second new free length */ struct xfs_mount *mp; bool fixup_longest = false; From 44b9553c3dd043f14903d8ae5d4e7a9797c6d92e Mon Sep 17 00:00:00 2001 From: Raphael Pinsonneault-Thibeault Date: Thu, 29 Jan 2026 13:50:21 -0500 Subject: [PATCH 56/67] xfs: validate log record version against superblock 
log version Syzbot creates a fuzzed record where xfs_has_logv2() but the xlog_rec_header h_version != XLOG_VERSION_2. This causes a KASAN: slab-out-of-bounds read in xlog_do_recovery_pass() -> xlog_recover_process() -> xlog_cksum(). Fix by adding a check to xlog_valid_rec_header() to abort journal recovery if the xlog_rec_header h_version does not match the super block log version. A file system with a version 2 log will only ever set XLOG_VERSION_2 in its headers (and v1 will only ever set V_1), so if there is any mismatch, either the journal or the superblock has been corrupted and therefore we abort processing with a -EFSCORRUPTED error immediately. Also, refactor the structure of the validity checks for better readability. At the default error level (LOW), XFS_IS_CORRUPT() emits the condition that failed, the file and line number it is located at, then dumps the stack. This gives us everything we need to know about the failure if we do a single validity check per XFS_IS_CORRUPT(). Reported-by: syzbot+9f6d080dece587cfdd4c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9f6d080dece587cfdd4c Tested-by: syzbot+9f6d080dece587cfdd4c@syzkaller.appspotmail.com Fixes: 45cf976008dd ("xfs: fix log recovery buffer allocation for the legacy h_size fixup") Signed-off-by: Raphael Pinsonneault-Thibeault Reviewed-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log_recover.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 94e8598056eb..935905743f94 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2953,18 +2953,23 @@ xlog_valid_rec_header( xfs_daddr_t blkno, int bufsize) { + struct xfs_mount *mp = log->l_mp; + u32 h_version = be32_to_cpu(rhead->h_version); int hlen; - if (XFS_IS_CORRUPT(log->l_mp, + if (XFS_IS_CORRUPT(mp, rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) return -EFSCORRUPTED; - if (XFS_IS_CORRUPT(log->l_mp, - (!rhead->h_version || - (be32_to_cpu(rhead->h_version) & - (~XLOG_VERSION_OKBITS))))) { - xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", - __func__, be32_to_cpu(rhead->h_version)); - return -EFSCORRUPTED; + + /* + * The log version must match the superblock + */ + if (xfs_has_logv2(mp)) { + if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_2)) + return -EFSCORRUPTED; + } else { + if (XFS_IS_CORRUPT(mp, h_version != XLOG_VERSION_1)) + return -EFSCORRUPTED; } /* @@ -2972,12 +2977,12 @@ xlog_valid_rec_header( * and h_len must not be greater than LR buffer size. */ hlen = be32_to_cpu(rhead->h_len); - if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) + if (XFS_IS_CORRUPT(mp, hlen <= 0 || hlen > bufsize)) return -EFSCORRUPTED; - if (XFS_IS_CORRUPT(log->l_mp, - blkno > log->l_logBBsize || blkno > INT_MAX)) + if (XFS_IS_CORRUPT(mp, blkno > log->l_logBBsize || blkno > INT_MAX)) return -EFSCORRUPTED; + return 0; } From 9a228d141536a91bf9e48a21b37ebb0f8eea8273 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:16 +0100 Subject: [PATCH 57/67] xfs: fix the errno sign for the xfs_errortag_{add,clearall} stubs All errno values should be negative in the kernel. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_error.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index fe6a71bbe9cd..3a78c8dfaec8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -60,8 +60,8 @@ int xfs_errortag_clearall(struct xfs_mount *mp); #define xfs_errortag_del(mp) #define XFS_TEST_ERROR(mp, tag) (false) #define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) -#define xfs_errortag_add(mp, tag) (ENOSYS) -#define xfs_errortag_clearall(mp) (ENOSYS) +#define xfs_errortag_add(mp, tag) (-ENOSYS) +#define xfs_errortag_clearall(mp) (-ENOSYS) #endif /* DEBUG */ /* From 394969e2f9d11427ce493e171949958122dc11ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:17 +0100 Subject: [PATCH 58/67] xfs: allocate m_errortag early Ensure the mount structure always has a valid m_errortag for debug builds. This removes the NULL checking from the runtime code, and prepares for allowing to set errortags from mount. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_error.c | 26 +------------------------- fs/xfs/xfs_super.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 873f2d1a134c..dfa4abf9fd1a 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -114,18 +114,8 @@ int xfs_errortag_init( struct xfs_mount *mp) { - int ret; - - mp->m_errortag = kzalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, - GFP_KERNEL | __GFP_RETRY_MAYFAIL); - if (!mp->m_errortag) - return -ENOMEM; - - ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, &mp->m_kobj, "errortag"); - if (ret) - kfree(mp->m_errortag); - return ret; } void @@ -133,7 +123,6 @@ xfs_errortag_del( struct xfs_mount *mp) { xfs_sysfs_del(&mp->m_errortag_kobj); - kfree(mp->m_errortag); } static bool @@ -154,8 +143,6 @@ xfs_errortag_enabled( struct xfs_mount *mp, unsigned int tag) { - if (!mp->m_errortag) - return false; if (!xfs_errortag_valid(tag)) return false; @@ -171,17 +158,6 @@ xfs_errortag_test( { unsigned int randfactor; - /* - * To be able to use error injection anywhere, we need to ensure error - * injection mechanism is already initialized. - * - * Code paths like I/O completion can be called before the - * initialization is complete, but be able to inject errors in such - * places is still useful. 
- */ - if (!mp->m_errortag) - return false; - if (!xfs_errortag_valid(error_tag)) return false; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b6a92f027d64..5029bf63b87d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -40,6 +40,7 @@ #include "xfs_defer.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_errortag.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" #include "xfs_rtbitmap.h" @@ -824,6 +825,9 @@ xfs_mount_free( debugfs_remove(mp->m_debugfs); kfree(mp->m_rtname); kfree(mp->m_logname); +#ifdef DEBUG + kfree(mp->m_errortag); +#endif kfree(mp); } @@ -2266,6 +2270,14 @@ xfs_init_fs_context( mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; +#ifdef DEBUG + mp->m_errortag = kcalloc(XFS_ERRTAG_MAX, sizeof(*mp->m_errortag), + GFP_KERNEL); + if (!mp->m_errortag) { + kfree(mp); + return -ENOMEM; + } +#endif spin_lock_init(&mp->m_sb_lock); for (i = 0; i < XG_TYPE_MAX; i++) From b8862a09d8256a9037293f1da3b4617b21de26f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:18 +0100 Subject: [PATCH 59/67] xfs: don't validate error tags in the I/O path We can trust XFS developers enough to not pass random stuff to XFS_ERROR_TEST/DELAY. Open code the validity check in xfs_errortag_add, which is the only place that receives unvalidated error tag values from user space, and drop the now pointless xfs_errortag_enabled helper. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_errortag.h | 2 +- fs/xfs/xfs_error.c | 38 ++++++++++-------------------------- fs/xfs/xfs_error.h | 2 +- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 57e47077c75a..b7d98471684b 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -53,7 +53,7 @@ * Drop-writes support removed because write error handling cannot trash * pre-existing delalloc extents in any useful way anymore. We retain the * definition so that we can reject it as an invalid value in - * xfs_errortag_valid(). + * xfs_errortag_add(). */ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index dfa4abf9fd1a..52a1d51126e3 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -125,30 +125,6 @@ xfs_errortag_del( xfs_sysfs_del(&mp->m_errortag_kobj); } -static bool -xfs_errortag_valid( - unsigned int error_tag) -{ - if (error_tag >= XFS_ERRTAG_MAX) - return false; - - /* Error out removed injection types */ - if (error_tag == XFS_ERRTAG_DROP_WRITES) - return false; - return true; -} - -bool -xfs_errortag_enabled( - struct xfs_mount *mp, - unsigned int tag) -{ - if (!xfs_errortag_valid(tag)) - return false; - - return mp->m_errortag[tag] != 0; -} - bool xfs_errortag_test( struct xfs_mount *mp, @@ -158,9 +134,6 @@ xfs_errortag_test( { unsigned int randfactor; - if (!xfs_errortag_valid(error_tag)) - return false; - randfactor = mp->m_errortag[error_tag]; if (!randfactor || get_random_u32_below(randfactor)) return false; @@ -178,8 +151,17 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (!xfs_errortag_valid(error_tag)) + if (error_tag >= XFS_ERRTAG_MAX) return -EINVAL; + + /* Error out removed injection types */ + switch (error_tag) { + case XFS_ERRTAG_DROP_WRITES: + return -EINVAL; + default: + break; + } + 
mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag]; return 0; } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 3a78c8dfaec8..ec22546a8ca8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -44,7 +44,7 @@ bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); #define XFS_ERRORTAG_DELAY(mp, tag) \ do { \ might_sleep(); \ - if (!xfs_errortag_enabled((mp), (tag))) \ + if (!mp->m_errortag[tag]) \ break; \ xfs_warn_ratelimited((mp), \ "Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \ From e2d62bfd99b6b79d7c5a4c543c2d84049f01f24f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:19 +0100 Subject: [PATCH 60/67] xfs: move the guts of XFS_ERRORTAG_DELAY out of line Mirror what is done for the more common XFS_ERRORTAG_TEST version, and also only look at the error tag value once now that we can easily have a local variable. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_error.c | 21 +++++++++++++++++++++ fs/xfs/xfs_error.h | 15 +++------------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 52a1d51126e3..a6f160a4d0e9 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -144,6 +144,27 @@ xfs_errortag_test( return true; } +void +xfs_errortag_delay( + struct xfs_mount *mp, + const char *file, + int line, + unsigned int error_tag) +{ + unsigned int delay = mp->m_errortag[error_tag]; + + might_sleep(); + + if (!delay) + return; + + xfs_warn_ratelimited(mp, +"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", + delay, file, line, + mp->m_super->s_id); + mdelay(delay); +} + int xfs_errortag_add( struct xfs_mount *mp, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index ec22546a8ca8..b40e7c671d2a 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -40,19 +40,10 @@ bool xfs_errortag_test(struct xfs_mount *mp, const char *file, int line, unsigned int error_tag); #define XFS_TEST_ERROR(mp, tag) \ xfs_errortag_test((mp), __FILE__, __LINE__, (tag)) -bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag); +void xfs_errortag_delay(struct xfs_mount *mp, const char *file, int line, + unsigned int error_tag); #define XFS_ERRORTAG_DELAY(mp, tag) \ - do { \ - might_sleep(); \ - if (!mp->m_errortag[tag]) \ - break; \ - xfs_warn_ratelimited((mp), \ -"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \ - (mp)->m_errortag[(tag)], __FILE__, __LINE__, \ - (mp)->m_super->s_id); \ - mdelay((mp)->m_errortag[(tag)]); \ - } while (0) - + xfs_errortag_delay((mp), __FILE__, __LINE__, (tag)) int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag); int xfs_errortag_clearall(struct xfs_mount *mp); #else From 4d8f42466a3ba2342b876822ff0582a49e174c9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:20 +0100 Subject: [PATCH 61/67] xfs: use 
WRITE_ONCE/READ_ONCE for m_errortag There is no synchronization for updating m_errortag, which is fine as it's just a debug tool. It would still be nice to fully avoid the theoretical case of torn values, so use WRITE_ONCE and READ_ONCE to access the members. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_error.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a6f160a4d0e9..53704f1ed791 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -50,17 +50,18 @@ xfs_errortag_attr_store( { struct xfs_mount *mp = to_mp(kobject); unsigned int error_tag = to_attr(attr)->tag; + unsigned int val; int ret; if (strcmp(buf, "default") == 0) { - mp->m_errortag[error_tag] = - xfs_errortag_random_default[error_tag]; + val = xfs_errortag_random_default[error_tag]; } else { - ret = kstrtouint(buf, 0, &mp->m_errortag[error_tag]); + ret = kstrtouint(buf, 0, &val); if (ret) return ret; } + WRITE_ONCE(mp->m_errortag[error_tag], val); return count; } @@ -71,9 +72,9 @@ xfs_errortag_attr_show( char *buf) { struct xfs_mount *mp = to_mp(kobject); - unsigned int error_tag = to_attr(attr)->tag; - return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_errortag[error_tag]); + return snprintf(buf, PAGE_SIZE, "%u\n", + READ_ONCE(mp->m_errortag[to_attr(attr)->tag])); } static const struct sysfs_ops xfs_errortag_sysfs_ops = { @@ -134,7 +135,7 @@ xfs_errortag_test( { unsigned int randfactor; - randfactor = mp->m_errortag[error_tag]; + randfactor = READ_ONCE(mp->m_errortag[error_tag]); if (!randfactor || get_random_u32_below(randfactor)) return false; @@ -151,7 +152,7 @@ xfs_errortag_delay( int line, unsigned int error_tag) { - unsigned int delay = mp->m_errortag[error_tag]; + unsigned int delay = READ_ONCE(mp->m_errortag[error_tag]); might_sleep(); @@ -183,7 +184,8 @@ xfs_errortag_add( break; } - 
mp->m_errortag[error_tag] = xfs_errortag_random_default[error_tag]; + WRITE_ONCE(mp->m_errortag[error_tag], + xfs_errortag_random_default[error_tag]); return 0; } @@ -191,7 +193,10 @@ int xfs_errortag_clearall( struct xfs_mount *mp) { - memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX); + unsigned int i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) + WRITE_ONCE(mp->m_errortag[i], 0); return 0; } #endif /* DEBUG */ From 2d263debd7f1df2091efb4c06eda02ab04b68562 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:21 +0100 Subject: [PATCH 62/67] xfs: allow setting errortags at mount time Add an errortag mount option that enables an errortag with the default injection frequency. This allows injecting errors into the mount process instead of just on live file systems, and thus test mount error handling. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- Documentation/admin-guide/xfs.rst | 8 +++++++ fs/xfs/xfs_error.c | 36 +++++++++++++++++++++++++++++++ fs/xfs/xfs_error.h | 4 ++++ fs/xfs/xfs_super.c | 8 ++++++- 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index c85cd327af28..746ea60eed3f 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -215,6 +215,14 @@ When mounting an XFS filesystem, the following options are accepted. inconsistent namespace presentation during or after a failover event. + errortag=tagname + When specified, enables the error inject tag named "tagname" with the + default frequency. Can be specified multiple times to enable multiple + errortags. Specifying this option on remount will reset the error tag + to the default value if it was set to any other value before. + This option is only supported when CONFIG_XFS_DEBUG is enabled, and + will not be reflected in /proc/self/mounts. 
+ Deprecation of V4 Format ======================== diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 53704f1ed791..d652240a1dca 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -22,6 +22,12 @@ static const unsigned int xfs_errortag_random_default[] = { XFS_ERRTAGS }; #undef XFS_ERRTAG +#define XFS_ERRTAG(_tag, _name, _default) \ + [XFS_ERRTAG_##_tag] = __stringify(_name), +#include "xfs_errortag.h" +static const char *xfs_errortag_names[] = { XFS_ERRTAGS }; +#undef XFS_ERRTAG + struct xfs_errortag_attr { struct attribute attr; unsigned int tag; @@ -189,6 +195,36 @@ xfs_errortag_add( return 0; } +int +xfs_errortag_add_name( + struct xfs_mount *mp, + const char *tag_name) +{ + unsigned int i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) { + if (xfs_errortag_names[i] && + !strcmp(xfs_errortag_names[i], tag_name)) + return xfs_errortag_add(mp, i); + } + + return -EINVAL; +} + +void +xfs_errortag_copy( + struct xfs_mount *dst_mp, + struct xfs_mount *src_mp) +{ + unsigned int val, i; + + for (i = 0; i < XFS_ERRTAG_MAX; i++) { + val = READ_ONCE(src_mp->m_errortag[i]); + if (val) + WRITE_ONCE(dst_mp->m_errortag[i], val); + } +} + int xfs_errortag_clearall( struct xfs_mount *mp) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index b40e7c671d2a..05fc1d1cf521 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -45,6 +45,8 @@ void xfs_errortag_delay(struct xfs_mount *mp, const char *file, int line, #define XFS_ERRORTAG_DELAY(mp, tag) \ xfs_errortag_delay((mp), __FILE__, __LINE__, (tag)) int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag); +int xfs_errortag_add_name(struct xfs_mount *mp, const char *tag_name); +void xfs_errortag_copy(struct xfs_mount *dst_mp, struct xfs_mount *src_mp); int xfs_errortag_clearall(struct xfs_mount *mp); #else #define xfs_errortag_init(mp) (0) @@ -52,6 +54,8 @@ int xfs_errortag_clearall(struct xfs_mount *mp); #define XFS_TEST_ERROR(mp, tag) (false) #define XFS_ERRORTAG_DELAY(mp, tag) ((void)0) #define 
xfs_errortag_add(mp, tag) (-ENOSYS) +#define xfs_errortag_copy(dst_mp, src_mp) ((void)0) +#define xfs_errortag_add_name(mp, tag_name) (-ENOSYS) #define xfs_errortag_clearall(mp) (-ENOSYS) #endif /* DEBUG */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5029bf63b87d..1dcbad1c5d68 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -40,6 +40,7 @@ #include "xfs_defer.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_error.h" #include "xfs_errortag.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" @@ -114,7 +115,7 @@ enum { Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones, - Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, + Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, Opt_errortag, }; #define fsparam_dead(NAME) \ @@ -173,6 +174,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = { fsparam_flag("lifetime", Opt_lifetime), fsparam_flag("nolifetime", Opt_nolifetime), fsparam_string("max_atomic_write", Opt_max_atomic_write), + fsparam_string("errortag", Opt_errortag), {} }; @@ -1593,6 +1595,8 @@ xfs_fs_parse_param( return -EINVAL; } return 0; + case Opt_errortag: + return xfs_errortag_add_name(parsing_mp, param->string); default: xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); return -EINVAL; @@ -2184,6 +2188,8 @@ xfs_fs_reconfigure( if (error) return error; + xfs_errortag_copy(mp, new_mp); + /* Validate new max_atomic_write option before making other changes */ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) { error = xfs_set_max_atomic_write_opt(mp, From 32ae9b893a1dc341274ffabd3cdcc63134f36060 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:22 +0100 Subject: [PATCH 63/67] xfs: don't mark all discard issued by zoned GC as sync Discards are not usually sync when issued from zoned garbage collection, so drop the
REQ_SYNC flag. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Hans Holmberg Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 7bdc5043cc1a..60964c926f9f 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -905,7 +905,8 @@ xfs_zone_gc_prepare_reset( if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { if (!bdev_max_discard_sectors(bio->bi_bdev)) return false; - bio->bi_opf = REQ_OP_DISCARD | REQ_SYNC; + bio->bi_opf &= ~REQ_OP_ZONE_RESET; + bio->bi_opf |= REQ_OP_DISCARD; bio->bi_iter.bi_size = XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg)); } From 06873dbd940dea955b30efb0b59212f1c858f6d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:23 +0100 Subject: [PATCH 64/67] xfs: refactor zone reset handling Include the actual bio submission in the common zone reset handler to share more code and prepare for adding error injection for zone reset. Note that I plan to refactor the block layer submit_bio_wait and bio_await_chain code in the next merge window to remove some of the code duplication added here. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Hans Holmberg Reviewed-by: Darrick J. 
Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 49 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 60964c926f9f..4023448e85d1 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -893,40 +893,55 @@ out: bio_put(&chunk->bio); } -static bool -xfs_zone_gc_prepare_reset( - struct bio *bio, - struct xfs_rtgroup *rtg) +static void +xfs_submit_zone_reset_bio( + struct xfs_rtgroup *rtg, + struct bio *bio) { trace_xfs_zone_reset(rtg); ASSERT(rtg_rmap(rtg)->i_used_blocks == 0); bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { - if (!bdev_max_discard_sectors(bio->bi_bdev)) - return false; + /* + * Also use the bio to drive the state machine when neither + * zone reset nor discard is supported to keep things simple. + */ + if (!bdev_max_discard_sectors(bio->bi_bdev)) { + bio_endio(bio); + return; + } bio->bi_opf &= ~REQ_OP_ZONE_RESET; bio->bi_opf |= REQ_OP_DISCARD; bio->bi_iter.bi_size = XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg)); } - return true; + submit_bio(bio); +} + +static void xfs_bio_wait_endio(struct bio *bio) +{ + complete(bio->bi_private); } int xfs_zone_gc_reset_sync( struct xfs_rtgroup *rtg) { - int error = 0; + DECLARE_COMPLETION_ONSTACK(done); struct bio bio; + int error; bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0, - REQ_OP_ZONE_RESET); - if (xfs_zone_gc_prepare_reset(&bio, rtg)) - error = submit_bio_wait(&bio); - bio_uninit(&bio); + REQ_OP_ZONE_RESET | REQ_SYNC); + bio.bi_private = &done; + bio.bi_end_io = xfs_bio_wait_endio; + xfs_submit_zone_reset_bio(rtg, &bio); + wait_for_completion_io(&done); + error = blk_status_to_errno(bio.bi_status); + bio_uninit(&bio); return error; } @@ -961,15 +976,7 @@ xfs_zone_gc_reset_zones( chunk->data = data; WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, 
&data->resetting); - - /* - * Also use the bio to drive the state machine when neither - * zone reset nor discard is supported to keep things simple. - */ - if (xfs_zone_gc_prepare_reset(bio, rtg)) - submit_bio(bio); - else - bio_endio(bio); + xfs_submit_zone_reset_bio(rtg, bio); } while (next); } From 41374ae69ec3a910950d3888f444f80678c6f308 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:24 +0100 Subject: [PATCH 65/67] xfs: add zone reset error injection Add a new errortag to test that zone reset errors are handled correctly. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Hans Holmberg Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_errortag.h | 6 ++++-- fs/xfs/xfs_zone_gc.c | 13 +++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index b7d98471684b..6de207fed2d8 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -74,7 +74,8 @@ #define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44 #define XFS_ERRTAG_METAFILE_RESV_CRITICAL 45 #define XFS_ERRTAG_FORCE_ZERO_RANGE 46 -#define XFS_ERRTAG_MAX 47 +#define XFS_ERRTAG_ZONE_RESET 47 +#define XFS_ERRTAG_MAX 48 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. 
@@ -135,7 +136,8 @@ XFS_ERRTAG(WB_DELAY_MS, wb_delay_ms, 3000) \ XFS_ERRTAG(WRITE_DELAY_MS, write_delay_ms, 3000) \ XFS_ERRTAG(EXCHMAPS_FINISH_ONE, exchmaps_finish_one, 1) \ XFS_ERRTAG(METAFILE_RESV_CRITICAL, metafile_resv_crit, 4) \ -XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) +XFS_ERRTAG(FORCE_ZERO_RANGE, force_zero_range, 4) \ +XFS_ERRTAG(ZONE_RESET, zone_reset, 1) #endif /* XFS_ERRTAG */ #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 4023448e85d1..570102184904 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -16,6 +16,8 @@ #include "xfs_rmap.h" #include "xfs_rtbitmap.h" #include "xfs_rtrmap_btree.h" +#include "xfs_errortag.h" +#include "xfs_error.h" #include "xfs_zone_alloc.h" #include "xfs_zone_priv.h" #include "xfs_zones.h" @@ -898,9 +900,17 @@ xfs_submit_zone_reset_bio( struct xfs_rtgroup *rtg, struct bio *bio) { + struct xfs_mount *mp = rtg_mount(rtg); + trace_xfs_zone_reset(rtg); ASSERT(rtg_rmap(rtg)->i_used_blocks == 0); + + if (XFS_TEST_ERROR(mp, XFS_ERRTAG_ZONE_RESET)) { + bio_io_error(bio); + return; + } + bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { /* @@ -913,8 +923,7 @@ xfs_submit_zone_reset_bio( } bio->bi_opf &= ~REQ_OP_ZONE_RESET; bio->bi_opf |= REQ_OP_DISCARD; - bio->bi_iter.bi_size = - XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg)); + bio->bi_iter.bi_size = XFS_FSB_TO_B(mp, rtg_blocks(rtg)); } submit_bio(bio); From edf6078212c366459d3c70833290579b200128b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:25 +0100 Subject: [PATCH 66/67] xfs: give the defer_relog stat a xs_ prefix Make this counter naming consistent with all the others. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_defer.c | 2 +- fs/xfs/xfs_stats.c | 6 +++--- fs/xfs/xfs_stats.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 0bd87b40d091..c39e40dcb0b0 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -565,7 +565,7 @@ xfs_defer_relog( continue; trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); - XFS_STATS_INC((*tpp)->t_mountp, defer_relog); + XFS_STATS_INC((*tpp)->t_mountp, xs_defer_relog); xfs_defer_relog_intent(*tpp, dfp); } diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 9781222e0653..3fe1f5412537 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -23,7 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) uint64_t xs_xstrat_bytes = 0; uint64_t xs_write_bytes = 0; uint64_t xs_read_bytes = 0; - uint64_t defer_relog = 0; + uint64_t xs_defer_relog = 0; static const struct xstats_entry { char *desc; @@ -76,13 +76,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes; xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes; xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; - defer_relog += per_cpu_ptr(stats, i)->s.defer_relog; + xs_defer_relog += per_cpu_ptr(stats, i)->s.xs_defer_relog; } len += scnprintf(buf + len, PATH_MAX-len, "xpc %llu %llu %llu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n", - defer_relog); + xs_defer_relog); len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 15ba1abcf253..d86c6ce35010 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -142,7 +142,7 @@ struct __xfsstats { uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes; uint64_t xs_read_bytes; - 
uint64_t defer_relog; + uint64_t xs_defer_relog; }; #define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t)) From e33839b514a8af27ba03f9f2a414d154aa980320 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Jan 2026 06:19:26 +0100 Subject: [PATCH 67/67] xfs: add sysfs stats for zoned GC Add counters of read, write and zone_reset operations as well as GC written bytes to sysfs. This way they can be easily used for monitoring tools and test cases. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_stats.c | 6 +++++- fs/xfs/xfs_stats.h | 6 ++++++ fs/xfs/xfs_zone_gc.c | 7 +++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index 3fe1f5412537..017db0361cd8 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -24,6 +24,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) uint64_t xs_write_bytes = 0; uint64_t xs_read_bytes = 0; uint64_t xs_defer_relog = 0; + uint64_t xs_gc_bytes = 0; static const struct xstats_entry { char *desc; @@ -57,7 +58,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { "rtrmapbt_mem", xfsstats_offset(xs_rtrefcbt_2) }, { "rtrefcntbt", xfsstats_offset(xs_qm_dqreclaims)}, /* we print both series of quota information together */ - { "qm", xfsstats_offset(xs_xstrat_bytes)}, + { "qm", xfsstats_offset(xs_gc_read_calls)}, + { "zoned", xfsstats_offset(__pad1)}, }; /* Loop over all stats groups */ @@ -77,6 +79,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes; xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; xs_defer_relog += per_cpu_ptr(stats, i)->s.xs_defer_relog; + xs_gc_bytes += per_cpu_ptr(stats, i)->s.xs_gc_bytes; } len += scnprintf(buf + len, PATH_MAX-len, "xpc %llu %llu %llu\n", @@ -89,6 +92,7 @@ int xfs_stats_format(struct 
xfsstats __percpu *stats, char *buf) #else 0); #endif + len += scnprintf(buf + len, PATH_MAX-len, "gc xpc %llu\n", xs_gc_bytes); return len; } diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index d86c6ce35010..153d2381d0a8 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -138,11 +138,17 @@ struct __xfsstats { uint32_t xs_qm_dqwants; uint32_t xs_qm_dquot; uint32_t xs_qm_dquot_unused; +/* Zone GC counters */ + uint32_t xs_gc_read_calls; + uint32_t xs_gc_write_calls; + uint32_t xs_gc_zone_reset_calls; + uint32_t __pad1; /* Extra precision counters */ uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes; uint64_t xs_read_bytes; uint64_t xs_defer_relog; + uint64_t xs_gc_bytes; }; #define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t)) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 570102184904..1f1f9fc973af 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -712,6 +712,8 @@ xfs_zone_gc_start_chunk( data->scratch_head = (data->scratch_head + len) % data->scratch_size; data->scratch_available -= len; + XFS_STATS_INC(mp, xs_gc_read_calls); + WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_add_tail(&chunk->entry, &data->reading); xfs_zone_gc_iter_advance(iter, irec.rm_blockcount); @@ -815,6 +817,9 @@ xfs_zone_gc_write_chunk( return; } + XFS_STATS_INC(mp, xs_gc_write_calls); + XFS_STATS_ADD(mp, xs_gc_bytes, chunk->len); + WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW); list_move_tail(&chunk->entry, &data->writing); @@ -911,6 +916,8 @@ xfs_submit_zone_reset_bio( return; } + XFS_STATS_INC(mp, xs_gc_zone_reset_calls); + bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { /*