mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 02:44:41 +01:00
xfs: start creating infrastructure for health monitoring
Start creating helper functions and infrastructure to pass filesystem health events to a health monitoring file. Since this is an administrative interface, we only support a single health monitor process per filesystem, so we don't need to use anything fancy such as notifier chains (== tons of indirect calls). Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
347b7042fb
commit
a48373e7d3
8 changed files with 317 additions and 0 deletions
|
|
@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \
|
|||
xfs_globals.o \
|
||||
xfs_handle.o \
|
||||
xfs_health.o \
|
||||
xfs_healthmon.o \
|
||||
xfs_icache.o \
|
||||
xfs_ioctl.o \
|
||||
xfs_iomap.o \
|
||||
|
|
|
|||
|
|
@ -1003,6 +1003,12 @@ struct xfs_rtgroup_geometry {
|
|||
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
|
||||
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
|
||||
|
||||
/*
 * Argument structure for XFS_IOC_HEALTH_MONITOR.  The fields add up to 32
 * bytes (8 + 1 + 23).  The ioctl handler currently rejects the call with
 * -EINVAL unless flags, format, and every pad byte are zero.
 */
struct xfs_health_monitor {
|
||||
__u64 flags; /* flags */
|
||||
__u8 format; /* output format */
|
||||
__u8 pad[23]; /* zeroes */
|
||||
};
|
||||
|
||||
/*
|
||||
* ioctl commands that are used by Linux filesystems
|
||||
*/
|
||||
|
|
@ -1042,6 +1048,7 @@ struct xfs_rtgroup_geometry {
|
|||
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
|
||||
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
|
||||
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
|
||||
#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor)
|
||||
|
||||
/*
|
||||
* ioctl commands that replace IRIX syssgi()'s
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
#include "xfs_da_btree.h"
|
||||
#include "xfs_quota_defs.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_healthmon.h"
|
||||
|
||||
#include <linux/fserror.h>
|
||||
|
||||
|
|
|
|||
262
fs/xfs/xfs_healthmon.c
Normal file
262
fs/xfs/xfs_healthmon.c
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Copyright (c) 2024-2026 Oracle. All Rights Reserved.
|
||||
* Author: Darrick J. Wong <djwong@kernel.org>
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_shared.h"
|
||||
#include "xfs_format.h"
|
||||
#include "xfs_log_format.h"
|
||||
#include "xfs_trans_resv.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_da_format.h"
|
||||
#include "xfs_da_btree.h"
|
||||
#include "xfs_quota_defs.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_healthmon.h"
|
||||
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/eventpoll.h>
|
||||
#include <linux/poll.h>
|
||||
|
||||
/*
|
||||
* Live Health Monitoring
|
||||
* ======================
|
||||
*
|
||||
* Autonomous self-healing of XFS filesystems requires a means for the kernel
|
||||
* to send filesystem health events to a monitoring daemon in userspace. To
|
||||
* accomplish this, we establish a thread_with_file kthread object to handle
|
||||
* translating internal events about filesystem health into a format that can
|
||||
* be parsed easily by userspace. When those internal events occur, the core
|
||||
* filesystem code calls this health monitor to convey the events to userspace.
|
||||
* Userspace reads events from the file descriptor returned by the ioctl.
|
||||
*
|
||||
* The healthmon abstraction has a weak reference to the host filesystem mount
|
||||
* so that the queueing and processing of the events do not pin the mount and
|
||||
* cannot slow down the main filesystem. The healthmon object can exist past
|
||||
* the end of the filesystem mount.
|
||||
*/
|
||||
|
||||
/*
 * Value stored in xfs_healthmon::mount_cookie when the monitor is not
 * attached to any mount.
 */
|
||||
#define DETACHED_MOUNT_COOKIE ((uintptr_t)0)
|
||||
|
||||
/*
 * File-scope spinlock (one lock shared by all mounts) that makes updates of
 * xfs_mount::m_healthmon and xfs_healthmon::mount_cookie atomic with respect
 * to each other.
 */
|
||||
static DEFINE_SPINLOCK(xfs_healthmon_lock);
|
||||
|
||||
/* Grab a reference to the healthmon object for a given mount, if any. */
|
||||
static struct xfs_healthmon *
|
||||
xfs_healthmon_get(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
struct xfs_healthmon *hm;
|
||||
|
||||
rcu_read_lock();
|
||||
hm = mp->m_healthmon;
|
||||
if (hm && !refcount_inc_not_zero(&hm->ref))
|
||||
hm = NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return hm;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release the reference to a healthmon object. If there are no more holders,
|
||||
* free the health monitor after an RCU grace period to eliminate possibility
|
||||
* of races with xfs_healthmon_get.
|
||||
*/
|
||||
static void
|
||||
xfs_healthmon_put(
|
||||
struct xfs_healthmon *hm)
|
||||
{
|
||||
if (refcount_dec_and_test(&hm->ref))
|
||||
kfree_rcu_mightsleep(hm);
|
||||
}
|
||||
|
||||
/* Attach a health monitor to an xfs_mount. Only one allowed at a time. */
|
||||
STATIC int
|
||||
xfs_healthmon_attach(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_healthmon *hm)
|
||||
{
|
||||
spin_lock(&xfs_healthmon_lock);
|
||||
if (mp->m_healthmon != NULL) {
|
||||
spin_unlock(&xfs_healthmon_lock);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
refcount_inc(&hm->ref);
|
||||
mp->m_healthmon = hm;
|
||||
hm->mount_cookie = (uintptr_t)mp->m_super;
|
||||
spin_unlock(&xfs_healthmon_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Detach a xfs mount from a specific healthmon instance. */
|
||||
STATIC void
|
||||
xfs_healthmon_detach(
|
||||
struct xfs_healthmon *hm)
|
||||
{
|
||||
spin_lock(&xfs_healthmon_lock);
|
||||
if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
|
||||
spin_unlock(&xfs_healthmon_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
|
||||
hm->mount_cookie = DETACHED_MOUNT_COOKIE;
|
||||
spin_unlock(&xfs_healthmon_lock);
|
||||
|
||||
xfs_healthmon_put(hm);
|
||||
}
|
||||
|
||||
/*
 * Unmount-time hook: if @mp has a health monitor, disconnect it from the
 * mount.  The monitor object itself is only freed once its last reference
 * is dropped.
 */
void
xfs_healthmon_unmount(
	struct xfs_mount		*mp)
{
	struct xfs_healthmon		*hm;

	hm = xfs_healthmon_get(mp);
	if (hm) {
		xfs_healthmon_detach(hm);

		/* Drop the temporary reference taken just above. */
		xfs_healthmon_put(hm);
	}
}
|
||||
|
||||
/*
 * Read handler for the health monitor file.  This version delivers no
 * events: both arguments are ignored and every read fails with -EIO.
 */
STATIC ssize_t
|
||||
xfs_healthmon_read_iter(
|
||||
struct kiocb *iocb,
|
||||
struct iov_iter *to)
|
||||
{
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* Free the health monitoring information. */
|
||||
STATIC int
|
||||
xfs_healthmon_release(
|
||||
struct inode *inode,
|
||||
struct file *file)
|
||||
{
|
||||
struct xfs_healthmon *hm = file->private_data;
|
||||
|
||||
/*
|
||||
* We might be closing the healthmon file before the filesystem
|
||||
* unmounts, because userspace processes can terminate at any time and
|
||||
* for any reason. Null out xfs_mount::m_healthmon so that another
|
||||
* process can create another health monitor file.
|
||||
*/
|
||||
xfs_healthmon_detach(hm);
|
||||
|
||||
xfs_healthmon_put(hm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Validate ioctl parameters. */
|
||||
static inline bool
|
||||
xfs_healthmon_validate(
|
||||
const struct xfs_health_monitor *hmo)
|
||||
{
|
||||
if (hmo->flags)
|
||||
return false;
|
||||
if (hmo->format)
|
||||
return false;
|
||||
if (memchr_inv(&hmo->pad, 0, sizeof(hmo->pad)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Emit some data about the health monitoring fd. */
|
||||
static void
|
||||
xfs_healthmon_show_fdinfo(
|
||||
struct seq_file *m,
|
||||
struct file *file)
|
||||
{
|
||||
struct xfs_healthmon *hm = file->private_data;
|
||||
|
||||
seq_printf(m, "state:\t%s\ndev:\t%d:%d\n",
|
||||
hm->mount_cookie == DETACHED_MOUNT_COOKIE ?
|
||||
"dead" : "alive",
|
||||
MAJOR(hm->dev), MINOR(hm->dev));
|
||||
}
|
||||
|
||||
/*
 * File operations for the health monitor fd.  Only fdinfo reporting, reads,
 * and release are wired up here; no write or ioctl handlers are provided.
 */
static const struct file_operations xfs_healthmon_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.show_fdinfo = xfs_healthmon_show_fdinfo,
|
||||
.read_iter = xfs_healthmon_read_iter,
|
||||
.release = xfs_healthmon_release,
|
||||
};
|
||||
|
||||
/*
|
||||
* Create a health monitoring file. Returns an index to the fd table or a
|
||||
* negative errno.
|
||||
*/
|
||||
long
|
||||
xfs_ioc_health_monitor(
|
||||
struct file *file,
|
||||
struct xfs_health_monitor __user *arg)
|
||||
{
|
||||
struct xfs_health_monitor hmo;
|
||||
struct xfs_healthmon *hm;
|
||||
struct xfs_inode *ip = XFS_I(file_inode(file));
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* The only intended user of the health monitoring system should be the
|
||||
* xfs_healer daemon running on behalf of the whole filesystem in the
|
||||
* initial user namespace. IOWs, we don't allow unprivileged userspace
|
||||
* (they can use fsnotify) nor do we allow containers.
|
||||
*/
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
if (ip->i_ino != mp->m_sb.sb_rootino)
|
||||
return -EPERM;
|
||||
if (current_user_ns() != &init_user_ns)
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&hmo, arg, sizeof(hmo)))
|
||||
return -EFAULT;
|
||||
|
||||
if (!xfs_healthmon_validate(&hmo))
|
||||
return -EINVAL;
|
||||
|
||||
hm = kzalloc(sizeof(*hm), GFP_KERNEL);
|
||||
if (!hm)
|
||||
return -ENOMEM;
|
||||
hm->dev = mp->m_super->s_dev;
|
||||
refcount_set(&hm->ref, 1);
|
||||
|
||||
/*
|
||||
* Try to attach this health monitor to the xfs_mount. The monitor is
|
||||
* considered live and will receive events if this succeeds.
|
||||
*/
|
||||
ret = xfs_healthmon_attach(mp, hm);
|
||||
if (ret)
|
||||
goto out_hm;
|
||||
|
||||
/*
|
||||
* Create the anonymous file and install a fd for it. If it succeeds,
|
||||
* the file owns hm and can go away at any time, so we must not access
|
||||
* it again. This must go last because we can't undo a fd table
|
||||
* installation.
|
||||
*/
|
||||
ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
|
||||
O_CLOEXEC | O_RDONLY);
|
||||
if (ret < 0)
|
||||
goto out_mp;
|
||||
|
||||
return ret;
|
||||
|
||||
out_mp:
|
||||
xfs_healthmon_detach(hm);
|
||||
out_hm:
|
||||
ASSERT(refcount_read(&hm->ref) == 1);
|
||||
xfs_healthmon_put(hm);
|
||||
return ret;
|
||||
}
|
||||
36
fs/xfs/xfs_healthmon.h
Normal file
36
fs/xfs/xfs_healthmon.h
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* Copyright (c) 2024-2026 Oracle. All Rights Reserved.
|
||||
* Author: Darrick J. Wong <djwong@kernel.org>
|
||||
*/
|
||||
#ifndef __XFS_HEALTHMON_H__
|
||||
#define __XFS_HEALTHMON_H__
|
||||
|
||||
/*
 * State of one health monitor.  The object is reference counted (see @ref)
 * and is linked to at most one xfs_mount through xfs_mount::m_healthmon.
 */
struct xfs_healthmon {
|
||||
/*
|
||||
* Weak reference to the xfs filesystem that is being monitored. It
|
||||
* will be set to zero when the filesystem detaches from the monitor.
|
||||
* Do not dereference this pointer.
 * The exception is xfs_healthmon_detach, which converts the cookie back
 * to a super_block, but only while holding xfs_healthmon_lock.
|
||||
*/
|
||||
uintptr_t mount_cookie;
|
||||
|
||||
/*
|
||||
* Device number of the filesystem being monitored. This is for
|
||||
* consistent tracing even after unmount.
|
||||
*/
|
||||
dev_t dev;
|
||||
|
||||
/*
|
||||
* Reference count of this structure. The open healthmon fd holds one
|
||||
* ref, the xfs_mount holds another ref if it points to this object,
|
||||
* and running event handlers hold their own refs.
|
||||
*/
|
||||
refcount_t ref;
|
||||
|
||||
};
|
||||
|
||||
void xfs_healthmon_unmount(struct xfs_mount *mp);
|
||||
|
||||
long xfs_ioc_health_monitor(struct file *file,
|
||||
struct xfs_health_monitor __user *arg);
|
||||
|
||||
#endif /* __XFS_HEALTHMON_H__ */
|
||||
|
|
@ -41,6 +41,7 @@
|
|||
#include "xfs_exchrange.h"
|
||||
#include "xfs_handle.h"
|
||||
#include "xfs_rtgroup.h"
|
||||
#include "xfs_healthmon.h"
|
||||
|
||||
#include <linux/mount.h>
|
||||
#include <linux/fileattr.h>
|
||||
|
|
@ -1419,6 +1420,9 @@ xfs_file_ioctl(
|
|||
case XFS_IOC_COMMIT_RANGE:
|
||||
return xfs_ioc_commit_range(filp, arg);
|
||||
|
||||
case XFS_IOC_HEALTH_MONITOR:
|
||||
return xfs_ioc_health_monitor(filp, arg);
|
||||
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@
|
|||
#include "xfs_rtrefcount_btree.h"
|
||||
#include "scrub/stats.h"
|
||||
#include "xfs_zone_alloc.h"
|
||||
#include "xfs_healthmon.h"
|
||||
|
||||
static DEFINE_MUTEX(xfs_uuid_table_mutex);
|
||||
static int xfs_uuid_table_size;
|
||||
|
|
@ -625,6 +626,7 @@ xfs_unmount_flush_inodes(
|
|||
cancel_delayed_work_sync(&mp->m_reclaim_work);
|
||||
xfs_reclaim_inodes(mp);
|
||||
xfs_health_unmount(mp);
|
||||
xfs_healthmon_unmount(mp);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ struct xfs_ail;
|
|||
struct xfs_quotainfo;
|
||||
struct xfs_da_geometry;
|
||||
struct xfs_perag;
|
||||
struct xfs_healthmon;
|
||||
|
||||
/* dynamic preallocation free space thresholds, 5% down to 1% */
|
||||
enum {
|
||||
|
|
@ -342,6 +343,9 @@ typedef struct xfs_mount {
|
|||
|
||||
/* Hook to feed dirent updates to an active online repair. */
|
||||
struct xfs_hooks m_dir_update_hooks;
|
||||
|
||||
/* Private data referring to a health monitor object. */
|
||||
struct xfs_healthmon *m_healthmon;
|
||||
} xfs_mount_t;
|
||||
|
||||
#define M_IGEO(mp) (&(mp)->m_ino_geo)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue