mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
struct mount: relocate MNT_WRITE_HOLD bit
... from ->mnt_flags to LSB of ->mnt_pprev_for_sb. This is safe: we always set and clear it within the same mount_lock scope, so we won't interfere with list operations - traversals are always forward, so they don't even look at ->mnt_pprev_for_sb, and both insertions and removals are in mount_lock scopes of their own, so that bit will be clear in *all* mount instances during those. Reviewed-by: Christian Brauner <brauner@kernel.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
09a1b33c08
commit
3371fa2f27
3 changed files with 42 additions and 20 deletions
25
fs/mount.h
25
fs/mount.h
|
|
@ -66,7 +66,8 @@ struct mount {
|
|||
struct list_head mnt_child; /* and going through their mnt_child */
|
||||
struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */
|
||||
struct mount * __aligned(1) *mnt_pprev_for_sb;
|
||||
/* except that LSB of pprev will be stolen */
|
||||
/* except that LSB of pprev is stolen */
|
||||
#define WRITE_HOLD 1 /* ... for use by mnt_hold_writers() */
|
||||
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
|
||||
struct list_head mnt_list;
|
||||
struct list_head mnt_expire; /* link in fs-specific expiry list */
|
||||
|
|
@ -244,4 +245,26 @@ static inline struct mount *topmost_overmount(struct mount *m)
|
|||
return m;
|
||||
}
|
||||
|
||||
/*
 * Check a raw ->mnt_pprev_for_sb value for the WRITE_HOLD tag.
 *
 * Takes the pointer value itself rather than the mount, so a caller may
 * pass in a snapshot it has already loaded (e.g. via READ_ONCE()).
 * Returns true iff the low bit (WRITE_HOLD) of that value is set.
 */
static inline bool __test_write_hold(struct mount * __aligned(1) *val)
{
	unsigned long bits = (unsigned long)val;

	return (bits & WRITE_HOLD) != 0;
}
|
||||
|
||||
static inline bool test_write_hold(const struct mount *m)
|
||||
{
|
||||
return __test_write_hold(m->mnt_pprev_for_sb);
|
||||
}
|
||||
|
||||
static inline void set_write_hold(struct mount *m)
|
||||
{
|
||||
m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
|
||||
| WRITE_HOLD);
|
||||
}
|
||||
|
||||
static inline void clear_write_hold(struct mount *m)
|
||||
{
|
||||
m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
|
||||
& ~WRITE_HOLD);
|
||||
}
|
||||
|
||||
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
|
||||
|
|
|
|||
|
|
@ -509,20 +509,20 @@ int mnt_get_write_access(struct vfsmount *m)
|
|||
mnt_inc_writers(mnt);
|
||||
/*
|
||||
* The store to mnt_inc_writers must be visible before we pass
|
||||
* MNT_WRITE_HOLD loop below, so that the slowpath can see our
|
||||
* incremented count after it has set MNT_WRITE_HOLD.
|
||||
* WRITE_HOLD loop below, so that the slowpath can see our
|
||||
* incremented count after it has set WRITE_HOLD.
|
||||
*/
|
||||
smp_mb();
|
||||
might_lock(&mount_lock.lock);
|
||||
while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
|
||||
while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) {
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
||||
cpu_relax();
|
||||
} else {
|
||||
/*
|
||||
* This prevents priority inversion, if the task
|
||||
* setting MNT_WRITE_HOLD got preempted on a remote
|
||||
* setting WRITE_HOLD got preempted on a remote
|
||||
* CPU, and it prevents livelock if the task setting
|
||||
* MNT_WRITE_HOLD has a lower priority and is bound to
|
||||
* WRITE_HOLD has a lower priority and is bound to
|
||||
* the same CPU as the task that is spinning here.
|
||||
*/
|
||||
preempt_enable();
|
||||
|
|
@ -533,7 +533,7 @@ int mnt_get_write_access(struct vfsmount *m)
|
|||
}
|
||||
/*
|
||||
* The barrier pairs with the barrier in sb_start_ro_state_change() making
|
||||
* sure that if we see MNT_WRITE_HOLD cleared, we will also see
|
||||
* sure that if we see WRITE_HOLD cleared, we will also see
|
||||
* s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
|
||||
* mnt_is_readonly() and bail in case we are racing with remount
|
||||
* read-only.
|
||||
|
|
@ -672,15 +672,15 @@ EXPORT_SYMBOL(mnt_drop_write_file);
|
|||
* @mnt.
|
||||
*
|
||||
* Context: This function expects lock_mount_hash() to be held serializing
|
||||
* setting MNT_WRITE_HOLD.
|
||||
* setting WRITE_HOLD.
|
||||
* Return: On success 0 is returned.
|
||||
* On error, -EBUSY is returned.
|
||||
*/
|
||||
static inline int mnt_hold_writers(struct mount *mnt)
|
||||
{
|
||||
mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
|
||||
set_write_hold(mnt);
|
||||
/*
|
||||
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
|
||||
* After storing WRITE_HOLD, we'll read the counters. This store
|
||||
* should be visible before we do.
|
||||
*/
|
||||
smp_mb();
|
||||
|
|
@ -696,9 +696,9 @@ static inline int mnt_hold_writers(struct mount *mnt)
|
|||
* sum up each counter, if we read a counter before it is incremented,
|
||||
* but then read another CPU's count which it has been subsequently
|
||||
* decremented from -- we would see more decrements than we should.
|
||||
* MNT_WRITE_HOLD protects against this scenario, because
|
||||
* WRITE_HOLD protects against this scenario, because
|
||||
* mnt_want_write first increments count, then smp_mb, then spins on
|
||||
* MNT_WRITE_HOLD, so it can't be decremented by another CPU while
|
||||
* WRITE_HOLD, so it can't be decremented by another CPU while
|
||||
* we're counting up here.
|
||||
*/
|
||||
if (mnt_get_writers(mnt) > 0)
|
||||
|
|
@ -720,14 +720,14 @@ static inline int mnt_hold_writers(struct mount *mnt)
|
|||
*/
|
||||
static inline void mnt_unhold_writers(struct mount *mnt)
|
||||
{
|
||||
if (!(mnt->mnt_flags & MNT_WRITE_HOLD))
|
||||
if (!test_write_hold(mnt))
|
||||
return;
|
||||
/*
|
||||
* MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
|
||||
* MNT_READONLY must become visible before ~WRITE_HOLD, so writers
|
||||
* that become unheld will see MNT_READONLY.
|
||||
*/
|
||||
smp_wmb();
|
||||
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
|
||||
clear_write_hold(mnt);
|
||||
}
|
||||
|
||||
static inline void mnt_del_instance(struct mount *m)
|
||||
|
|
@ -766,7 +766,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
|
|||
{
|
||||
int err = 0;
|
||||
|
||||
/* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */
|
||||
/* Racy optimization. Recheck the counter under WRITE_HOLD */
|
||||
if (atomic_long_read(&sb->s_remove_count))
|
||||
return -EBUSY;
|
||||
|
||||
|
|
@ -784,8 +784,8 @@ int sb_prepare_remount_readonly(struct super_block *sb)
|
|||
if (!err)
|
||||
sb_start_ro_state_change(sb);
|
||||
for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
|
||||
if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
|
||||
m->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
|
||||
if (test_write_hold(m))
|
||||
clear_write_hold(m);
|
||||
}
|
||||
unlock_mount_hash();
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ enum mount_flags {
|
|||
MNT_NOSYMFOLLOW = 0x80,
|
||||
|
||||
MNT_SHRINKABLE = 0x100,
|
||||
MNT_WRITE_HOLD = 0x200,
|
||||
|
||||
MNT_INTERNAL = 0x4000,
|
||||
|
||||
|
|
@ -52,7 +51,7 @@ enum mount_flags {
|
|||
| MNT_READONLY | MNT_NOSYMFOLLOW,
|
||||
MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
|
||||
|
||||
MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
|
||||
MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED |
|
||||
MNT_SYNC_UMOUNT | MNT_LOCKED
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue