preparations for taking MNT_WRITE_HOLD out of ->mnt_flags

We have an unpleasant wart in accessibility rules for struct mount.  There
are per-superblock lists of mounts, used by sb_prepare_remount_readonly()
to check if any of those is currently claimed for write access and to
block further attempts to get write access on those until we are done.

As soon as it is attached to a filesystem, mount becomes reachable
via that list.  Only sb_prepare_remount_readonly() traverses it and
it only accesses a few members of struct mount.  Unfortunately,
->mnt_flags is one of those and it is modified - MNT_WRITE_HOLD set
and then cleared.  It is done under mount_lock, so from the locking
rules POV everything's fine.

However, it has easily overlooked implications - once mount has been
attached to a filesystem, it has to be treated as globally visible.
In particular, initializing ->mnt_flags *must* be done either prior
to that point or under mount_lock.  All other members are still
private at that point.

Life gets simpler if we move that bit (and that's *all* that can get
touched by access via this list) out of ->mnt_flags.  It's not even
hard to do - currently the list is implemented as a list_head one,
anchored in super_block->s_mounts and linked via mount->mnt_instance.

As the first step, switch it to an hlist-like open-coded structure -
the address of the first mount in the set is stored in ->s_mounts,
and ->mnt_instance is replaced with ->mnt_next_for_sb and
->mnt_pprev_for_sb.  The former is either NULL or points to the next
mount in the set; the latter holds the address of either ->s_mounts or
->mnt_next_for_sb in the previous element of the set.

In the next commit we'll steal the LSB of ->mnt_pprev_for_sb as
replacement for MNT_WRITE_HOLD.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
Al Viro 2025-08-27 12:33:11 -04:00
parent 5d132cfafb
commit 09a1b33c08
4 changed files with 36 additions and 13 deletions

View File

@ -64,7 +64,9 @@ struct mount {
#endif #endif
struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */ struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */ struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */
struct mount * __aligned(1) *mnt_pprev_for_sb;
/* except that LSB of pprev will be stolen */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list; struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_expire; /* link in fs-specific expiry list */

View File

@ -730,6 +730,27 @@ static inline void mnt_unhold_writers(struct mount *mnt)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
} }
/*
 * Unlink @m from the per-superblock set of mounts.
 *
 * The set is an open-coded, hlist-like chain: ->mnt_next_for_sb is the
 * forward link and ->mnt_pprev_for_sb is the address of whichever pointer
 * currently refers to @m.  @m's own two link fields are left untouched.
 *
 * NOTE(review): presumably must be called with mount_lock held - confirm
 * at the call site.
 */
static inline void mnt_del_instance(struct mount *m)
{
	struct mount *succ = m->mnt_next_for_sb;
	struct mount **link = m->mnt_pprev_for_sb;

	/* the successor, if any, is now referenced from where @m used to be */
	if (succ)
		succ->mnt_pprev_for_sb = link;
	*link = succ;
}
/*
 * Insert @m at the head of the set of mounts anchored in @s->s_mounts.
 *
 * After the call @s->s_mounts points to @m, @m->mnt_next_for_sb points to
 * the previous head (or is NULL if the set was empty) and
 * @m->mnt_pprev_for_sb holds the address of @s->s_mounts.  The previous
 * head, if there was one, has its back-pointer redirected to
 * @m->mnt_next_for_sb.
 *
 * NOTE(review): presumably must be called with mount_lock held - confirm
 * at the call site.
 */
static inline void mnt_add_instance(struct mount *m, struct super_block *s)
{
	struct mount *old_head = s->s_mounts;

	/* set up the new element's own links first ... */
	m->mnt_next_for_sb = old_head;
	m->mnt_pprev_for_sb = &s->s_mounts;

	/* ... then hook it in front of whatever was there before */
	if (old_head)
		old_head->mnt_pprev_for_sb = &m->mnt_next_for_sb;
	s->s_mounts = m;
}
static int mnt_make_readonly(struct mount *mnt) static int mnt_make_readonly(struct mount *mnt)
{ {
int ret; int ret;
@ -743,7 +764,6 @@ static int mnt_make_readonly(struct mount *mnt)
int sb_prepare_remount_readonly(struct super_block *sb) int sb_prepare_remount_readonly(struct super_block *sb)
{ {
struct mount *mnt;
int err = 0; int err = 0;
/* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */ /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */
@ -751,9 +771,9 @@ int sb_prepare_remount_readonly(struct super_block *sb)
return -EBUSY; return -EBUSY;
lock_mount_hash(); lock_mount_hash();
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { if (!(m->mnt.mnt_flags & MNT_READONLY)) {
err = mnt_hold_writers(mnt); err = mnt_hold_writers(m);
if (err) if (err)
break; break;
} }
@ -763,9 +783,9 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (!err) if (!err)
sb_start_ro_state_change(sb); sb_start_ro_state_change(sb);
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; m->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
} }
unlock_mount_hash(); unlock_mount_hash();
@ -1207,7 +1227,7 @@ static void setup_mnt(struct mount *m, struct dentry *root)
m->mnt_parent = m; m->mnt_parent = m;
lock_mount_hash(); lock_mount_hash();
list_add_tail(&m->mnt_instance, &s->s_mounts); mnt_add_instance(m, s);
unlock_mount_hash(); unlock_mount_hash();
} }
@ -1425,7 +1445,7 @@ static void mntput_no_expire(struct mount *mnt)
mnt->mnt.mnt_flags |= MNT_DOOMED; mnt->mnt.mnt_flags |= MNT_DOOMED;
rcu_read_unlock(); rcu_read_unlock();
list_del(&mnt->mnt_instance); mnt_del_instance(mnt);
if (unlikely(!list_empty(&mnt->mnt_expire))) if (unlikely(!list_empty(&mnt->mnt_expire)))
list_del(&mnt->mnt_expire); list_del(&mnt->mnt_expire);

View File

@ -323,7 +323,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
if (!s) if (!s)
return NULL; return NULL;
INIT_LIST_HEAD(&s->s_mounts);
s->s_user_ns = get_user_ns(user_ns); s->s_user_ns = get_user_ns(user_ns);
init_rwsem(&s->s_umount); init_rwsem(&s->s_umount);
lockdep_set_class(&s->s_umount, &type->s_umount_key); lockdep_set_class(&s->s_umount, &type->s_umount_key);
@ -408,7 +407,7 @@ static void __put_super(struct super_block *s)
list_del_init(&s->s_list); list_del_init(&s->s_list);
WARN_ON(s->s_dentry_lru.node); WARN_ON(s->s_dentry_lru.node);
WARN_ON(s->s_inode_lru.node); WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts)); WARN_ON(s->s_mounts);
call_rcu(&s->rcu, destroy_super_rcu); call_rcu(&s->rcu, destroy_super_rcu);
} }
} }

View File

@ -1324,6 +1324,8 @@ struct sb_writers {
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
}; };
struct mount;
struct super_block { struct super_block {
struct list_head s_list; /* Keep this first */ struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */ dev_t s_dev; /* search index; _not_ kdev_t */
@ -1358,7 +1360,7 @@ struct super_block {
__u16 s_encoding_flags; __u16 s_encoding_flags;
#endif #endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct mount *s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
struct file *s_bdev_file; struct file *s_bdev_file;
struct backing_dev_info *s_bdi; struct backing_dev_info *s_bdi;