mount-related bugfixes

Regression from this cycle (well, a bugfix for this cycle's bugfix for a v6.15-rc1 regression):
 	do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases
 	selftests/mount_setattr: adapt detached mount propagation test
 v6.15	fs: allow clone_private_mount() for a path on real rootfs
 v6.11	fs/fhandle.c: fix a race in call of has_locked_children()
 v5.15	fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2)
 v5.15	clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns
 v5.7	path_overmount(): avoid false negatives
 v3.12	finish_automount(): don't leak MNT_LOCKED from parent to child
 v2.6.15	do_change_type(): refuse to operate on unmounted/not ours mounts
 
 Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQQqUNBr3gm4hGXdBJlZ7Krx/gZQ6wUCaEXGDwAKCRBZ7Krx/gZQ
 61WQAPwNBpcwum3F5fqT8rcKymqAUFpc0+rluJoBi+qfCQA9ywEAwn+Kh5qqtz++
 cdVnUYQxBrh0u5IOzMEFITlgfYFJZA4=
 =BIeU
 -----END PGP SIGNATURE-----

Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull mount fixes from Al Viro:
 "Various mount-related bugfixes:

   - split the do_move_mount() checks into subtree-of-our-ns and
     entire-anon cases, and adapt the detached mount propagation
     selftest for mount_setattr

   - allow clone_private_mount() for a path on real rootfs

   - fix a race in call of has_locked_children()

   - fix propagation graph breakage caused by move_mount(2) with
     MOVE_MOUNT_SET_GROUP

   - make sure the clone_private_mount() caller has CAP_SYS_ADMIN in the
     right userns

   - avoid false negatives in path_overmount()

   - don't leak MNT_LOCKED from parent to child in finish_automount()

   - do_change_type(): refuse to operate on unmounted/not ours mounts"

* tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  do_change_type(): refuse to operate on unmounted/not ours mounts
  clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns
  selftests/mount_setattr: adapt detached mount propagation test
  do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases
  fs: allow clone_private_mount() for a path on real rootfs
  fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2)
  finish_automount(): don't leak MNT_LOCKED from parent to child
  path_overmount(): avoid false negatives
  fs/fhandle.c: fix a race in call of has_locked_children()
This commit is contained in:
Linus Torvalds 2025-06-08 10:35:12 -07:00
commit 35b574a6c2
3 changed files with 74 additions and 59 deletions

View File

@ -2410,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock(); namespace_unlock();
} }
bool has_locked_children(struct mount *mnt, struct dentry *dentry) static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{ {
struct mount *child; struct mount *child;
@ -2424,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false; return false;
} }
bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
bool res;
read_seqlock_excl(&mount_lock);
res = __has_locked_children(mnt, dentry);
read_sequnlock_excl(&mount_lock);
return res;
}
/* /*
* Check that there aren't references to earlier/same mount namespaces in the * Check that there aren't references to earlier/same mount namespaces in the
* specified subtree. Such references can act as pins for mount namespaces * specified subtree. Such references can act as pins for mount namespaces
@ -2468,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (IS_MNT_UNBINDABLE(old_mnt)) if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
if (mnt_has_parent(old_mnt)) { /*
if (!check_mnt(old_mnt)) * Make sure the source mount is acceptable.
return ERR_PTR(-EINVAL); * Anything mounted in our mount namespace is allowed.
} else { * Otherwise, it must be the root of an anonymous mount
if (!is_mounted(&old_mnt->mnt)) * namespace, and we need to make sure no namespace
* loops get created.
*/
if (!check_mnt(old_mnt)) {
if (!is_mounted(&old_mnt->mnt) ||
!is_anon_ns(old_mnt->mnt_ns) ||
mnt_has_parent(old_mnt))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
/* Make sure this isn't something purely kernel internal. */
if (!is_anon_ns(old_mnt->mnt_ns))
return ERR_PTR(-EINVAL);
/* Make sure we don't create mount namespace loops. */
if (!check_for_nsfs_mounts(old_mnt)) if (!check_for_nsfs_mounts(old_mnt))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
if (has_locked_children(old_mnt, path->dentry)) if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (__has_locked_children(old_mnt, path->dentry))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
@ -2930,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL; return -EINVAL;
namespace_lock(); namespace_lock();
if (!check_mnt(mnt)) {
err = -EINVAL;
goto out_unlock;
}
if (type == MS_SHARED) { if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse); err = invent_group_ids(mnt, recurse);
if (err) if (err)
@ -3021,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
if (!may_copy_tree(old_path)) if (!may_copy_tree(old_path))
return mnt; return mnt;
if (!recurse && has_locked_children(old, old_path->dentry)) if (!recurse && __has_locked_children(old, old_path->dentry))
return mnt; return mnt;
if (recurse) if (recurse)
@ -3414,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
goto out; goto out;
/* From mount should not have locked children in place of To's root */ /* From mount should not have locked children in place of To's root */
if (has_locked_children(from, to->mnt.mnt_root)) if (__has_locked_children(from, to->mnt.mnt_root))
goto out; goto out;
/* Setting sharing groups is only allowed on private mounts */ /* Setting sharing groups is only allowed on private mounts */
@ -3428,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
if (IS_MNT_SLAVE(from)) { if (IS_MNT_SLAVE(from)) {
struct mount *m = from->mnt_master; struct mount *m = from->mnt_master;
list_add(&to->mnt_slave, &m->mnt_slave_list); list_add(&to->mnt_slave, &from->mnt_slave);
to->mnt_master = m; to->mnt_master = m;
} }
@ -3453,18 +3471,25 @@ static int do_set_group(struct path *from_path, struct path *to_path)
* Check if path is overmounted, i.e., if there's a mount on top of * Check if path is overmounted, i.e., if there's a mount on top of
* @path->mnt with @path->dentry as mountpoint. * @path->mnt with @path->dentry as mountpoint.
* *
* Context: This function expects namespace_lock() to be held. * Context: namespace_sem must be held at least shared.
* MUST NOT be called under lock_mount_hash() (there one should just
* call __lookup_mnt() and check if it returns NULL).
* Return: If path is overmounted true is returned, false if not. * Return: If path is overmounted true is returned, false if not.
*/ */
static inline bool path_overmounted(const struct path *path) static inline bool path_overmounted(const struct path *path)
{ {
unsigned seq = read_seqbegin(&mount_lock);
bool no_child;
rcu_read_lock(); rcu_read_lock();
if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { no_child = !__lookup_mnt(path->mnt, path->dentry);
rcu_read_unlock(); rcu_read_unlock();
return true; if (need_seqretry(&mount_lock, seq)) {
read_seqlock_excl(&mount_lock);
no_child = !__lookup_mnt(path->mnt, path->dentry);
read_sequnlock_excl(&mount_lock);
} }
rcu_read_unlock(); return unlikely(!no_child);
return false;
} }
/** /**
@ -3623,37 +3648,41 @@ static int do_move_mount(struct path *old_path,
ns = old->mnt_ns; ns = old->mnt_ns;
err = -EINVAL; err = -EINVAL;
if (!may_use_mount(p))
goto out;
/* The thing moved must be mounted... */ /* The thing moved must be mounted... */
if (!is_mounted(&old->mnt)) if (!is_mounted(&old->mnt))
goto out; goto out;
/* ... and either ours or the root of anon namespace */ if (check_mnt(old)) {
if (!(attached ? check_mnt(old) : is_anon_ns(ns))) /* if the source is in our namespace... */
/* ... it should be detachable from parent */
if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
goto out; goto out;
/* ... and the target should be in our namespace */
if (is_anon_ns(ns) && ns == p->mnt_ns) { if (!check_mnt(p))
/*
* Ending up with two files referring to the root of the
* same anonymous mount namespace would cause an error
* as this would mean trying to move the same mount
* twice into the mount tree which would be rejected
* later. But be explicit about it right here.
*/
goto out; goto out;
} else if (is_anon_ns(p->mnt_ns)) { } else {
/* /*
* Don't allow moving an attached mount tree to an * otherwise the source must be the root of some anon namespace.
* anonymous mount tree. * AV: check for mount being root of an anon namespace is worth
* an inlined predicate...
*/ */
if (!is_anon_ns(ns) || mnt_has_parent(old))
goto out;
/*
* Bail out early if the target is within the same namespace -
* subsequent checks would've rejected that, but they lose
* some corner cases if we check it early.
*/
if (ns == p->mnt_ns)
goto out;
/*
* Target should be either in our namespace or in an acceptable
* anon namespace, sensu check_anonymous_mnt().
*/
if (!may_use_mount(p))
goto out; goto out;
} }
if (old->mnt.mnt_flags & MNT_LOCKED)
goto out;
if (!path_mounted(old_path)) if (!path_mounted(old_path))
goto out; goto out;

View File

@ -65,7 +65,8 @@ enum mount_flags {
MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
MNT_LOCKED,
}; };
struct vfsmount { struct vfsmount {

View File

@ -2079,24 +2079,9 @@ TEST_F(mount_setattr, detached_tree_propagation)
* means that the device information will be different for any * means that the device information will be different for any
* statx() that was taken from /mnt/A before the mount compared * statx() that was taken from /mnt/A before the mount compared
* to one after the mount. * to one after the mount.
*
* Since we already now that the device information between the
* stx1 and stx2 samples are identical we also now that stx2 and
* stx3 device information will necessarily differ.
*/ */
ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor); ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
/*
* If mount propagation worked correctly then the tmpfs mount
* that was created after the mount namespace was unshared will
* have propagated onto /mnt/A in the detached mount tree.
*
* Verify that the device information for stx3 and stx4 are
* identical. It is already established that stx3 is different
* from both stx1 and stx2 sampled before the tmpfs mount was
* done so if stx3 and stx4 are identical the proof is done.
*/
ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
EXPECT_EQ(close(fd_tree), 0); EXPECT_EQ(close(fd_tree), 0);
} }