mount-related bugfixes

this cycle regression (well, bugfix for this cycle bugfix for v6.15-rc1 regression) do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases selftests/mount_setattr: adapt detached mount propagation test v6.15 fs: allow clone_private_mount() for a path on real rootfs v6.11 fs/fhandle.c: fix a race in call of has_locked_children() v5.15 fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2) v5.15 clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns v5.7 path_overmount(): avoid false negatives v3.12 finish_automount(): don't leak MNT_LOCKED from parent to child v2.6.15 do_change_type(): refuse to operate on unmounted/not ours mounts Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQQqUNBr3gm4hGXdBJlZ7Krx/gZQ6wUCaEXGDwAKCRBZ7Krx/gZQ 61WQAPwNBpcwum3F5fqT8rcKymqAUFpc0+rluJoBi+qfCQA9ywEAwn+Kh5qqtz++ cdVnUYQxBrh0u5IOzMEFITlgfYFJZA4= =BIeU -----END PGP SIGNATURE----- Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull mount fixes from Al Viro: "Various mount-related bugfixes: - split the do_move_mount() checks in subtree-of-our-ns and entire-anon cases and adapt detached mount propagation selftest for mount_setattr - allow clone_private_mount() for a path on real rootfs - fix a race in call of has_locked_children() - fix move_mount propagation graph breakage by MOVE_MOUNT_SET_GROUP - make sure clone_private_mnt() caller has CAP_SYS_ADMIN in the right userns - avoid false negatives in path_overmount() - don't leak MNT_LOCKED from parent to child in finish_automount() - do_change_type(): refuse to operate on unmounted/not ours mounts" * tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: do_change_type(): refuse to operate on unmounted/not ours mounts clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns selftests/mount_setattr: adapt detached mount propagation test do_move_mount(): split 
the checks in subtree-of-our-ns and entire-anon cases fs: allow clone_private_mount() for a path on real rootfs fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2) finish_automount(): don't leak MNT_LOCKED from parent to child path_overmount(): avoid false negatives fs/fhandle.c: fix a race in call of has_locked_children()
2025-06-08 10:35:12 -07:00 · 2025-06-08 10:35:12 -07:00 · 35b574a6c2
parent 522cd6acd2 12f147ddd6
commit 35b574a6c2
3 changed files with 74 additions and 59 deletions
--- a/fs/namespace.c
+++ b/fs/namespace.c
@ -2410,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 	namespace_unlock();
 }
-bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
 {
 	struct mount *child;
@ -2424,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
 	return false;
 }
 /*
 * Locked wrapper around __has_locked_children(): takes mount_lock with
 * read_seqlock_excl(), runs the check on @mnt/@dentry, drops the lock
 * and returns the result of the check.
 */
 bool has_locked_children(struct mount *mnt, struct dentry *dentry)
 {
 	bool res;
 	read_seqlock_excl(&mount_lock);
 	res = __has_locked_children(mnt, dentry);
 	read_sequnlock_excl(&mount_lock);
 	return res;
 }
 /*
 * Check that there aren't references to earlier/same mount namespaces in the
 * specified subtree.  Such references can act as pins for mount namespaces
@ -2468,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
 	if (IS_MNT_UNBINDABLE(old_mnt))
 		return ERR_PTR(-EINVAL);
-	if (mnt_has_parent(old_mnt)) {
+	/*
-		if (!check_mnt(old_mnt))
+	 * Make sure the source mount is acceptable.
-			return ERR_PTR(-EINVAL);
+	 * Anything mounted in our mount namespace is allowed.
-	} else {
+	 * Otherwise, it must be the root of an anonymous mount
-		if (!is_mounted(&old_mnt->mnt))
+	 * namespace, and we need to make sure no namespace
 	 * loops get created.
 	 */
 	if (!check_mnt(old_mnt)) {
 		if (!is_mounted(&old_mnt->mnt) ||
 			!is_anon_ns(old_mnt->mnt_ns) ||
 			mnt_has_parent(old_mnt))
 			return ERR_PTR(-EINVAL);
 		/* Make sure this isn't something purely kernel internal. */
 		if (!is_anon_ns(old_mnt->mnt_ns))
 			return ERR_PTR(-EINVAL);
 		/* Make sure we don't create mount namespace loops. */
 		if (!check_for_nsfs_mounts(old_mnt))
 			return ERR_PTR(-EINVAL);
 	}
-	if (has_locked_children(old_mnt, path->dentry))
+        if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 	if (__has_locked_children(old_mnt, path->dentry))
 		return ERR_PTR(-EINVAL);
 	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
@ -2930,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags)
 		return -EINVAL;
 	namespace_lock();
 	if (!check_mnt(mnt)) {
 		err = -EINVAL;
 		goto out_unlock;
 	}
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)
@ -3021,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
 	if (!may_copy_tree(old_path))
 		return mnt;
-	if (!recurse && has_locked_children(old, old_path->dentry))
+	if (!recurse && __has_locked_children(old, old_path->dentry))
 		return mnt;
 	if (recurse)
@ -3414,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
 		goto out;
 	/* From mount should not have locked children in place of To's root */
-	if (has_locked_children(from, to->mnt.mnt_root))
+	if (__has_locked_children(from, to->mnt.mnt_root))
 		goto out;
 	/* Setting sharing groups is only allowed on private mounts */
@ -3428,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
 	if (IS_MNT_SLAVE(from)) {
 		struct mount *m = from->mnt_master;
-		list_add(&to->mnt_slave, &m->mnt_slave_list);
+		list_add(&to->mnt_slave, &from->mnt_slave);
 		to->mnt_master = m;
 	}
@ -3453,18 +3471,25 @@ static int do_set_group(struct path *from_path, struct path *to_path)
 * Check if path is overmounted, i.e., if there's a mount on top of
 * @path->mnt with @path->dentry as mountpoint.
 *
- * Context: This function expects namespace_lock() to be held.
+ * Context: namespace_sem must be held at least shared.
 * MUST NOT be called under lock_mount_hash() (there one should just
 * call __lookup_mnt() and check if it returns NULL).
 * Return: If path is overmounted true is returned, false if not.
 */
 static inline bool path_overmounted(const struct path *path)
 {
 	unsigned seq = read_seqbegin(&mount_lock);
 	bool no_child;
 	rcu_read_lock();
-	if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
+	no_child = !__lookup_mnt(path->mnt, path->dentry);
 	rcu_read_unlock();
-		return true;
+	if (need_seqretry(&mount_lock, seq)) {
 		read_seqlock_excl(&mount_lock);
 		no_child = !__lookup_mnt(path->mnt, path->dentry);
 		read_sequnlock_excl(&mount_lock);
 	}
-	rcu_read_unlock();
+	return unlikely(!no_child);
 	return false;
 }
 /**
@ -3623,37 +3648,41 @@ static int do_move_mount(struct path *old_path,
 	ns = old->mnt_ns;
 	err = -EINVAL;
 	if (!may_use_mount(p))
 		goto out;
 	/* The thing moved must be mounted... */
 	if (!is_mounted(&old->mnt))
 		goto out;
-	/* ... and either ours or the root of anon namespace */
+	if (check_mnt(old)) {
-	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
+		/* if the source is in our namespace... */
 		/* ... it should be detachable from parent */
 		if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
 			goto out;
-
+		/* ... and the target should be in our namespace */
-	if (is_anon_ns(ns) && ns == p->mnt_ns) {
+		if (!check_mnt(p))
 		/*
 		 * Ending up with two files referring to the root of the
 		 * same anonymous mount namespace would cause an error
 		 * as this would mean trying to move the same mount
 		 * twice into the mount tree which would be rejected
 		 * later. But be explicit about it right here.
 		 */
 			goto out;
-	} else if (is_anon_ns(p->mnt_ns)) {
+	} else {
 		/*
-		 * Don't allow moving an attached mount tree to an
+		 * otherwise the source must be the root of some anon namespace.
-		 * anonymous mount tree.
+		 * AV: check for mount being root of an anon namespace is worth
 		 * an inlined predicate...
 		 */
 		if (!is_anon_ns(ns) || mnt_has_parent(old))
 			goto out;
 		/*
 		 * Bail out early if the target is within the same namespace -
 		 * subsequent checks would've rejected that, but they lose
 		 * some corner cases if we check it early.
 		 */
 		if (ns == p->mnt_ns)
 			goto out;
 		/*
 		 * Target should be either in our namespace or in an acceptable
 		 * anon namespace, sensu check_anonymous_mnt().
 		 */
 		if (!may_use_mount(p))
 			goto out;
 	}
 	if (old->mnt.mnt_flags & MNT_LOCKED)
 		goto out;
 	if (!path_mounted(old_path))
 		goto out;
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@ -65,7 +65,8 @@ enum mount_flags {
 	MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
 	MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
-			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED,
+			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
 			     MNT_LOCKED,
 };
 struct vfsmount {
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@ -2079,24 +2079,9 @@ TEST_F(mount_setattr, detached_tree_propagation)
 	 * means that the device information will be different for any
 	 * statx() that was taken from /mnt/A before the mount compared
 	 * to one after the mount.
 	 *
 	 * Since we already know that the device information between the
 	 * stx1 and stx2 samples are identical we also know that stx2 and
 	 * stx3 device information will necessarily differ.
 	 */
 	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
-
+	ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
 	/*
 	 * If mount propagation worked correctly then the tmpfs mount
 	 * that was created after the mount namespace was unshared will
 	 * have propagated onto /mnt/A in the detached mount tree.
 	 *
 	 * Verify that the device information for stx3 and stx4 are
 	 * identical. It is already established that stx3 is different
 	 * from both stx1 and stx2 sampled before the tmpfs mount was
 	 * done so if stx3 and stx4 are identical the proof is done.
 	 */
 	ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
 	EXPECT_EQ(close(fd_tree), 0);
 }